def _download_libraries(self, libname):
    """Download an Enrichr library and cache it as a GMT file."""
    self._logger.info("Downloading and generating Enrichr library gene sets......")
    s = retry(5)
    # query string
    ENRICHR_URL = 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary'
    query_string = '?mode=text&libraryName=%s'
    # fetch the library as plain text (GMT format)
    response = s.get(ENRICHR_URL + query_string % libname, timeout=None)
    if not response.ok:
        raise Exception('Error fetching enrichment results, check internet connection first.')
    # reformat to dict and save to disk
    mkdirs(DEFAULT_CACHE_PATH)
    genesets_dict = {}
    outname = "enrichr.%s.gmt" % libname
    gmtout = open(os.path.join(DEFAULT_CACHE_PATH, outname), "w")
    for line in response.iter_lines(chunk_size=1024, decode_unicode='utf-8'):
        line = line.strip()
        k = line.split("\t")[0]
        # keep only the gene symbol, dropping any ",score" suffix
        v = list(map(lambda x: x.split(",")[0], line.split("\t")[2:]))
        genesets_dict.update({k: v})
        outline = "%s\t\t%s\n" % (k, "\t".join(v))
        gmtout.write(outline)
    gmtout.close()
    return genesets_dict
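
# A minimal standalone sketch of the same download-and-parse step, assuming only
# the `requests` package. It fetches one Enrichr library in GMT text form and
# builds the same {term: [genes]} mapping, without the class helpers used above
# (retry, mkdirs, DEFAULT_CACHE_PATH). `fetch_enrichr_library` is a hypothetical
# name introduced here for illustration, not part of the original code.
import requests

def fetch_enrichr_library(libname):
    url = "http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary?mode=text&libraryName=%s" % libname
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    genesets = {}
    for line in response.iter_lines(decode_unicode=True):
        if not line:
            continue
        fields = line.strip().split("\t")
        # GMT layout: term, description, then member genes (optionally "GENE,score")
        genesets[fields[0]] = [g.split(",")[0] for g in fields[2:]]
    return genesets

# genesets = fetch_enrichr_library("KEGG_2016")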
if gmt.lower().endswith(".gmt"):
    with open(gmt) as genesets:
        genesets_dict = {line.strip().split("\t")[0]: line.strip().split("\t")[2:]
                         for line in genesets.readlines()}
    return genesets_dict
elif gmt in DEFAULT_LIBRARY:
    pass
elif gmt in self.get_libraries():
    pass
else:
    self._logger.error("No supported gene_sets: %s" % gmt)
    sys.exit(0)

tmpname = "enrichr." + gmt + ".gmt"
tempath = os.path.join(DEFAULT_CACHE_PATH, tmpname)
# if the library was already downloaded, reuse the cached local file
if os.path.isfile(tempath):
    self._logger.info("Enrichr library gene sets already downloaded in: %s, use local file" % DEFAULT_CACHE_PATH)
    return self.parse_gmt(tempath)
else:
    return self._download_libraries(gmt)
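
# For reference, a GMT file has one gene set per line: term, description, then
# the member genes, all tab-separated. A self-contained sketch of the
# local-.gmt branch above; "custom.gmt" and its contents are hypothetical
# example data used only for illustration.
#
#   MY_SET_A<TAB>my description<TAB>TP53<TAB>BRCA2
with open("custom.gmt") as genesets:
    genesets_dict = {line.strip().split("\t")[0]: line.strip().split("\t")[2:]
                     for line in genesets if line.strip()}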
self.add_dataset_to_xml(dataset)
for at in attributes:
    self.add_attribute_to_xml(at)
# add filters
if filters:
    for k, v in filters.items():
        if isinstance(v, list): v = ",".join(v)
        self.add_filter_to_xml(k, v)
xml_query = self.get_xml()
results = super(Biomart, self).query(xml_query)
df = pd.read_csv(StringIO(results), header=None, sep="\t",
                 names=attributes, index_col=None)
# save file to cache path.
if filename is None:
    mkdirs(DEFAULT_CACHE_PATH)
    filename = os.path.join(DEFAULT_CACHE_PATH, "{}.background.genes.txt".format(dataset))
df.to_csv(filename, sep="\t", index=False)
return df
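
# A self-contained sketch of the parsing step above, assuming nothing beyond
# pandas: BioMart's text response is plain TSV, one row per record, which
# pd.read_csv turns into a DataFrame whose columns are the requested
# attributes. The response text below is made-up example data for illustration.
from io import StringIO

import pandas as pd

attributes = ["ensembl_gene_id", "external_gene_name"]
results = "ENSG00000139618\tBRCA2\nENSG00000141510\tTP53\n"
df = pd.read_csv(StringIO(results), header=None, sep="\t",
                 names=attributes, index_col=None)
print(df)  # two rows, columns: ensembl_gene_id, external_gene_name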