Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def __init__(self, indir, outdir='GSEApy_Replot', weighted_score_type=1,
             min_size=3, max_size=1000, figsize=(6.5, 6), graph_num=20, format='pdf', verbose=False):
    """Record the replot settings and set up the run logger.

    :param indir: stored unchanged as ``self.indir``.
    :param outdir: output directory; also receives the log file.
    :param weighted_score_type: stored unchanged.
    :param min_size: stored unchanged.
    :param max_size: stored unchanged.
    :param figsize: stored unchanged.
    :param graph_num: coerced with ``int()`` and stored as ``self.fignum``.
    :param format: stored unchanged as ``self.format``.
    :param verbose: coerced with ``bool()``; selects the logger level.
    """
    # settings taken directly from the caller
    self.indir = indir
    self.outdir = outdir
    self.weighted_score_type = weighted_score_type
    self.min_size = min_size
    self.max_size = max_size
    self.figsize = figsize
    self.fignum = int(graph_num)
    self.format = format
    self.verbose = bool(verbose)

    # fixed values for this module
    self.module = 'replot'
    self.gene_sets = None
    self.ascending = False

    # init logger: the log file is placed inside outdir, so mkdirs is called
    # on it first (mkdirs is defined elsewhere -- presumably creates the path)
    mkdirs(self.outdir)
    log_path = os.path.join(self.outdir, "gseapy.%s.%s.log" % (self.module, "run"))
    if self.verbose:
        level = logging.INFO
    else:
        level = logging.WARNING
    self._logger = log_init(outlog=log_path, log_level=level)
def run(self):
def prepare_outdir(self):
    """Resolve the output directory and build the log file path.

    The original ``self.outdir`` value is remembered in ``self._outdir``.
    When it is ``None`` a ``TemporaryDirectory`` is created (kept alive on
    ``self._tmpdir``) and ``self.outdir`` is pointed at it; when it is a
    string, ``mkdirs`` is called on it.

    The log file name embeds a label derived from ``self.gene_sets``: the
    lower-cased base name with a trailing ``.gmt`` extension removed when
    it is a string, or ``"blank_name"`` when it is a dict.

    :return: path of the log file inside ``self.outdir``.
    :raises Exception: if ``self.outdir`` is neither ``None`` nor a string,
        or if ``self.gene_sets`` is neither a string nor a dict.
    """
    self._outdir = self.outdir
    if self._outdir is None:
        self._tmpdir = TemporaryDirectory()
        self.outdir = self._tmpdir.name
    elif isinstance(self.outdir, str):
        mkdirs(self.outdir)
    else:
        raise Exception("Error parsing outdir: %s" % type(self.outdir))

    # handle gmt type
    if isinstance(self.gene_sets, str):
        _gset = os.path.split(self.gene_sets)[-1].lower()
        # Remove only a literal ".gmt" suffix. str.rstrip(".gmt") would strip
        # any trailing run of the characters '.', 'g', 'm', 't' and mangle
        # names such as "hallmark_signaling.gmt" -> "hallmark_signalin".
        if _gset.endswith(".gmt"):
            _gset = _gset[:-len(".gmt")]
    elif isinstance(self.gene_sets, dict):
        _gset = "blank_name"
    else:
        raise Exception("Error parsing gene_sets parameter for gene sets")

    logfile = os.path.join(self.outdir, "gseapy.%s.%s.log" % (self.module, _gset))
    return logfile
def runSamplesPermu(self, df, gmt=None):
"""Single Sample GSEA workflow with permutation procedure"""
assert self.min_size <= self.max_size
mkdirs(self.outdir)
self.resultsOnSamples = OrderedDict()
outdir = self.outdir
# iter through each sample
for name, ser in df.iteritems():
self.outdir = os.path.join(outdir, str(name))
self._logger.info("Run Sample: %s " % name)
mkdirs(self.outdir)
# sort ranking values from high to low or reverse
dat2 = ser.sort_values(ascending=self.ascending)
# reset integer index, or caused unwanted problems
# df.reset_index(drop=True, inplace=True)
# compute ES, NES, pval, FDR, RES
gsea_results, hit_ind,rank_ES, subsets = gsea_compute(data=dat2, n=self.permutation_num, gmt=gmt,
weighted_score_type=self.weighted_score_type,
permutation_type='gene_set', method=None,
def runSamplesPermu(self, df, gmt=None):
"""Single Sample GSEA workflow with permutation procedure"""
assert self.min_size <= self.max_size
mkdirs(self.outdir)
self.resultsOnSamples = OrderedDict()
outdir = self.outdir
# iter through each sample
for name, ser in df.iteritems():
self.outdir = os.path.join(outdir, str(name))
self._logger.info("Run Sample: %s " % name)
mkdirs(self.outdir)
# sort ranking values from high to low or reverse
dat2 = ser.sort_values(ascending=self.ascending)
# reset integer index, or caused unwanted problems
# df.reset_index(drop=True, inplace=True)
# compute ES, NES, pval, FDR, RES
gsea_results, hit_ind,rank_ES, subsets = gsea_compute(data=dat2, n=self.permutation_num, gmt=gmt,
weighted_score_type=self.weighted_score_type,
permutation_type='gene_set', method=None,
pheno_pos='', pheno_neg='',
classes=None, ascending=self.ascending,
processes=self._processes,
seed=self.seed, single=True, scale=self.scale)
# write file
res_zip = zip(subsets, list(gsea_results), hit_ind, rank_ES)
# apply_async
tempes.append(pool.apply_async(enrichment_score_tensor,
args=(genes_sorted, cor_vec, gmt,
self.weighted_score_type,
self.permutation_num, rs, True,
self.scale)))
pool.close()
pool.join()
# save results and plotting
for i, temp in enumerate(tempes):
name, rnk = names[i], rankings[i]
self._logger.info("Calculate Enrichment Score for Sample: %s "%name)
es, esnull, hit_ind, RES = temp.get()
# create results subdir
self.outdir= os.path.join(outdir, str(name))
mkdirs(self.outdir)
# save results
self.resultsOnSamples[name] = pd.Series(data=es, index=subsets, name=name)
# plotting
if self._noplot: continue
self._logger.info("Plotting Sample: %s \n" % name)
for i, term in enumerate(subsets):
term = term.replace('/','_').replace(":","_")
outfile = '{0}/{1}.{2}.{3}'.format(self.outdir, term, self.module, self.format)
gseaplot(rank_metric=rnk, term=term,
hit_indices=hit_ind[i], nes=es[i], pval=1, fdr=1,
RES=RES[i], pheno_pos='', pheno_neg='',
figsize=self.figsize, ofname=outfile)
# save es, nes to file
self._save(outdir)
return