Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""Test GoeaResults in plotting package."""
# --------------------------------------------------------------------
# --------------------------------------------------------------------
# Gene Ontology Enrichment Analysis (GOEA)
# --------------------------------------------------------------------
# --------------------------------------------------------------------
taxid = 10090 # Mouse study
# Load ontologies, associations, and population ids
geneids_pop = GeneID2nt_mus.keys()
geneids2symbol_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
geneids_study = geneids2symbol_study.keys()
goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid)
# Run GOEA on study
goea_results_all = goeaobj.run_study(geneids_study)
goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05]
goea_results_nt = MgrNtGOEAs(goea_results_sig).get_goea_nts_all()
# Test managing GOEA results
objres = GoeaResults(goea_results_sig)
run(objres, prt)
objnts = GoeaResults(goea_results_nt)
run(objnts, prt)
# Plot GOEA results
fout_img = os.path.join(REPO, "test_plot_objgoearesults_{NS}.png")
plot_results(fout_img, goea_results_sig, id2symbol=geneids2symbol_study)
# --------------------------------------------------------------------
# --------------------------------------------------------------------
# Gene Ontology Enrichment Analysis (GOEA)
# --------------------------------------------------------------------
# --------------------------------------------------------------------
taxid = 10090 # Mouse study
# Load ontologies, associations, and population ids
geneids_pop = GeneID2nt_mus.keys()
geneids2symbol_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
geneids_study = geneids2symbol_study.keys()
goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid)
go2obj = goeaobj.obo_dag
# Run GOEA on study
goea_results_all = goeaobj.run_study(geneids_study)
goea_results_sig = [r for r in goea_results_all if r.p_fdr_bh < 0.05]
goea_results_nt = MgrNtGOEAs(goea_results_sig).get_goea_nts_all()
assert goea_results_nt
ns2gos = get_ns2gos(goea_results_sig)
# Test plotting GOEA results
gosubdag = GoSubDag(set(r.GO for r in goea_results_sig), go2obj)
plot_results("test_plot_goids_a_goea_{NS}.png", goea_results_sig,
id2symbol=geneids2symbol_study, parentcnt=True, childcnt=True)
for nss, goids in ns2gos.items():
plt_goids(gosubdag, "test_plot_goids_b_{NS}.png".format(NS=nss), goids)
plot_gos("test_plot_goids_c_{NS}.png".format(NS=nss), goids, go2obj)
def wr_txt(self, fout_txt, goea_results, prtfmt=None, **kws):
"""Print GOEA results to text file."""
if not goea_results:
sys.stdout.write(" 0 GOEA results. NOT WRITING {FOUT}\n".format(FOUT=fout_txt))
return
with open(fout_txt, 'w') as prt:
if 'title' in kws:
prt.write("{TITLE}\n".format(TITLE=kws['title']))
data_nts = self.prt_txt(prt, goea_results, prtfmt, **kws)
log = self.log if self.log is not None else sys.stdout
log.write(" {N:>5} GOEA results for {CUR:5} study items. WROTE: {F}\n".format(
N=len(data_nts),
CUR=len(MgrNtGOEAs(goea_results).get_study_items()),
F=fout_txt))
def get_sortobj(self, goea_results, **kws):
"""Return a Grouper object, given a list of GOEnrichmentRecord."""
nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
goids = set(nt.GO for nt in nts_goea)
go2nt = {nt.GO:nt for nt in nts_goea}
grprobj = Grouper("GOEA", goids, self.hdrobj, self.grprdflt.gosubdag, go2nt=go2nt)
grprobj.prt_summary(sys.stdout)
# hdrgo_prt", "section_prt", "top_n", "use_sections"
sortobj = Sorter(grprobj, section_sortby=lambda nt: getattr(nt, self.pval_fld))
return sortobj
def run_study_nts(self, study, **kws):
"""Run GOEA on study ids. Return results as a list of namedtuples."""
goea_results = self.run_study(study, **kws)
return MgrNtGOEAs(goea_results).get_goea_nts_all()
def get_goea_nts_prt(goea_results, **kws):
"""Get namedtuples containing user-specified (or default) data from GOATOOLS GOEA results."""
return MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
def wr_py_goea_results(self, fout_py, goea_results, **kws):
"""Save GOEA results into Python package containing list of namedtuples."""
var_name = kws.get("var_name", "goea_results")
docstring = kws.get("docstring", "")
sortby = kws.get("sortby", None)
if goea_results:
from goatools.nt_utils import wr_py_nts
nts_goea = goea_results
# If list has GOEnrichmentRecords or verbose namedtuples, exclude some fields.
if hasattr(goea_results[0], "_fldsdefprt") or hasattr(goea_results[0], 'goterm'):
# Exclude some attributes from the namedtuple when saving results
# to a Python file because the information is redundant or verbose.
nts_goea = MgrNtGOEAs(goea_results).get_goea_nts_prt(**kws)
docstring = "\n".join([docstring, "# {VER}\n\n".format(VER=self.obo_dag.version)])
assert hasattr(nts_goea[0], '_fields')
if sortby is None:
sortby = MgrNtGOEAs.dflt_sortby_objgoea
nts_goea = sorted(nts_goea, key=sortby)
wr_py_nts(fout_py, nts_goea, docstring, var_name)
def prt_txt(prt, goea_results, prtfmt=None, **kws):
"""Print GOEA results in text format."""
objprt = PrtFmt()
if prtfmt is None:
flds = ['GO', 'NS', 'p_uncorrected',
'ratio_in_study', 'ratio_in_pop', 'depth', 'name', 'study_items']
prtfmt = objprt.get_prtfmt_str(flds)
prtfmt = objprt.adjust_prtfmt(prtfmt)
prt_flds = RPT.get_fmtflds(prtfmt)
data_nts = MgrNtGOEAs(goea_results).get_goea_nts_prt(prt_flds, **kws)
RPT.prt_txt(prt, data_nts, prtfmt, prt_flds, **kws)
return data_nts
def get_study_items(goea_results):
"""Get all study items found in a GOATOOLS GOEA (e.g., geneids)."""
return MgrNtGOEAs(goea_results).get_study_items()