Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def run_bonferroni():
"""Do Gene Ontology Enrichment Analysis w/Bonferroni multipletest. Print results 3 ways."""
# ---------------------------------------------------------------------
# Run Gene Ontology Analysis (GOEA)
#
# 1. Initialize
godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
fin_assc = os.path.join(REPO, "data/association")
assoc = read_associations(fin_assc, 'id2gos', no_top=True)
popul_ids = [line.rstrip() for line in open(os.path.join(REPO, "data/population"))]
study_ids = [line.rstrip() for line in open(os.path.join(REPO, "data/study"))]
# 2. Run enrichment analysis
goea = GOEnrichmentStudy(popul_ids, assoc, godag, alpha=0.05, methods=['bonferroni'])
results_nt = goea.run_study(study_ids)
return results_nt, goea
def test_fdr_bh(fout_log=None):
"""Do Gene Ontology Enrichment Analysis w/Benjamini-Hochberg multipletest. Print results"""
# ---------------------------------------------------------------------
# Run Gene Ontology Analysis (GOEA)
#
# 1. Initialize
log = sys.stdout if fout_log is None else open(fout_log, 'w')
obo_dag = GODag("go-basic.obo")
assoc = read_associations("../data/association", no_top=True)
popul_ids = [line.rstrip() for line in open("../data/population")]
study_ids = [line.rstrip() for line in open("../data/study")]
# 2. Run enrichment analysis
goea = GOEA(obo_dag, assoc, log)
goea.set_population(popul_ids)
goea.set_params(alpha=0.05, method='fdr_bh')
results_nt = goea.find_enrichment(study_ids)
# ---------------------------------------------------------------------
# Print results 3 ways: to screen, to tsv(tab-separated file), to xlsx(Excel spreadsheet)
fout_tsv = "goea_fdr_bh.tsv"
fout_xls = "goea_fdr_bh.xlsx"
field_names = ['NS', 'study_cnt', 'fdr_bh', 'level', 'depth', 'GO', 'name', 'fdr_bh_sig'] # collect these
print_names = ['NS', 'study_cnt', 'fdr_bh', 'level', 'depth', 'GO', 'name'] # print these in tsv and xlsx
# Optional user customizable sort:
def init_goea(**kws):
"""Initialize GODag and GOEnrichmentStudy."""
godag = get_godag(os.path.join(os.getcwd(), "go-basic.obo"), loading_bar=None)
fin_assc = ROOT + "association"
assoc = read_associations(fin_assc, 'id2gos', no_top=True)
popul_ids = [line.rstrip() for line in open(ROOT + "population")]
methods = kws['methods'] if 'methods' in kws else ['not_bonferroni']
study_ids = [line.rstrip() for line in open(ROOT + "study")]
return GOEnrichmentStudy(popul_ids, assoc, godag, methods=methods), study_ids
def init_goea(log):
"""Read Ontologies and Annotations once."""
# ---------------------------------------------------------------------
# Run Gene Ontology Analysis (GOEA)
#
# 1. Initialize
obo_dag = GODag("go-basic.obo")
assoc = read_associations("../data/association", no_top=True)
popul_ids = [line.rstrip() for line in open("../data/population")]
# 2. Run enrichment analysis
goeaobj = GOEA(obo_dag, assoc, log)
goeaobj.set_population(popul_ids)
return goeaobj
def get_goeaobj(methods=None):
"""Test GOEA with method, fdr."""
obo_fin = os.path.join(REPO, "go-basic.obo")
obo_dag = get_godag(obo_fin, loading_bar=None)
fin_assc = "{REPO}/tests/data/small_association".format(REPO=REPO)
assoc = read_associations(fin_assc, 'id2gos', no_top=True)
popul_fin = "{REPO}/tests/data/small_population".format(REPO=REPO)
popul_ids = [line.rstrip() for line in open(popul_fin)]
goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
return goeaobj
def get_goea_results(method="fdr_bh"):
"""Get GOEA results."""
root_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
obo_fin = os.path.join(root_dir, "goslim_generic.obo")
obo_dag = GODag(obo_fin)
fin_slim = os.path.join(root_dir, "slim_association")
assoc = read_associations(fin_slim, 'id2gos', no_top=True)
popul_ids = [line.rstrip() for line in open(os.path.join(root_dir, "small_population"))]
goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=[method])
study_ids = [line.rstrip() for line in open(os.path.join(root_dir, "small_study"))]
goea_results = goeaobj.run_study(study_ids, methods=[method])
return goea_results
def get_goeaobj(methods=None):
"""Test GOEA with method, fdr."""
obo_dag = GODag(ROOT + "goslim_generic.obo")
fin_assc = ROOT + "slim_association"
assoc = read_associations(fin_assc, 'id2gos', no_top=True)
popul_ids = [line.rstrip() for line in open(ROOT + "small_population")]
goeaobj = GOEnrichmentStudy(popul_ids, assoc, obo_dag, methods=methods)
return goeaobj
if opts.term not in go_dag:
sys.stderr.write(("term %s not found!\n" % opts.term))
sys.exit(1)
direct_anc, all_anc = mapslim(opts.term, go_dag, goslim_dag)
# output either all or only direct slims, depending on user command
if only_direct:
slim_terms_str = ";".join(direct_anc)
else:
slim_terms_str = ";".join(all_anc)
print(slim_terms_str)
# in case a association file is given as input
if opts.ass_file_name:
assert os.path.exists(opts.ass_file_name), ("file %s not found!"
% opts.ass_file_name)
assocs = read_associations(opts.ass_file_name, 'id2gos')
for protein_product, go_terms in assocs.items():
all_direct_anc = set()
all_covered_anc = set()
all_all_anc = set()
for go_term in go_terms:
if go_term not in go_dag:
continue
direct_anc, all_anc = mapslim(go_term, go_dag, goslim_dag)
all_all_anc |= all_anc
# collect all covered ancestors, so the direct ancestors
# can be calculated afterwards
all_covered_anc |= (all_anc - direct_anc)
all_direct_anc = all_all_anc - all_covered_anc
# output either all or only direct, depending on user command
if only_direct:
slim_terms_str = ";".join(all_direct_anc)