Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_alt_id():
    """Check that every ID returned by _get_altids is a key of the loaded GO DAG.

    Along the way a GoSubDag, a HdrgosSections object and a Grouper are
    constructed from the full DAG; the test requires that these constructions
    complete without raising.
    """
    godag = get_godag(os.path.join(REPO, "go-basic.obo"))
    # Inputs for the Grouper, prepared in the same order as before
    usrgos = _get_data0()
    grprdflt = _get_grprdflt(GoSubDag(godag.keys(), godag))
    subdag = grprdflt.gosubdag
    # Build the header-section object and the Grouper; the results are not inspected
    hdrobj = HdrgosSections(subdag, grprdflt.hdrgos_dflt, sections=None, hdrgos=None)
    Grouper("test_altid_gosubdag", usrgos, hdrobj, subdag, go2nt=None)
    # Every alternate GO ID must appear among the DAG's keys
    alt_ids = _get_altids(godag)
    dag_ids = set(godag.keys())
    assert len(alt_ids.intersection(dag_ids)) == len(alt_ids)
def _get_gosubdag():
    """Load go-basic.obo from REPO and return it wrapped in a GoSubDag.

    The DAG is read with the optional 'relationship' attribute, progress is
    printed to sys.stdout, and the loading bar is disabled.  The GoSubDag is
    created with None as its GO-ID argument.
    """
    obo_path = os.path.join(REPO, 'go-basic.obo')
    load_opts = {
        'prt': sys.stdout,
        'loading_bar': False,
        'optional_attrs': ['relationship'],
    }
    return GoSubDag(None, get_godag(obo_path, **load_opts))
def _plt(self, goid, exp_goids, act_goids, diff_exp, diff_act):
    """Write a PNG of the GO DAG around goid, colored by expected/actual differences.

    The image is named '<self.name>_<goid with colons removed>.png'.  The plotted
    sub-DAG is built from the union of exp_goids, act_goids and goid, using
    self.godag with relationships enabled.
    """
    png_name = '{NAME}_{GO}.png'.format(NAME=self.name, GO=goid.replace(':', ''))
    sources = set.union(exp_goids, act_goids, {goid})
    subdag = GoSubDag(sources, self.godag, relationships=True)
    # Color precedence (later entries win): goid, then diff_exp, then diff_act
    go2color = {goid: '#c8ffb0'}  # xkcd light light green
    go2color.update(dict.fromkeys(diff_exp, '#cafffb'))  # xkcd light light blue
    go2color.update(dict.fromkeys(diff_act, '#ffd1df'))  # xkcd light pink
    GoSubDagPlot(subdag, go2color=go2color).plt_dag(png_name)
def _get_gosubdag():
    """Return a GoSubDag built over the go-basic.obo DAG found under REPO.

    get_godag is asked to load the optional 'relationship' attribute, to print
    to sys.stdout, and to skip the loading bar.  None is passed as the
    GoSubDag's GO-ID argument.
    """
    godag = get_godag(
        os.path.join(REPO, 'go-basic.obo'),
        prt=sys.stdout,
        loading_bar=False,
        optional_attrs=['relationship'],
    )
    return GoSubDag(None, godag)
# --------------------------------------------------------------------
taxid = 10090  # Mouse study

# Population gene IDs and the study's GeneID-to-symbol mapping
geneids_pop = GeneID2nt_mus.keys()
geneids2symbol_study = get_geneid2symbol("nbt.3102-S4_GeneIDs.xlsx")
geneids_study = geneids2symbol_study.keys()

# GOEA object using the "fdr_bh" method; it also supplies the GO DAG
goeaobj = get_goeaobj("fdr_bh", geneids_pop, taxid)
go2obj = goeaobj.obo_dag

# Run the enrichment on the study genes and keep results with p_fdr_bh below 0.05
goea_results_all = goeaobj.run_study(geneids_study)
goea_results_sig = [res for res in goea_results_all if res.p_fdr_bh < 0.05]
goea_results_nt = MgrNtGOEAs(goea_results_sig).get_goea_nts_all()
assert goea_results_nt
ns2gos = get_ns2gos(goea_results_sig)

# Plot the significant results three ways: plot_results, plt_goids and plot_gos
gosubdag = GoSubDag({res.GO for res in goea_results_sig}, go2obj)
plot_results(
    "test_plot_goids_a_goea_{NS}.png",
    goea_results_sig,
    id2symbol=geneids2symbol_study,
    parentcnt=True,
    childcnt=True,
)
for nss, goids in ns2gos.items():
    plt_goids(gosubdag, "test_plot_goids_b_{NS}.png".format(NS=nss), goids)
    plot_gos("test_plot_goids_c_{NS}.png".format(NS=nss), goids, go2obj)
def test_full(out=sys.stdout, opt_fields=None):
    """Load ./go-basic.obo and run both hierarchy writers on a GoSubDag of its leaves.

    Args:
        out: stream handed to _load_dag and to the hierarchy-writing tests.
        opt_fields: optional-field selection forwarded to _load_dag.
    """
    dag = _load_dag("./go-basic.obo", opt_fields, out)
    # Source GO IDs are the terms that have no children
    leaf_ids = {term.id for term in dag.values() if not term.children}
    gosubdag = GoSubDag(leaf_ids, dag)
    test_write_hier_all("FULL", "GO:0000009", gosubdag, out)
    test_write_hier_norep("FULL", "GO:0000010", gosubdag, out)
# Optionally write a new OBO subset for the chosen low-level GO ID
if wr_new_obo_subset:
    _wr_sub_obo(file_sub, goid_chosen, godag_r1, fin_obo)

# No relationships: ancestor set of the chosen GO ID must have 12 entries
gosubdag_r0 = GoSubDag({goid_chosen}, godag_r0)
assert len(gosubdag_r0.rcntobj.go2ancestors[goid_chosen]) == 12

# All relationships (relationships=True expands to RELATIONSHIP_SET): 50 ancestors
gosubdag_r1 = GoSubDag({goid_chosen}, godag_r1, relationships=True)
assert gosubdag_r1.relationships == RELATIONSHIP_SET
assert len(gosubdag_r1.rcntobj.go2ancestors[goid_chosen]) == 50

# Only 'part_of': GO:0016441 must be absent from both go2obj and the ancestors
gosubdag_rp = GoSubDag({goid_chosen}, godag_r1, relationships={'part_of'})
assert gosubdag_rp.relationships == {'part_of'}
rp_par = gosubdag_rp.rcntobj.go2ancestors[goid_chosen]
assert 'GO:0016441' not in gosubdag_rp.go2obj, '**FATAL: REGULATION TERM GoSubDag(part_of) go2obj'
assert 'GO:0016441' not in rp_par, '**FATAL: REGULATION TERM GoSubDag(part_of) go2parents'

# Only 'regulates': the ancestor lookup must succeed; its contents are not checked
gosubdag_rr = GoSubDag({goid_chosen}, godag_r1, relationships={'regulates'})
assert gosubdag_rr.relationships == {'regulates'}
rp_par = gosubdag_rr.rcntobj.go2ancestors[goid_chosen]

# Only 'positively_regulates' (note: rebinds gosubdag_rp and rp_par)
gosubdag_rp = GoSubDag({goid_chosen}, godag_r1, relationships={'positively_regulates'})
assert gosubdag_rp.relationships == {'positively_regulates'}
rp_par = gosubdag_rp.rcntobj.go2ancestors[goid_chosen]
def test_semantic_i88():
    """Compute and print a semantic similarity and an information content for GO terms.

    Loads go-basic.obo and the tair.gaf associations found under REPO, builds
    TermCounts and a GoSubDag over every key of the DAG, then prints the
    semantic similarity of two GO terms, details of those terms together with
    their deepest common ancestor, and the information content of one term.
    """
    godag = obo_parser.GODag("go-basic.obo")
    # Use every key in the DAG.  An earlier assignment that kept only keys equal
    # to their term's own id was overwritten immediately and has been removed.
    goids = set(godag.keys())
    # Get all the annotations from arabidopsis.
    fin_gaf = os.path.join(REPO, "tair.gaf")
    # dnld_assc includes read_gaf
    associations = dnld_assc(fin_gaf, godag, prt=None)
    # First get the counts and information content for each GO term.
    termcounts = TermCounts(godag, associations)
    gosubdag = GoSubDag(goids, godag, tcntobj=termcounts)
    # Now we can calculate the semantic distance and semantic similarity, as so:
    # "The semantic similarity between terms GO:0048364 and GO:0044707 is 0.25.
    go_id3 = 'GO:0048364'  # BP level-03 depth-04 root development
    go_id4 = 'GO:0044707'  # BP level-02 depth-02 single-multicellular organism process
    go_root = deepest_common_ancestor([go_id3, go_id4], godag)
    sim = semantic_similarity(go_id3, go_id4, godag)
    print('\nThe semantic similarity between terms {GO1} and {GO2} is {VAL}.'.format(
        GO1=go_id3, GO2=go_id4, VAL=sim))
    gosubdag.prt_goids([go_root, go_id3, go_id4])
    # Calculate the information content
    go_id = "GO:0048364"
    infocontent = get_info_content(go_id, termcounts)
    print('\nInformation content ({GO}) = {INFO}\n'.format(GO=go_id, INFO=infocontent))
def get_nts_sections(self, sections, sortby=None):
    """Return [(section, nts), ...] where nts come from GoSubDag.get_nts for that section.

    Args:
        sections: iterable of (section, GO IDs) pairs.
        sortby: forwarded unchanged to GoSubDag.get_nts.
    """
    # One GoSubDag covering the GO IDs gathered from all sections
    gosubdag = GoSubDag(self.get_goids_sections(sections), self.go2obj)
    sec_nts = []
    for sec_name, sec_goids in sections:
        sec_nts.append((sec_name, gosubdag.get_nts(sec_goids, sortby)))
    return sec_nts
def _get_tcntobj(goids, go2obj, **kws):
    """Return get_tcntobj's result when kws names an annotation format, else None.

    Args:
        goids: GO IDs used to construct a GoSubDag before the term counts.
        go2obj: GO DAG mapping handed to get_tcntobj, also passed as 'godag'.
        **kws: keyword options; at least one key must be in
            AnnoReaderBase.valid_formats for term counts to be created.
    """
    # Guard: none of the provided keywords is a recognised annotation format
    if AnnoReaderBase.valid_formats.isdisjoint(kws):
        return None
    # NOTE(review): this GoSubDag is constructed but its result is never used;
    # get_tcntobj below receives the full go2obj. Confirm whether that is intended.
    GoSubDag(goids, go2obj, rcntobj=False)
    # Copy the keywords and set 'godag' (replacing any caller-supplied value)
    tcnt_kws = dict(kws, godag=go2obj)
    return get_tcntobj(go2obj, **tcnt_kws)  # TermCounts