Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
name2go['B']: 1,
name2go['C']: 10,
name2go['D']: 10,
name2go['E']: 10,
name2go['F']: 10,
name2go['G']: 10,
name2go['H']: 10,
name2go['I']: 18,
}
go2genes = cx.defaultdict(set)
genenum = 0
for goid, qty in id2num.items():
for _ in range(qty):
go2genes[goid].add(genenum)
genenum += 1
id2gos = get_b2aset(go2genes)
IdToGosReader.wr_id2gos(file_id2gos, id2gos)
return id2gos
def describe_assc(org, fin_assc, go2obj, obj, prt):
    """Print count statistics for one organism's association file.

    The associations in ``fin_assc`` are loaded with ``dnld_assc`` and
    inverted with ``get_b2aset``.  Two rows are then reported through
    ``obj.prt_data``: GO IDs per gene and genes per GO ID.

    Args:
        org: Label placed at the front of each reported row (e.g. "hsa").
        fin_assc: Association file handed to ``dnld_assc``.
        go2obj: GO DAG mapping handed to ``dnld_assc``.
        obj: Reporter object providing ``prt_data(name, counts, prt)``.
        prt: Output stream forwarded to ``obj.prt_data``.

    Example of the kind of table the rows end up in:

        Assc.       | # Assc| range      | 25th | median | 75th | mean | stddev
        ------------|-------|------------|------|--------|------|------|-------
        hsa GO/gene | 19394 | 1 to 212   |    5 |      9 |   17 |   13 |     14
        hsa gene/GO | 17277 | 1 to 8,897 |    1 |      3 |    8 |   15 |    120

        mus GO/gene | 19870 | 1 to 261   |    5 |     10 |   18 |   14 |     15
        mus gene/GO | 17491 | 1 to 7,009 |    1 |      3 |    8 |   16 |    129

        dme GO/gene | 12551 | 1 to 137   |    2 |      4 |    8 |    6 |      7
        dme gene/GO |  7878 | 1 to 1,675 |    1 |      3 |    7 |   10 |     41
    """
    # prt=None is passed to the loader regardless of the caller's stream
    # (presumably to keep the download/read step quiet -- confirm in dnld_assc).
    gene2gos = dnld_assc(fin_assc, go2obj, prt=None)
    go2genes = get_b2aset(gene2gos)
    # Both directions must be non-empty for the statistics to be meaningful
    assert gene2gos
    assert go2genes
    gos_per_gene = list(map(len, gene2gos.values()))
    genes_per_go = list(map(len, go2genes.values()))
    rows = (
        ("{ORG} GO/gene", gos_per_gene),
        ("{ORG} gene/GO", genes_per_go),
    )
    for label, counts in rows:
        obj.prt_data(label.format(ORG=org), counts, prt)
name2go['E']: 10,
name2go['F']: 10,
name2go['G']: 10,
name2go['H']: 10,
name2go['I']: 30,
name2go['L']: 30,
name2go['M']: 20,
name2go['N']: 30,
}
go2genes = cx.defaultdict(set)
genenum = 0
for goid, qty in id2num.items():
for _ in range(qty):
go2genes[goid].add(genenum)
genenum += 1
id2gos = get_b2aset(go2genes)
IdToGosReader.wr_id2gos(file_id2gos, id2gos)
return id2gos
def _get_id2gos(file_id2gos, godag, name2go, name2num):
    """Return gene-to-GO annotations, reusing ``file_id2gos`` when it exists.

    If the file is already on disk, its 'CC' annotations are read and
    returned.  Otherwise synthetic annotations are generated, written to
    ``file_id2gos`` and returned.

    Args:
        file_id2gos: Path of the id-to-GOs annotation file.
        godag: GO DAG passed to ``IdToGosReader`` when reading.
        name2go: Maps a short name to its GO ID.
        name2num: Maps a short name to the number of genes to generate for it.
    """
    if os.path.exists(file_id2gos):
        reader = IdToGosReader(file_id2gos, godag=godag)
        return reader.get_id2gos('CC')

    # No cached file: give each GO ID its own run of consecutive gene numbers
    go2genes = cx.defaultdict(set)
    next_gene = 0
    for name, qty in name2num.items():
        goid = name2go[name]
        # Guard keeps a zero/negative quantity from creating an empty entry
        if qty > 0:
            go2genes[goid].update(range(next_gene, next_gene + qty))
            next_gene += qty

    id2gos = get_b2aset(go2genes)
    IdToGosReader.wr_id2gos(file_id2gos, id2gos)
    return id2gos
def get_assc_pruned(assc_geneid2gos, min_genecnt=None, max_genecnt=None, prt=sys.stdout):
"""Remove GO IDs associated with large numbers of genes. Used in stochastic simulations."""
# DEFN WAS: get_assc_pruned(assc_geneid2gos, max_genecnt=None, prt=sys.stdout):
# ADDED min_genecnt argument and functionality
if max_genecnt is None and min_genecnt is None:
return assc_geneid2gos, set()
go2genes_orig = utils_get_b2aset(assc_geneid2gos)
# go2genes_prun = {go:gs for go, gs in go2genes_orig.items() if len(gs) <= max_genecnt}
go2genes_prun = {}
for goid, genes in go2genes_orig.items():
num_genes = len(genes)
if (min_genecnt is None or num_genes >= min_genecnt) and \
(max_genecnt is None or num_genes <= max_genecnt):
go2genes_prun[goid] = genes
num_was = len(go2genes_orig)
num_now = len(go2genes_prun)
gos_rm = set(go2genes_orig.keys()).difference(set(go2genes_prun.keys()))
assert num_was-num_now == len(gos_rm)
if prt is not None:
if min_genecnt is None:
min_genecnt = 1
if max_genecnt is None:
max_genecnt = "Max"
def get_go2chrs(sec2gos, sec2chr):
    """Dict: given a GO, return the set of letters representing its section membership(s).

    Args:
        sec2gos: Maps a section to the GO IDs it contains.
        sec2chr: Maps a section to the letter that represents it.
    """
    # Invert section->GOs into GO->sections, then translate sections to letters
    return {
        goid: {sec2chr[section] for section in sections}
        for goid, sections in get_b2aset(sec2gos).items()
    }
num_genes = len(genes)
if (min_genecnt is None or num_genes >= min_genecnt) and \
(max_genecnt is None or num_genes <= max_genecnt):
go2genes_prun[goid] = genes
num_was = len(go2genes_orig)
num_now = len(go2genes_prun)
gos_rm = set(go2genes_orig.keys()).difference(set(go2genes_prun.keys()))
assert num_was-num_now == len(gos_rm)
if prt is not None:
if min_genecnt is None:
min_genecnt = 1
if max_genecnt is None:
max_genecnt = "Max"
prt.write("{N:4} GO IDs pruned. Kept {NOW} GOs assc w/({m} to {M} genes)\n".format(
m=min_genecnt, M=max_genecnt, N=num_was-num_now, NOW=num_now))
return utils_get_b2aset(go2genes_prun), gos_rm