"""
from jcvi.formats.vcf import VcfLine
from six.moves.urllib.parse import parse_qsl
p = OptionParser(mitocompile.__doc__)
opts, args = p.parse_args(args)
if len(args) < 1:
sys.exit(not p.print_help())
vcfs = args
print("\t".join("vcf samplekey depth seqid pos alt svlen pe sr".split()))
for i, vcf in enumerate(vcfs):
if (i + 1) % 100 == 0:
logging.debug("Process `{}` [{}]".
format(vcf, percentage(i + 1, len(vcfs))))
depthfile = vcf.replace(".sv.vcf.gz", ".depth")
fp = must_open(depthfile)
chrm, depth = fp.next().split()
depth = int(float(depth))
samplekey = op.basename(vcf).split("_")[0]
fp = must_open(vcf)
for row in fp:
if row[0] == '#':
continue
v = VcfLine(row)
info = dict(parse_qsl(v.info))
print("\t".join(str(x) for x in (vcf,
samplekey, depth,
v.seqid, v.pos, v.alt,
info.get("SVLEN"), info["PE"], info["SR"])))
blocksfile, = args
fp = open(blocksfile)
counts = defaultdict(int)
total = orthologous = 0
for row in fp:
    atoms = row.rstrip().split("\t")
    hits = [x for x in atoms[1:] if x != '.']
    counts[len(hits)] += 1
    total += 1
    if atoms[1] != '.':
        orthologous += 1

print("Total lines: {0}".format(total), file=sys.stderr)
for i, n in sorted(counts.items()):
    print("Count {0}: {1}".format(i, percentage(n, total)), file=sys.stderr)
print(file=sys.stderr)

matches = sum(n for i, n in counts.items() if i != 0)
print("Total lines with matches: {0}".format(
    percentage(matches, total)), file=sys.stderr)
for i, n in sorted(counts.items()):
    if i == 0:
        continue
    print("Count {0}: {1}".format(i, percentage(n, matches)), file=sys.stderr)
print(file=sys.stderr)

print("Orthologous matches: {0}".format(
    percentage(orthologous, matches)), file=sys.stderr)
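
# A minimal worked sketch of the counting above, assuming a tab-separated
# blocks layout: column 1 is the anchor gene, later columns hold matches or ".".
from collections import defaultdict

rows = ["geneA\tortho1\t.", "geneB\t.\t.", "geneC\tortho2\tortho3"]
counts, total, orthologous = defaultdict(int), 0, 0
for row in rows:
    atoms = row.rstrip().split("\t")
    hits = [x for x in atoms[1:] if x != '.']
    counts[len(hits)] += 1
    total += 1
    if atoms[1] != '.':
        orthologous += 1
print(dict(counts), total, orthologous)  # -> {1: 1, 0: 1, 2: 1} 3 2
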
for row in fp:
    prog, pcounts, tcounts, shared = row.split()
    pcounts = int(pcounts)
    tcounts = int(tcounts)
    shared = int(shared)
    data.append((prog, pcounts, tcounts, shared))

xstart = 0
xwidth = 1. / len(data)
for prog, pcounts, tcounts, shared in data:
    # venn2 subsets: (only in prog, only in truth set, shared by both)
    a, b, c = pcounts - shared, tcounts - shared, shared
    ax = fig.add_axes([xstart + pad, ystart - ywidth + pad,
                       xwidth - 2 * pad, ywidth - 2 * pad])
    venn2(subsets=(a, b, c), set_labels=(prog, tag), ax=ax)
    # Sn is the shared fraction of tcounts; Pu is the shared fraction of pcounts
    message = "Sn={0} Pu={1}".format(
        percentage(shared, tcounts, precision=0, mode=-1),
        percentage(shared, pcounts, precision=0, mode=-1))
    print(message, file=sys.stderr)
    ax.text(.5, .92, latex(message), ha="center", va="center",
            transform=ax.transAxes, color='b')
    ax.set_axis_off()
    xstart += xwidth

ystart -= ywidth
panel_labels(root, ((.04, .96, "A"), (.04, .96 - ywidth, "B"),
                    (.04, .96 - 2 * ywidth, "C")))
panel_labels(root, ((.5, .98, "A. thaliana duplicates"),
                    (.5, .98 - ywidth, "14 Yeast genomes"),
                    (.5, .98 - 2 * ywidth, "4 Grass genomes")))
normalize_axes(root)
savefig("venn.pdf", dpi=opts.dpi)
        continue
    if achr == bchr:
        dist = abs(apos - bpos)
        if dist < minsize:
            continue
        db = distbin_number(dist)
        B[db] += 1
    abin, bbin = bin_number(achr, apos), bin_number(bchr, bpos)
    A[abin, bbin] += 1
    if abin != bbin:
        A[bbin, abin] += 1
    k += 1

logging.debug("Total reads counted: {}".format(percentage(2 * k, j)))
bamfile.close()

np.save(pf, A)
logging.debug("Link counts written to `{}.npy`".format(pf))
np.save(pf + ".dist", B)
logging.debug("Link dists written to `{}.dist.npy`".format(pf))
ids = set(gene_name(x) for x in ids)
bed = Bed(bedfile)
ntotal = nkeep = 0
for b in bed:
    ntotal += 1
    keep = b.accn in ids
    if inverse:
        keep = not keep
    if keep:
        nkeep += 1
        print(b, file=fw)

fw.close()
logging.debug("Stats: {0} features kept.".format(
    percentage(nkeep, ntotal)))
table = {}
for tred, inheritance in zip(treds["abbreviation"], treds["inheritance"]):
    x_linked = inheritance[0] == 'X'  # X-linked
    name = tred
    if x_linked:
        name += " (X-linked)"
    print("[TRED] {}".format(name))

    n_total = len(duos)
    n_error = 0
    for duo in duos:
        n_error += duo.check_mendelian(df, tred, tolerance=tolerance,
                                       x_linked=x_linked, verbose=verbose)
    tag = "Duos - Mendelian errors"
    print("{}: {}".format(tag, percentage(n_error, n_total)))
    duo_error = percentage(n_error, n_total, mode=2)
    table[(name, tag)] = "{0:.1f}%".format(duo_error)

    n_total = len(trios)
    n_error = 0
    for trio in trios:
        n_error += trio.check_mendelian(df, tred, tolerance=tolerance,
                                        x_linked=x_linked, verbose=verbose)
    tag = "Trios - Mendelian errors"
    print("{}: {}".format(tag, percentage(n_error, n_total)))
    trio_error = percentage(n_error, n_total, mode=2)
    table[(name, tag)] = "{0:.1f}%".format(trio_error)

# Summarize
print(tabulate(table))
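
# A minimal sketch (not the library's own tabulate) of rendering the
# {(row, column): cell} dict built above, using pandas and hypothetical values.
import pandas as pd

table = {
    ("TRED1", "Duos - Mendelian errors"): "1.2%",
    ("TRED1", "Trios - Mendelian errors"): "0.8%",
    ("TRED2 (X-linked)", "Duos - Mendelian errors"): "2.0%",
    ("TRED2 (X-linked)", "Trios - Mendelian errors"): "1.5%",
}
print(pd.Series(table).unstack())  # rows = TREDs, columns = error tags
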
chainfile, oldfasta, newfasta = args
chain = Chain(chainfile)
ungapped, dt, dq = chain.ungapped, chain.dt, chain.dq
print("File `{0}` contains {1} chains.".format(
    chainfile, len(chain)), file=sys.stderr)
print("ungapped={0} dt={1} dq={2}".format(
    human_size(ungapped), human_size(dt), human_size(dq)), file=sys.stderr)

oldreal, oldnn, oldlen = fsummary([oldfasta, "--outfile=/dev/null"])
print("Old fasta (`{0}`) mapped: {1}".format(
    oldfasta, percentage(ungapped, oldreal)), file=sys.stderr)

newreal, newnn, newlen = fsummary([newfasta, "--outfile=/dev/null"])
print("New fasta (`{0}`) mapped: {1}".format(
    newfasta, percentage(ungapped, newreal)), file=sys.stderr)
def get_bins(self, function, remove_outliers):
    s = defaultdict(list)
    for m in self:
        s[(m.mlg, m.seqid)].append(m)

    if remove_outliers:
        original = clean = 0
        for pair, markers in s.items():
            cm = self.remove_outliers(markers, function)
            s[pair] = cm
            original += len(markers)
            clean += len(cm)
        logging.debug("Retained {0} clean markers.".format(
            percentage(clean, original)))
    return s
except IndexError:
    print(row.strip(), file=fw)
    continue
span = b.span
total.append(span)
if not minsize <= span <= maxsize:
    continue
if minaccn and int(b.accn) < minaccn:
    continue
if minscore and int(b.score) < minscore:
    continue
print(b, file=fw)
keep.append(span)

logging.debug("Stats: {0} features kept.".format(
    percentage(len(keep), len(total))))
logging.debug("Stats: {0} bases kept.".format(
    percentage(sum(keep), sum(total))))
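
# A stand-in sketch (not jcvi's actual implementation) of a percentage-style
# helper like the one used throughout these snippets, producing
# "part of whole (x%)" strings for the logging lines above.
def pct(part, whole, precision=1):
    frac = 100.0 * part / whole if whole else 0.0
    return "{0} of {1} ({2:.{3}f}%)".format(part, whole, frac, precision)

print(pct(75, 100))  # -> 75 of 100 (75.0%)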