def stats(args):
    """
    %prog stats agpfile

    Print out a report for length of gaps and components.
    """
    from jcvi.utils.table import tabulate

    p = OptionParser(stats.__doc__)
    p.add_option("--warn", default=False, action="store_true",
                 help="Warnings on small component spans [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    agpfile, = args

    agp = AGP(agpfile)
    gap_lengths = []
    component_lengths = []
    for a in agp:
        span = a.object_span
        if a.is_gap:
            label = a.gap_type
            gap_lengths.append((span, label))
        else:
            # Assumed completion: non-gap lines are components.
            component_lengths.append((span, a.component_id))

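# A self-contained sketch of the kind of per-label summary the report above
# goes on to build; summarize_spans is a hypothetical helper, not jcvi API.
from collections import defaultdict

def summarize_spans(pairs):
    """Total the spans per label, given [(span, label), ...]."""
    totals = defaultdict(int)
    for span, label in pairs:
        totals[label] += span
    return dict(totals)

# e.g. summarize_spans([(100, "fragment"), (250, "fragment"), (5000, "clone")])
# -> {"fragment": 350, "clone": 5000}
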
def bed(args):
    """
    %prog bed anchorsfile

    Convert ANCHORS file to BED format.
    """
    from collections import defaultdict

    from jcvi.compara.synteny import AnchorFile, check_beds
    from jcvi.formats.bed import Bed
    from jcvi.formats.base import get_number

    p = OptionParser(bed.__doc__)
    p.add_option("--switch", default=False, action="store_true",
                 help="Switch reference and aligned map elements")
    p.add_option("--scale", type="float",
                 help="Scale the aligned map distance by factor")
    p.set_beds()
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    anchorsfile, = args
    switch = opts.switch
    scale = opts.scale
    ac = AnchorFile(anchorsfile)
    pairs = defaultdict(list)

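# The defaultdict(list) above is the standard grouping idiom: every new key
# starts as an empty list. A minimal illustration with made-up anchor pairs:
from collections import defaultdict

grouped = defaultdict(list)
for block_id, anchor in [(0, "geneA"), (0, "geneB"), (1, "geneC")]:
    grouped[block_id].append(anchor)
# grouped -> {0: ["geneA", "geneB"], 1: ["geneC"]}
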
def csv(args):
    """
    %prog csv excelfile

    Convert an Excel file to a csv file.
    """
    from xlrd import open_workbook

    p = OptionParser(csv.__doc__)
    p.set_sep(sep=',')
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    excelfile, = args
    sep = opts.sep
    csvfile = excelfile.rsplit(".", 1)[0] + ".csv"
    wb = open_workbook(excelfile)
    fw = open(csvfile, "w")
    for s in wb.sheets():
        print('Sheet:', s.name, file=sys.stderr)
        for row in range(s.nrows):
            values = []
            for col in range(s.ncols):
                values.append(s.cell(row, col).value)
            # Assumed completion: write one sep-joined line per row.
            print(sep.join(str(x) for x in values), file=fw)
    fw.close()

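# Hedged usage note: csv(["workbook.xls"]) writes workbook.csv, one line per
# spreadsheet row with columns joined by --sep (the filename is hypothetical).
# xlrd 2.0+ only reads legacy .xls files, so .xlsx input needs xlrd < 2.0.
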
def hetsmooth(args):
    """
    %prog hetsmooth reads_1.fq reads_2.fq jf-23_0

    Wrapper against het-smooth. Below is the command used in the het-smooth
    manual::

        $ het-smooth --kmer-len=23 --bottom-threshold=38 --top-threshold=220
              --no-multibase-replacements --jellyfish-hash-file=23-mers.jf
              reads_1.fq reads_2.fq
    """
    p = OptionParser(hetsmooth.__doc__)
    p.add_option("-K", default=23, type="int",
                 help="K-mer size [default: %default]")
    p.add_option("-L", type="int",
                 help="Bottom threshold, first min [default: %default]")
    p.add_option("-U", type="int",
                 help="Top threshold, second min [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    reads1fq, reads2fq, jfdb = args
    K = opts.K
    L = opts.L
    U = opts.U
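    # Hedged continuation (not in the original snippet): assemble the command
    # shown in the docstring from the parsed options and run it with jcvi's
    # sh() helper; assumes -L and -U were supplied on the command line.
    cmd = "het-smooth --kmer-len={0}".format(K)
    cmd += " --bottom-threshold={0} --top-threshold={1}".format(L, U)
    cmd += " --no-multibase-replacements"
    cmd += " --jellyfish-hash-file={0}".format(jfdb)
    cmd += " {0} {1}".format(reads1fq, reads2fq)
    sh(cmd)
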
def coverage(args):
    """
    %prog coverage coordsfile

    Report the coverage per query record, useful to see which query matches
    which reference. The coords file MUST be filtered with supermap::

        jcvi.algorithms.supermap --filter query
    """
    p = OptionParser(coverage.__doc__)
    p.add_option("-c", dest="cutoff", default=0.5, type="float",
                 help="only report query with coverage greater than [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    coordsfile, = args
    fp = open(coordsfile)

    coords = []
    for row in fp:
        try:
            c = CoordsLine(row)
        except AssertionError:
            # Assumed completion: skip header and malformed lines.
            continue
        coords.append(c)

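# A hedged sketch of the reporting step the -c cutoff feeds into. The
# attribute names .query and .querycov (fraction of the query covered) are
# assumptions about CoordsLine, not confirmed by the snippet above.
def report_high_coverage(coords, cutoff=0.5):
    for c in coords:
        if c.querycov > cutoff:
            print("{0}\t{1:.2f}".format(c.query, c.querycov))
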
def bam(args):
    """
    %prog bam input.gsnap ref.fasta

    Convert GSNAP output to BAM.
    """
    from jcvi.formats.sizes import Sizes
    from jcvi.formats.sam import index

    p = OptionParser(bam.__doc__)
    p.set_home("eddyyeh")
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    gsnapfile, fastafile = args
    EYHOME = opts.eddyyeh_home
    pf = gsnapfile.rsplit(".", 1)[0]
    uniqsam = pf + ".unique.sam"
    samstats = uniqsam + ".stats"
    sizesfile = Sizes(fastafile).filename
    if need_update((gsnapfile, sizesfile), samstats):
        cmd = op.join(EYHOME, "gsnap2gff3.pl")
        cmd += " --format sam -i {0} -o {1}".format(gsnapfile, uniqsam)
def bin(args):
    """
    %prog bin filename filename.bin

    Serialize counts to bitarrays.
    """
    from bitarray import bitarray

    p = OptionParser(bin.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    inp, outp = args
    fp = must_open(inp)
    fw = must_open(outp, "wb")  # binary mode: bitarray.tofile() writes raw bytes
    a = bitarray()
    for row in fp:
        # The last column holds the count, stored here as a single 0/1 bit.
        c = row.split()[-1]
        a.append(int(c))
    a.tofile(fw)
    fw.close()

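# Reading the bits back is symmetric; a minimal sketch using the bitarray
# package. tofile() zero-pads the final byte, so the original bit count
# (nbits, assumed known to the caller) is needed to trim the padding.
from bitarray import bitarray

def unbin(filename, nbits):
    a = bitarray()
    with open(filename, "rb") as fp:
        a.fromfile(fp)
    return a[:nbits]
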
def density(args):
    """
    %prog density bedfile ref.fasta

    Calculates density of features per seqid.
    """
    p = OptionParser(density.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    bedfile, fastafile = args
    bed = Bed(bedfile)
    sizes = Sizes(fastafile).mapping
    header = "seqid features size density_per_Mb".split()
    print("\t".join(header))
    for seqid, bb in bed.sub_beds():
        nfeats = len(bb)
        size = sizes[seqid]
        ds = nfeats * 1e6 / size
        print("\t".join(str(x) for x in
                        (seqid, nfeats, size, "{0:.1f}".format(ds))))

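# Worked example of the density formula above: 120 features on a seqid of
# 2,400,000 bp give 120 * 1e6 / 2400000 = 50.0 features per Mb.
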
def bundle(args):
    """
    %prog bundle linkfiles

    Bundle contig links into high-confidence contig edges. This is useful to
    combine multiple link files (from different libraries).
    """
    import numpy as np

    p = OptionParser(bundle.__doc__)
    p.add_option("--links", type="int", default=1,
                 help="Minimum number of mate pairs to bundle [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) < 1:
        sys.exit(not p.print_help())

    fp = must_open(args)
    contigGraph = defaultdict(list)
    for row in fp:
        c = LinkLine(row)
        contigGraph[(c.aseqid, c.bseqid)].append((c.orientation, c.distance))

    for (aseqid, bseqid), distances in contigGraph.items():
        # For the same pair of contigs, there might be conflicting orientations
        # or distances. Only keep the one with the most pairs.
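# A self-contained sketch of the "keep the most supported" idea described in
# the comment above, using only the standard library; majority_links is a
# hypothetical helper, not jcvi API.
from collections import Counter

def majority_links(distances):
    """Keep the (orientation, distance) pairs sharing the most common
    orientation; also return how many pairs support it."""
    best, support = Counter(o for o, _ in distances).most_common(1)[0]
    return [(o, d) for o, d in distances if o == best], support

# e.g. majority_links([("+", 500), ("+", 520), ("-", 90)])
# -> ([("+", 500), ("+", 520)], 2)
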
@depends
def run_FastbAndQualb2Fastq(infile=None, outfile=None, rc=False):
    corr = op.basename(infile).rsplit(".", 1)[0]
    cmd = "FastbQualbToFastq HEAD_IN={0} HEAD_OUT={0}".format(corr)
    cmd += " PAIRED=False PHRED_OFFSET=33"
    if rc:
        cmd += " FLIP=True"
    sh(cmd)

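# Hedged usage sketch: @depends-decorated helpers in jcvi take infile/outfile
# keywords and re-run only when the output is missing or out of date, e.g.
#   run_FastbAndQualb2Fastq(infile="frag_reads_corr.fastb",
#                           outfile="frag_reads_corr.fastq", rc=True)
# (the filenames are hypothetical).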