Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@depends
def run_FastbAndQualb2Fastq(infile=None, outfile=None, rc=False):
corr = op.basename(infile).rsplit(".", 1)[0]
cmd = "FastbQualbToFastq HEAD_IN={0} HEAD_OUT={0}".format(corr)
cmd += " PAIRED=False PHRED_OFFSET=33"
if rc:
cmd += " FLIP=True"
sh(cmd)
p.add_option("-m", dest="multi", default=False, action="store_true",
help="report multi-matches [default: %default]")
opts, args = p.parse_args(args)
if len(args) != 1:
sys.exit(p.print_help())
casfile, = args
txtfile = casfile.replace(".cas", ".txt")
assert op.exists(casfile)
cmd = "assembly_table -n -s -p "
if opts.multi:
cmd += "-m "
cmd += casfile
sh(cmd, outfile=txtfile)
return txtfile
def check_index(dbfile, supercat=False, go=True):
if supercat:
updated = False
pf = dbfile.rsplit(".", 1)[0]
supercatfile = pf + ".supercat"
coordsfile = supercatfile + ".coords"
if go and need_update(dbfile, supercatfile):
cmd = "tGBS-Generate_Pseudo_Genome.pl"
cmd += " -f {0} -o {1}".format(dbfile, supercatfile)
sh(cmd)
# Rename .coords file since gmap_build will overwrite it
coordsbak = backup(coordsfile)
updated = True
dbfile = supercatfile + ".fasta"
#dbfile = get_abs_path(dbfile)
dbdir, filename = op.split(dbfile)
if not dbdir:
dbdir = "."
dbname = filename.rsplit(".", 1)[0]
safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname))
if dbname == filename:
dbname = filename + ".db"
if not go:
return dbdir, dbname
pctid = opts.pctid
hitlen = opts.hitlen
filename, = args
if pctid == 0 and hitlen == 0:
return filename
pf, suffix = filename.rsplit(".", 1)
outfile = "".join((pf, ".P{0}L{1}.".format(int(pctid), int(hitlen)), suffix))
if not need_update(filename, outfile):
return outfile
if suffix == "delta":
cmd = "delta-filter -i {0} -l {1} {2}".format(pctid, hitlen, filename)
sh(cmd, outfile=outfile)
return outfile
fp = open(filename)
fw = must_open(outfile, "w")
for row in fp:
try:
c = CoordsLine(row)
except AssertionError:
continue
if c.identity < pctid:
continue
if c.len2 < hitlen:
continue
if opts.overlap and not c.overlap:
continue
opts, args = p.parse_args(args)
if len(args) not in (1, 2):
sys.exit(not p.print_help())
path = op.expanduser(opts.path)
url = \
"http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-{0}.zip"\
.format(tv)
if not op.exists(path):
path = download(url)
TrimUnzipped = "Trimmomatic-" + tv
if not op.exists(TrimUnzipped):
sh("unzip " + path)
os.remove(path)
path = op.join(TrimUnzipped, TrimJar)
assert op.exists(path), \
"Couldn't find Trimmomatic jar file at `{0}`".\
format(path)
adaptersfile = "adapters.fasta"
write_file(adaptersfile, Adapters, skipcheck=True)
assert op.exists(adaptersfile), \
"Please place the illumina adapter sequence in `{0}`".\
format(adaptersfile)
if opts.phred is None:
offset = guessoffset([args[0]])
"""
from jcvi.assembly.soap import prepare
logging.debug("Work on {0} ({1})".format(pf, ','.join(p)))
asm = "{0}.closed.scafSeq".format(pf)
if not need_update(p, asm):
logging.debug("Assembly found: {0}. Skipped.".format(asm))
return
slink(p, pf, tag, extra)
cwd = os.getcwd()
os.chdir(pf)
prepare(sorted(glob("*.fastq") + glob("*.fastq.gz")) + \
["--assemble_1st_rank_only", "-K 31"])
sh("./run.sh")
sh("cp asm31.closed.scafSeq ../{0}".format(asm))
logging.debug("Assembly finished: {0}".format(asm))
os.chdir(cwd)
mkdir(augdir)
os.chdir(augdir)
target = "{0}/config/species/{1}".format(mhome, species)
if op.exists(target):
logging.debug("Removing existing target `{0}`".format(target))
sh("rm -rf {0}".format(target))
config_path = "{0}/config".format(mhome)
sh("{0}/scripts/new_species.pl --species={1} --AUGUSTUS_CONFIG_PATH={2}".format(mhome, species, config_path))
sh("{0}/scripts/gff2gbSmallDNA.pl {1} {2} 1000 raw.gb".\
format(mhome, gffile, fastafile))
sh("{0}/bin/etraining --species={1} raw.gb 2> train.err".\
format(mhome, species))
sh("cat train.err | perl -pe 's/.*in sequence (\S+): .*/$1/' > badgenes.lst")
sh("{0}/scripts/filterGenes.pl badgenes.lst raw.gb > training.gb".\
format(mhome))
sh("grep -c LOCUS raw.gb training.gb")
# autoAugTrain failed to execute, disable for now
if opts.autotrain:
sh("rm -rf {0}".format(target))
sh("{0}/scripts/autoAugTrain.pl --trainingset=training.gb --species={1}".\
format(mhome, species))
os.chdir(cwd)
sh("cp -r {0} augustus/".format(target))
def run_formatdb(infile=None, outfile=None, dbtype="nucl"):
cmd = "makeblastdb"
cmd += " -dbtype {0} -in {1}".format(dbtype, infile)
sh(cmd)
if need_update(filtfastb, editfastb):
cmd = "FindErrors HEAD_IN={0} HEAD_OUT={1}".format(filt, edit)
cmd += " PLOIDY_FILE=data/ploidy"
cmd += nthreads
sh(cmd)
corr = datadir + "/{0}_corr".format(tag)
corrfastb = corr + ".fastb"
if need_update(editfastb, corrfastb):
cmd = "CleanCorrectedReads DELETE=True"
cmd += " HEAD_IN={0} HEAD_OUT={1}".format(edit, corr)
cmd += " PLOIDY_FILE={0}/ploidy".format(datadir)
if haploidify:
cmd += " HAPLOIDIFY=True"
cmd += nthreads
sh(cmd)
pf = op.basename(corr)
cwd = os.getcwd()
os.chdir(datadir)
corrfastq = pf + ".fastq"
run_FastbAndQualb2Fastq(infile=op.basename(corrfastb), outfile=corrfastq)
os.chdir(cwd)
pairsfile = pf + ".pairs"
fragsfastq = pf + ".corr.fastq"
run_pairs(infile=[op.join(datadir, pairsfile), op.join(datadir, corrfastq)],
outfile=fragsfastq)
def merylhistogram(merylfile):
"""
Run meryl to dump histogram to be used in kmer.histogram(). The merylfile
are the files ending in .mcidx or .mcdat.
"""
pf, sf = op.splitext(merylfile)
outfile = pf + ".histogram"
if need_update(merylfile, outfile):
cmd = "meryl -Dh -s {0}".format(pf)
sh(cmd, outfile=outfile)
return outfile