Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from snakemake.shell import shell
# Optional extra command-line arguments taken from the rule's params;
# defaults to an empty string when the rule does not define `extra`.
extra = snakemake.params.get("extra", "")

# Shell redirection snippet that sends both stdout and stderr to the rule's log.
log = snakemake.log_fmt_shell(stdout=True, stderr=True)

# Build the read arguments from the number of files in input.sample:
# one file -> "-U <reads>", two files -> "-1 <mate1> -2 <mate2>".
# The count is validated with an explicit raise instead of `assert`:
# assertions are removed under `python -O`, and str.format() silently ignores
# surplus positional arguments, so a third input file would otherwise be
# dropped without any error.
n = len(snakemake.input.sample)
if n == 1:
    reads = "-U {}".format(*snakemake.input.sample)
elif n == 2:
    reads = "-1 {} -2 {}".format(*snakemake.input.sample)
else:
    raise ValueError(
        "input->sample must have 1 (single-end) or 2 (paired-end) elements."
    )
shell(
"(bowtie2 --threads {snakemake.threads} {snakemake.params.extra} "
"-x {snakemake.params.index} {reads} "
# When the rule declares an `rdata` output, extend the R template with a
# save.image() call targeting that output path.
# NOTE: membership is tested on .keys() (the output *names*), not on the
# output object itself.
if 'rdata' in snakemake.output.keys():
    R_template += dedent("""
save.image("{snakemake.output.rdata}")
""")

# The filled-in template is written next to the BED output (same path plus
# ".R") so the exact script that was run can be inspected later.
script_filename = '{}.R'.format(snakemake.output.bed)
with open(script_filename, 'w') as fout:
    # Placeholders in the template are resolved from the names in scope here.
    fout.write(R_template.format(**locals()))

# Execute the generated script; `log` carries the shell redirection.
shell('Rscript {script_filename} {log}')
# Post-process the BED output so it has no non-positive coordinates and stays
# within the chromosome bounds.

# Step 1: turn the chromsizes table into one interval per chromosome
# (column 1, a literal "0", column 2; tab-separated).
chromsizes_to_bed = (
    """awk -F "\\t" '{{OFS="\\t"; print $1, "0", $2}}' """
    "{snakemake.input.chromsizes} "
    "> {snakemake.output.bed}.tmp.genome"
)
shell(chromsizes_to_bed)

# Step 2: sort the temporary BED by column 1 then numerically by column 2,
# keep only rows whose 2nd and 3rd columns are both > 0, and intersect with the
# per-chromosome intervals from step 1 to produce the final BED.
clip_to_genome = (
    "sort -k1,1 -k2,2n {snakemake.output.bed}.tmp | "
    """awk -F "\\t" '{{OFS="\\t"; if (($2>0) && ($3>0)) print $0}}' | """
    "bedtools intersect -a - -b {snakemake.output.bed}.tmp.genome > {snakemake.output.bed}"
)
shell(clip_to_genome)
# SPP's writewig() adds a header and is space-separated, so this turns it into
# a proper bedGraph file ready for conversion to bigwig.
if 'enrichment_estimates' in snakemake.output.keys():
shell('grep -v "track" {snakemake.output.enrichment_estimates} '
'| sed "s/ /\\t/g" > {snakemake.output.enrichment_estimates}.tmp '
'&& mv {snakemake.output.enrichment_estimates}.tmp '
shell('echo "tempfiles created by merge_and_dedup: {merged} {merged_and_deduped} {metrics}" {log}')
if not keep_tempfiles:
registered_for_deletion.extend([merged, merged_and_deduped, metrics])
bams = ' '.join(bams)
shell(
'samtools merge '
'-f '
'-@ {snakemake.threads} '
'{merged} '
'{bams} '
'{log} '
)
shell(
'picard '
'{java_args} '
'MarkDuplicates '
'INPUT={merged} '
'OUTPUT={merged_and_deduped} '
'METRICS_FILE={metrics} '
'REMOVE_DUPLICATES=true '
'{log} '
)
return merged_and_deduped
)
# Move output to the filenames specified by the rule
shell("cp {tempdir}/{prefix}_screen.txt {snakemake.output.txt}")

# NOTE: os.path.isfile() does not expand "{tempdir}"/"{prefix}" placeholders
# the way shell() does, so the paths are formatted explicitly before testing.
# Without this the checks look for a file literally named "{tempdir}/..." and
# the optional outputs are never copied.

# Check for the output of the --tag option to fastq_screen
if os.path.isfile(
    "{tempdir}/{prefix}.tagged.fastq.gz".format(tempdir=tempdir, prefix=prefix)
):
    shell("cp {tempdir}/{prefix}.tagged.fastq.gz {snakemake.output.txt}.tagged.fastq.gz")

# Check for the output of the --filter XXXXXX option to fastq_screen
if os.path.isfile(
    "{tempdir}/{prefix}.tagged_filter.fastq.gz".format(tempdir=tempdir, prefix=prefix)
):
    shell("cp {tempdir}/{prefix}.tagged_filter.fastq.gz {snakemake.output.txt}.tagged_filter.fastq.gz")

# Clean up temp
shell("rm -r {tempdir}")
shell("rm {tmp}")
# See snakemake.script.log_fmt_shell for details.
# The working directory is changed further down, so resolve the log path to an
# absolute one first; otherwise a relative log path would point somewhere else
# after the chdir.
if snakemake.log:
    snakemake.log = os.path.realpath(str(snakemake.log))

# Redirection snippet for the log; stdout is excluded because stdout of the
# tool is captured into a file below.
log = snakemake.log_fmt_shell(stdout=False)

# Get directories that I need to move between
cwd = os.getcwd()
tmpdir = gettempdir()

# Copy files over to ease I/O on filesystem.
# Only the generated *names* are kept; the files themselves are created by the
# `cp` below.
bam = NamedTemporaryFile(suffix='.bam').name
bed = NamedTemporaryFile(suffix='.bed').name

# Drop the ".bam" extension to get the prefix for the report file.
# str.rstrip('.bam') must not be used here: it strips any trailing run of the
# characters '.', 'b', 'a', 'm', so a random temp name ending in e.g. "...ab"
# would lose part of its stem as well.
name = os.path.splitext(bam)[0]

shell(
    'cp {snakemake.input.bam} {bam} '
    '&& cp {snakemake.input.bed} {bed}')

os.chdir(tmpdir)

# Run the tool on the staged copies; its stdout goes to {name}.txt.
shell(
    'infer_experiment.py '
    '-i {bam} '
    '-r {bed} '
    '{extra} '
    '> {name}.txt '
    '{log}')

# Cleanup 1
shell(
    'rm {bam} '
    '&& rm {bed}')
import os, sys
sys.path.append('../../')
from textwrap import dedent
import tempfile
from snakemake.shell import shell
from lib import utils
# Optional argument strings from the rule's params; each falls back to an
# empty string when the rule does not define it.
java_args = snakemake.params.get('java_args', '')
samtools_merge_extra = snakemake.params.get('samtools_merge_extra', '')
markduplicates_extra = snakemake.params.get('markduplicates_extra', '')

# Several commands append to the same log file (append=True), so the file is
# truncated up front to make sure it starts out empty.
log = snakemake.log_fmt_shell(append=True)
if snakemake.log:
    shell('cat /dev/null > {snakemake.log}')
if len(snakemake.input) == 1:
utils.make_relative_symlink(snakemake.input[0], snakemake.output.bam)
shell('touch {snakemake.output.metrics}')
else:
merged = tempfile.NamedTemporaryFile(delete=False, prefix='merged', suffix='.bam').name
merged_and_deduped = snakemake.output.bam
if 'metrics' in snakemake.output.keys():
metrics = snakemake.output.metrics
else:
# inside the genome
# Turn the chromsizes table into one interval per chromosome
# (column 1, a literal "0", column 2; tab-separated).
chromsizes_to_bed = (
    """awk -F "\\t" '{{OFS="\\t"; print $1, "0", $2}}' """
    "{snakemake.input.chromsizes} "
    "> {snakemake.output.bed}.tmp.genome"
)
shell(chromsizes_to_bed)

# Sort the temporary BED, keep only rows whose 2nd and 3rd columns are both
# > 0, and intersect with the per-chromosome intervals to get the final BED.
clip_to_genome = (
    "sort -k1,1 -k2,2n {snakemake.output.bed}.tmp | "
    """awk -F "\\t" '{{OFS="\\t"; if (($2>0) && ($3>0)) print $0}}' | """
    "bedtools intersect -a - -b {snakemake.output.bed}.tmp.genome > {snakemake.output.bed}"
)
shell(clip_to_genome)

# SPP's writewig() adds a header and is space-separated, so this turns it into
# a proper bedGraph file ready for conversion to bigwig: lines containing
# "track" are dropped and spaces become tabs, rewriting each file in place via
# a ".tmp" sibling. Only outputs the rule actually declares are processed.
declared_outputs = snakemake.output.keys()

if 'enrichment_estimates' in declared_outputs:
    shell(
        'grep -v "track" {snakemake.output.enrichment_estimates} '
        '| sed "s/ /\\t/g" > {snakemake.output.enrichment_estimates}.tmp '
        '&& mv {snakemake.output.enrichment_estimates}.tmp '
        '{snakemake.output.enrichment_estimates}'
    )

if 'smoothed_enrichment_mle' in declared_outputs:
    shell(
        'grep -v "track" {snakemake.output.smoothed_enrichment_mle} '
        '| sed "s/ /\\t/g" > {snakemake.output.smoothed_enrichment_mle}.tmp '
        '&& mv {snakemake.output.smoothed_enrichment_mle}.tmp '
        '{snakemake.output.smoothed_enrichment_mle}'
    )

# Remove every file that was registered for deletion, logging each removal.
for fn in registered_for_deletion:
    shell('rm -v {fn} {log}')
# as described in the docstring above, functions are to assume a list of
# urls
urls = block['url']
if isinstance(urls, str):
urls = [urls]
# Download tempfiles into reasonably-named filenames
tmpfiles = ['{0}.{1}.tmp'.format(outfile, i) for i in range(len(urls))]
tmpinputfiles = tmpfiles
try:
for url, tmpfile in zip(urls, tmpfiles):
if url.startswith('file:'):
url = url.replace('file://', '')
shell('cp {url} {tmpfile} 2> {outfile}.log')
else:
shell("wget {url} -O- > {tmpfile} 2> {outfile}.log")
for func, args, kwargs, outfile in funcs:
func(tmpinputfiles, outfile, *args, **kwargs)
tmpinputfiles = [outfile]
except Exception as e:
raise e
finally:
for i in tmpfiles + func_tmpfiles:
if os.path.exists(i):
shell('rm {i}')
def fasta_postprocess(origfn, newfn):
    """
    The fasta from UCSC comes as a tarball of fastas. So we extract them all to
    a temp directory and then cat them all together into the final fa.gz file.

    Parameters
    ----------
    origfn : list
        One-element list holding the path of the downloaded tarball.
    newfn : str
        Path of the gzip-compressed FASTA file to write.

    Raises
    ------
    AssertionError
        If `origfn` is not a list with exactly one element.

    Notes
    -----
    The tarball is unpacked into ``<tarball>.tmp``. Only ``*.fa`` files directly
    inside that directory are used; they are concatenated in sorted filename
    order and each filename is printed as it is processed. The directory is
    removed afterwards, also when extraction or concatenation fails.
    """
    import os
    import shutil

    # Wrapping origfn in a tuple keeps the message formatting safe for any
    # input type (a bare tuple on the right of % would be unpacked).
    assert (
        (isinstance(origfn, list)) and (len(origfn) == 1)
    ), 'unexpected input: %s' % (origfn,)

    tarball = origfn[0]
    extract_dir = tarball + '.tmp'

    # Filesystem calls instead of shelling out to mkdir/rm, so paths containing
    # spaces or shell metacharacters are handled correctly.
    os.makedirs(extract_dir, exist_ok=True)
    try:
        # NOTE(review): extractall() trusts member paths inside the archive.
        # If tarballs can come from untrusted sources, consider an extraction
        # filter (available in recent Python versions).
        with tarfile.open(tarball) as archive:
            archive.extractall(extract_dir)

        with gzip.open(newfn, 'wt') as fout:
            for fa in sorted(glob.glob(extract_dir + '/*.fa')):
                print(fa)
                # Stream each FASTA instead of reading it into memory in one
                # go, and make sure the input handle gets closed.
                with open(fa) as fin:
                    shutil.copyfileobj(fin, fout)
    finally:
        shutil.rmtree(extract_dir)