import os

import snakemake


def call_snakemake(workdir, targets=None):
    # Build the workflow DAG for the Snakefile in `workdir` without
    # executing any jobs (dryrun=True).
    return snakemake.snakemake(
        os.path.join(workdir, 'Snakefile'),
        configfile=os.path.join(workdir, 'config.yaml'),
        workdir=workdir,
        dryrun=True,
        targets=targets)
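# A minimal usage sketch; 'example_workflow' and the 'all' target are
# hypothetical, the directory just needs to contain a Snakefile and config.yaml:
ok = call_snakemake('example_workflow', targets=['all'])
print(ok)  # snakemake.snakemake() returns True when the dry run succeeds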
        'parameters': {'dependencies': ['Task1']}},
    {'task': 'SimpleParallelAnalysisTask',
     'module': 'testtask',
     'analysis_name': 'Task3',
     'parameters': {'dependencies': ['Task2']}}
]}
generator = snakewriter.SnakefileGenerator(taskDict, simple_merfish_data)
workflow = generator.generate_workflow()
outputTask1 = simple_merfish_data.load_analysis_task('Task1')
outputTask2 = simple_merfish_data.load_analysis_task('Task2')
outputTask3 = simple_merfish_data.load_analysis_task('Task3')
assert not outputTask1.is_complete()
assert not outputTask2.is_complete()
assert not outputTask3.is_complete()
snakemake.snakemake(workflow)
assert outputTask1.is_complete()
assert outputTask2.is_complete()
assert outputTask3.is_complete()
shutil.rmtree('.snakemake')
def make_targets(rules, config, application, **kw):
    TARGETS = []
    all_versions = get_versions(config[application])
    for r in rules:
        p = {}
        # Skip rules that do not belong to this application.
        if not r.name.startswith(application.replace("-", "_")):
            continue
        for label, out in r.output.items():
            if "{end}" in str(out):
                p['end'] = config[application][r.name].get("_end", kw['end'])
            if "{version}" in str(out):
                versions = get_versions(config[application][r.name], all_versions)
                p['version'] = versions
            TARGETS.extend(expand(out, **p))
    return TARGETS
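# A small sketch of what expand() contributes to TARGETS above; the pattern
# and values are hypothetical, not from any real config:
from snakemake.io import expand

out = "results/{version}/{application}_{end}.txt"
print(expand(out, version=["1.0", "2.0"], end=["se", "pe"], application="demo"))
# -> all four combinations, e.g. 'results/1.0/demo_se.txt', 'results/2.0/demo_pe.txt', ...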
d = {}
all_wc = []
all_files = []
try:
    for wc, filename in inputmap:
        try:
            # Patterns may arrive as their string repr; eval restores the object.
            wc = eval(wc)
        except Exception:
            pass
        wc = update_wildcard_constraints(wc, wildcard_constraints, {})
        all_wc.append(wc)
        if filename is None:
            continue
        if isinstance(filename, str):
            filename = [filename]
        all_files.extend(filename)
    # Match every file against every pattern and keep the first value seen
    # for each wildcard name.
    for f in all_files:
        for wc in all_wc:
            wildcards = glob_wildcards(wc, [os.path.basename(f)])
            for k, v in wildcards._asdict().items():
                if len(v) > 0:
                    d[k] = v[0]
except Exception:
    logger.debug("Failed to get wildcards for inputmap %s", inputmap)
    raise
return d
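# A small sketch of glob_wildcards() matching a pattern against explicit file
# names, as used above (pattern and file name are hypothetical):
from snakemake.io import glob_wildcards

wc = glob_wildcards("{sample}_{end}.fastq.gz", ["s1_1.fastq.gz"])
print(wc._asdict())  # expected: {'sample': ['s1'], 'end': ['1']}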
redundancy_threshold = get_value('redundancy_threshold')
window_size = get_value('window_size')
fragment_size = get_value('fragment_size')
effective_genome_fraction = get_value('effective_genome_fraction', 'reference_effective_genome_fraction')
gap_size = get_value('gap_size')
fdr = get_value('fdr')
genome_build = get_value('genome_build', 'reference_genome_build')
outdir, basebed = os.path.split(snakemake.output.bed)
label = snakemake.params.block['label']
tmpdir = tempfile.mkdtemp()
cwd = os.getcwd()
# SICER expects BED input, not BAM like the other peak callers
shell(
    'bamToBed -i {snakemake.input.ip} > {tmpdir}/ip.bed ; '
    'bamToBed -i {snakemake.input.control} > {tmpdir}/in.bed '
)
# SICER writes a single hard-coded output file and ignores the requested
# output directory, so run each job in its own temp directory to avoid
# collisions with concurrent processes.
os.chdir(tmpdir)
shell(
    # On CI the `python` symlink is not resolved to python2.7, so as a
    # workaround rewrite SICER's python calls to invoke python2.7 directly.
    """sed 's|^python|$CONDA_PREFIX/bin/python2.7|' """
    """$CONDA_PREFIX/share/sicer*/SICER.sh > {tmpdir}/SICER.sh && chmod u+x {tmpdir}/SICER.sh """
)
shell(
lock_file.close()
break
cmds = (
    'macs2 predictd '
    '-i {ip_bam} '
    '{genome_count_flag} '
    '-m {mfold_lower} {mfold_upper} '
    '-f BAM '
)
shell(cmds + ' {log}')
inlog = log.split(' ')[-2]
d_estimate_temporary_file = tempfile.NamedTemporaryFile().name
cmds = ('awk \'/predicted fragment length is/ {{print "{mfold_lower} {mfold_upper} "$(NF-1)}}\' '
        '< {inlog} > {d_estimate_temporary_file}')
shell(cmds)
with open(d_estimate_temporary_file, "r") as f:
    lines = f.readlines()
# try to get access to the d estimate file
with open(d_estimate_file, "a") as d_file:
    # If the awk extraction produced no matching line, MACS2 hit an MFOLD
    # failure; fall back to the default extsize.
    if len(lines) < 1:
        d_file.write("{0} {1} {2}\n".format(mfold_lower, mfold_upper, d_estimate_default_value))
    else:
        d_file.write("{0} {1} {2}\n".format(mfold_lower, mfold_upper, lines[0].strip()))
cmds = ('awk \'/tag size is determined as/ {{print $(NF-1)}}\' < {inlog} > {read_length_file}')
shell(cmds)
shell("rm {0}".format(d_estimate_temporary_file))
# release the d estimate lock
fcntl.flock(lock_file, fcntl.LOCK_UN)
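# The snippet above only shows the lock release; a minimal sketch of the
# matching acquire/release pattern with fcntl (the lock file name and the
# retry loop are assumptions, not the original wrapper code):
import fcntl
import time

lock_file = open("d_estimate.lock", "w")
while True:
    try:
        fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
        break                 # got the lock; safe to write
    except BlockingIOError:
        time.sleep(1)         # another process holds it; retry
try:
    pass  # ... append to the shared d estimate file ...
finally:
    fcntl.flock(lock_file, fcntl.LOCK_UN)
    lock_file.close()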
"-t", "--time", help="time limit")
slurm_parser.add_argument(
"--wrap", help="wrap command string in a sh script and submit")
slurm_parser.add_argument(
"-C", "--constraint", help="specify a list of constraints")
slurm_parser.add_argument(
"--mem", help="minimum amount of real memory")
args = parser.parse_args()
if args.help:
parser.print_help()
sys.exit(0)
jobscript = sys.argv[-1]
job_properties = read_job_properties(jobscript)
extras = ""
if args.positional:
for m in args.positional:
if m is not None:
extras = extras + " " + m
arg_dict = dict(args.__dict__)
# Process resources
if "resources" in job_properties:
resources = job_properties["resources"]
if arg_dict["time"] is None:
if "runtime" in resources:
arg_dict["time"] = resources["runtime"]
        rule.missing_config.update(annotation.expand_variables(rule.full_name))
        export_annotations[i] = (annotation, export_name)
        plain_name, attr = annotation.split()
        if not attr:
            plain_annotations.add(plain_name)
        else:
            possible_plain_annotations.add(plain_name)
    # Add plain annotations where needed
    for a in possible_plain_annotations.difference(plain_annotations):
        export_annotations.append((annotation_type(a), None))
    for annotation, export_name in export_annotations:
        if param.default.is_input:
            if param_type == ExportAnnotationsAllDocs:
                rule.inputs.extend(
                    expand(escape_wildcards(paths.annotation_dir / get_annotation_path(annotation.name)),
                           doc=get_source_files(storage.source_files)))
            else:
                rule.inputs.append(paths.annotation_dir / get_annotation_path(annotation.name))
        rule.parameters[param_name].append((annotation, export_name))
# Corpus
elif param.annotation == Corpus:
    rule.parameters[param_name] = Corpus(sparv_config.get("metadata.id"))
# Language
elif param.annotation == Language:
    rule.parameters[param_name] = Language(sparv_config.get("metadata.language"))
# Document
elif param.annotation == Document:
    rule.docs.append(param_name)
# AllDocuments (all source documents)
elif param_type == AllDocuments:
    rule.parameters[param_name] = AllDocuments(get_source_files(storage.source_files))
report_html_tempfile = os.path.join(SCRIPT_PATH, "html", "scATAC_template.html")
report_html_temp = open(report_html_tempfile, "r").read()
# readdistrplot_link = '''"Plot/%s_scATAC_read_distr.png"'''%outpre
# fragplot_link = '''"Plot/%s_scATAC_fragment_size.png"'''%outpre
# mapplot_link = '''"Plot/%s_scATAC_mapping_summary.png"'''%outpre
# fripplot_link = '''"Plot/%s_scATAC_cell_filtering.png"'''%outpre
# peakcluster_link = '''"Plot/%s_cluster.png"'''%outpre
# rpannotate_link = '''"Plot/%s_annotated.png"'''%outpre
# bulkqc_file = "Result/QC/%s_bam_stat.txt"%outpre
cluster_regulator_file = "Result/Analysis/%s.PredictedTFTop10.txt"%outpre
fragplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_fragment_size.png"%outpre)[0]
# mapplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_mapping_summary.png"%outpre)[0]
fripplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_cell_filtering.png"%outpre)[0]
peakcluster_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_cluster.png"%outpre)[0]
rpannotate_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_annotated.png"%outpre)[0]
readdistrplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_read_distr.png"%outpre)[0]
# line_id = 0
# total,mapped,duplicate,mito,uniq,promoters = 0,0,0,0,0,0
# for line in open(bulkqc_file).readlines():
# line = line.strip().split(' ')
# line_id += 1
# if line_id == 1:
# total = int(line[0])
# if line_id == 5:
# mapped = int(line[0])
# if line_id == 4:
items_str_list = [" " + i + "" for i in items]
items_str = " \n" + "\n".join(items_str_list) + "\n "
td_list.append(items_str)
td_str = "\n".join(td_list)
#"totalreads":stat_list[0],"dupreads":stat_list[1],"mapreads":stat_list[2],"maptags":stat_list[3],"exontags":stat_list[4],"introntags":stat_list[5],
report_html_instance = report_html_temp % {
    "outprefix": outpre, "fastqdir": fastqdir, "species": species,
    "platform": platform, "readdistr": readdistrplot_link,
    "readqual": readqualplot_link, "nvc": nvcplot_link, "gc": gcplot_link,
    "genecov": genecovplot_link, "countgene": countgeneplot_link,
    "genecluster": genecluster_link, "geneannotate": geneannotate_link,
    "regtable": td_str}
elif platform == "10x-genomics":
report_html_tempfile = os.path.join(SCRIPT_PATH, "html", "scRNA_10x_noqc_template.html")
report_html_temp = open(report_html_tempfile, "r").read()
cluster_regulator_file = "Result/Analysis/%s.PredictedTFTop10.txt"%outpre
readdistrplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scRNA_read_distr.png"%outpre)[0]
countgeneplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scRNA_cell_filtering.png"%outpre)[0]
genecluster_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_cluster.png"%outpre)[0]
geneannotate_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_annotated.png"%outpre)[0]
td_list = []
for line in open(cluster_regulator_file, "r").readlines():
    # Skip the header line; each remaining row becomes one HTML table row.
    if not line.startswith("Cluster"):
        items = line.strip().split("\t")
        items_str_list = ["<td>" + i + "</td>" for i in items]
        items_str = "<tr>\n" + "\n".join(items_str_list) + "\n</tr>"
        td_list.append(items_str)
td_str = "\n".join(td_list)
#"totalreads":stat_list[0],"dupreads":stat_list[1],"mapreads":stat_list[2],"maptags":stat_list[3],"exontags":stat_list[4],"introntags":stat_list[5],
report_html_instance = report_html_temp % {
    "outprefix": outpre, "fastqdir": fastqdir, "species": species,
    "platform": platform, "readdistr": readdistrplot_link,
    "countgene": countgeneplot_link, "genecluster": genecluster_link,
    "geneannotate": geneannotate_link, "regtable": td_str}
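# A minimal sketch of the usual final step: write the filled-in template out
# as the report file (the output path here is an assumption, not part of the
# original snippet):
with open("Result/Report/%s_report.html" % outpre, "w") as f:
    f.write(report_html_instance)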