def test_aniblastall_concordance(
    paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    fragment_length,
    tmp_path,
):
"""Check ANIblastall results are concordant with JSpecies."""
# Get lengths of input genomes
orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)
# Perform ANIblastall on the input directory contents
fragfiles, fraglengths = anib.fragment_fasta_files(
paths_concordance_fna, tmp_path, fragment_length
)
jobgraph = anib.make_job_graph(
paths_concordance_fna,
fragfiles,
anib.make_blastcmd_builder("ANIblastall", tmp_path),
)
assert 0 == run_mp.run_dependency_graph(jobgraph) # Jobs must run correctly
# Process BLAST output
result_pid = anib.process_blast(
tmp_path, orglengths, fraglengths, mode="ANIblastall"
).percentage_identity
# Compare JSpecies output to results
result_pid = (result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0).values
tgt_pid = parse_jspecies(path_concordance_jspecies)["ANIb"].values
assert result_pid - tgt_pid == pytest.approx(0, abs=tolerance_anib_hi)
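

# The concordance tests in this file lean on pytest fixtures defined elsewhere
# in the project (typically conftest.py); tmp_path is pytest's built-in
# temporary-directory fixture. A minimal sketch of what a few of those
# fixtures might look like follows -- the paths and tolerance values here are
# illustrative assumptions, not pyani's actual settings, though 1020nt is the
# fragment size conventionally used for ANIb.
import pytest
from pathlib import Path


@pytest.fixture
def fragment_length():
    """Fragment size (nt) used when chopping input genomes for ANIb."""
    return 1020


@pytest.fixture
def tolerance_anib_hi():
    """Hypothetical absolute tolerance for high-identity comparisons."""
    return 0.1


@pytest.fixture
def paths_concordance_fna():
    """Hypothetical paths to the FASTA inputs for the concordance tests."""
    return sorted(Path("tests/fixtures/concordance").glob("*.fna"))
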
def test_dependency_graph_run(self):
"""Test that module runs dependency graph."""
fragresult = anib.fragment_fasta_files(self.infiles, self.outdir, self.fraglen)
blastcmds = anib.make_blastcmd_builder("ANIb", self.outdir)
jobgraph = anib.make_job_graph(self.infiles, fragresult[0], blastcmds)
result = run_multiprocessing.run_dependency_graph(jobgraph)
self.assertEqual(0, result)
def test_blastn_dbjobdict(path_fna_all, tmp_path):
"""Generate dictionary of BLASTN+ database jobs."""
blastcmds = anib.make_blastcmd_builder("ANIb", tmp_path)
jobdict = anib.build_db_jobs(path_fna_all, blastcmds)
expected = [
(
tmp_path / _.name,
f"makeblastdb -dbtype nucl -in {_} -title {_.stem} -out {tmp_path / _.name}",
)
for _ in path_fna_all
]
assert sorted([(k, v.script) for (k, v) in jobdict.items()]) == sorted(expected)
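

# To see the generated database commands outside the test harness, a minimal
# sketch assuming pyani is installed and two hypothetical FASTA paths; only
# Job objects are constructed here, so no BLAST binaries are actually run.
from pathlib import Path

from pyani import anib

fastas = [Path("genome1.fna"), Path("genome2.fna")]  # hypothetical inputs
blastcmds = anib.make_blastcmd_builder("ANIb", Path("blast_out"))
for key, job in sorted(anib.build_db_jobs(fastas, blastcmds).items()):
    print(key, "->", job.script)
# Each script should match the pattern asserted above, e.g.:
#   makeblastdb -dbtype nucl -in genome1.fna -title genome1 -out blast_out/genome1.fna
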
def test_blastall_graph(path_fna_all, tmp_path, fragment_length):
"""Create jobgraph for legacy BLASTN jobs."""
fragresult = anib.fragment_fasta_files(path_fna_all, tmp_path, fragment_length)
blastcmds = anib.make_blastcmd_builder("ANIblastall", tmp_path)
jobgraph = anib.make_job_graph(path_fna_all, fragresult[0], blastcmds)
# We check that the main script job is a blastn job, and that there
# is a single dependency, which is a makeblastdb job
for job in jobgraph:
assert job.script.startswith("blastall -p blastn")
assert len(job.dependencies) == 1
assert job.dependencies[0].script.startswith("formatdb")
def test_blastall_dbjobdict(path_fna_all, tmp_path):
"""Generate dictionary of legacy BLASTN database jobs."""
blastcmds = anib.make_blastcmd_builder("ANIblastall", tmp_path)
jobdict = anib.build_db_jobs(path_fna_all, blastcmds)
expected = [
(tmp_path / _.name, f"formatdb -p F -i {tmp_path / _.name} -t {_.stem}")
for _ in path_fna_all
]
assert sorted([(k, v.script) for (k, v) in jobdict.items()]) == sorted(expected)
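
# Note the contrast with the BLAST+ variant above: legacy formatdb has no
# separate output option, so the -i argument points at a copy of the FASTA
# under tmp_path (implying the input is copied there before the command
# runs), whereas makeblastdb reads the original file via -in and writes the
# database into tmp_path via -out.
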
def test_anib_concordance(
    paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    tolerance_anib_lo,
    threshold_anib_lo_hi,
    fragment_length,
    tmp_path,
):
    """Check ANIb results are concordant with JSpecies.

    We expect ANIb results to be quite different, as the BLASTN
    algorithm changed substantially between BLAST and BLAST+ (the
    megaBLAST algorithm is now the default for BLASTN).
    """
# Get lengths of input genomes
orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)
# Build and run BLAST jobs
fragfiles, fraglengths = anib.fragment_fasta_files(
paths_concordance_fna, tmp_path, fragment_length
)
jobgraph = anib.make_job_graph(
paths_concordance_fna, fragfiles, anib.make_blastcmd_builder("ANIb", tmp_path)
)
assert 0 == run_mp.run_dependency_graph(jobgraph) # Jobs must run correctly
# Process BLAST output
result_pid = anib.process_blast(
tmp_path, orglengths, fraglengths, mode="ANIb"
).percentage_identity
# Compare JSpecies output to results. We do this in two blocks,
# masked according to whether the expected result is greater than
# a threshold separating "low" from "high" identity comparisons.
result_pid = result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0
lo_result = result_pid.mask(result_pid >= threshold_anib_lo_hi).fillna(0).values
hi_result = result_pid.mask(result_pid < threshold_anib_lo_hi).fillna(0).values
    tgt_pid = parse_jspecies(path_concordance_jspecies)["ANIb"]
    lo_expected = tgt_pid.mask(tgt_pid >= threshold_anib_lo_hi).fillna(0).values
    hi_expected = tgt_pid.mask(tgt_pid < threshold_anib_lo_hi).fillna(0).values
    assert lo_result - lo_expected == pytest.approx(0, abs=tolerance_anib_lo)
    assert hi_result - hi_expected == pytest.approx(0, abs=tolerance_anib_hi)
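

# The mask/fillna idiom above splits one percentage-identity matrix into
# complementary "lo" and "hi" blocks so each block can be checked against
# its own tolerance. A minimal sketch with made-up numbers:
import pandas as pd

pid = pd.DataFrame(
    [[100.0, 72.5], [72.5, 100.0]], index=["A", "B"], columns=["A", "B"]
)
threshold = 90.0
lo = pid.mask(pid >= threshold).fillna(0).values  # zeroes cells >= 90
hi = pid.mask(pid < threshold).fillna(0).values  # zeroes cells < 90
# lo: [[0, 72.5], [72.5, 0]]; hi: [[100, 0], [0, 100]]
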
def test_blastn_graph(path_fna_all, tmp_path, fragment_length):
"""Create jobgraph for BLASTN+ jobs."""
fragresult = anib.fragment_fasta_files(path_fna_all, tmp_path, fragment_length)
blastcmds = anib.make_blastcmd_builder("ANIb", tmp_path)
jobgraph = anib.make_job_graph(path_fna_all, fragresult[0], blastcmds)
# We check that the main script job is a blastn job, and that there
# is a single dependency, which is a makeblastdb job
for job in jobgraph:
assert job.script.startswith("blastn")
assert len(job.dependencies) == 1
assert job.dependencies[0].script.startswith("makeblastdb")
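
# In both graph tests, iterating the jobgraph yields only the comparison
# jobs; each database-creation job hangs off job.dependencies[0] and is
# executed first by run_dependency_graph, whose cumulative return value is
# nonzero if any subprocess fails.
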
def run_blast(args, logger, infiles, blastdir):
    """Run BLAST commands for ANIb methods.

    :param args: Namespace of command-line options
    :param logger: logging object
    :param infiles: iterable of paths to input sequence files
    :param blastdir: path of directory holding the fragment BLASTN databases

    Runs BLAST database creation and comparisons, returning the cumulative
    return values of the BLAST tool subprocesses, and the fragment sizes for
    each input file.
    """
if not args.skip_blastn:
logger.info("Fragmenting input files, and writing to %s", args.outdirname)
fragfiles, fraglengths = make_sequence_fragments(
args, logger, infiles, blastdir
)
# Run BLAST database-building and executables from a jobgraph
logger.info("Creating job dependency graph")
jobgraph = anib.make_job_graph(
infiles, fragfiles, anib.make_blastcmd_builder(args.method, blastdir)
)
if args.scheduler == "multiprocessing":
logger.info("Running dependency graph with multiprocessing")
cumval = run_mp.run_dependency_graph(jobgraph, logger=logger)
if cumval > 0:
logger.warning(
f"At least one BLAST run failed. {args.method} may fail. Please investigate."
)
else:
logger.info("All multiprocessing jobs complete.")
elif args.scheduler == "SGE":
logger.info("Running dependency graph with SGE")
run_sge.run_dependency_graph(jobgraph, logger=logger)
else:
logger.error(f"Scheduler {args.scheduler} not recognised (exiting)")
raise SystemError(1)
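

# A minimal usage sketch for run_blast: the Namespace mirrors the attributes
# the function body reads, but the values are illustrative and may omit
# options that helpers such as make_sequence_fragments expect (e.g. a
# fragment-size setting).
import logging
from argparse import Namespace
from pathlib import Path

args = Namespace(
    skip_blastn=False,
    outdirname=Path("anib_output"),
    method="ANIb",
    scheduler="multiprocessing",
)
logger = logging.getLogger("pyani")
infiles = sorted(Path("genomes").glob("*.fna"))  # hypothetical input genomes
run_blast(args, logger, infiles, args.outdirname / "blastdir")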