Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_blastall_graph(path_fna_all, tmp_path, fragment_length):
    """Build the job graph for legacy BLASTN jobs and check each job's shape."""
    fragment_files = anib.fragment_fasta_files(
        path_fna_all, tmp_path, fragment_length
    )[0]
    cmd_builder = anib.make_blastcmd_builder("ANIblastall", tmp_path)
    jobs = anib.make_job_graph(path_fna_all, fragment_files, cmd_builder)

    # Every job must be a legacy ``blastall -p blastn`` call that depends on
    # exactly one other job, and that dependency must be a ``formatdb`` call.
    for job in jobs:
        assert job.script.startswith("blastall -p blastn")
        deps = job.dependencies
        assert len(deps) == 1
        assert deps[0].script.startswith("formatdb")
def test_aniblastall_concordance(
    paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    fragment_length,
    tmp_path,
):
    """Run ANIblastall on the concordance genomes and derive percent identity.

    The input genomes are fragmented, the BLAST job graph is built and run
    (it must exit with status 0), and the percentage-identity matrix is
    extracted from the BLAST output, sorted on both axes and scaled by 100.

    NOTE(review): ``path_concordance_jspecies`` and ``tolerance_anib_hi`` are
    requested but never used in the visible body, and nothing is asserted
    about the resulting matrix. The comparison against JSpecies appears to
    be missing here -- confirm against the upstream test.
    """
    # Sequence lengths of the input genomes
    genome_lengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Fragment the inputs, then build the ANIblastall job graph
    fragment_files, fragment_lengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    cmd_builder = anib.make_blastcmd_builder("ANIblastall", tmp_path)
    jobs = anib.make_job_graph(paths_concordance_fna, fragment_files, cmd_builder)

    # All jobs must run correctly
    exit_status = run_mp.run_dependency_graph(jobs)
    assert 0 == exit_status

    # Parse the BLAST output into a percentage-identity table
    blast_results = anib.process_blast(
        tmp_path, genome_lengths, fragment_lengths, mode="ANIblastall"
    )
    identity = blast_results.percentage_identity

    # Sort rows and columns and express identity as a percentage
    identity_sorted = identity.sort_index(axis=0).sort_index(axis=1)
    result_pid = (identity_sorted * 100.0).values
def test_blastn_graph(path_fna_all, tmp_path, fragment_length):
    """Build the job graph for BLASTN+ jobs and check each job's shape."""
    fragment_files = anib.fragment_fasta_files(
        path_fna_all, tmp_path, fragment_length
    )[0]
    cmd_builder = anib.make_blastcmd_builder("ANIb", tmp_path)
    jobs = anib.make_job_graph(path_fna_all, fragment_files, cmd_builder)

    # Every job must be a ``blastn`` call that depends on exactly one other
    # job, and that dependency must be a ``makeblastdb`` call.
    for job in jobs:
        assert job.script.startswith("blastn")
        deps = job.dependencies
        assert len(deps) == 1
        assert deps[0].script.startswith("makeblastdb")
def test_fragment_files(path_fna_all, tmp_path, dir_tgt_fragments, fragment_length):
    """Fragment the input files for ANIb/ANIblastall and verify the output.

    Checks that every reported output file exists, that each file written to
    ``tmp_path`` matches the same-named file in ``dir_tgt_fragments``, and
    that no recorded fragment length exceeds ``fragment_length``.
    """
    fragmented = anib.fragment_fasta_files(path_fna_all, tmp_path, fragment_length)

    # Every reported output file must exist on disk
    for fragment_path in fragmented[0]:
        assert fragment_path.is_file()

    # Each generated file must match its same-named target file
    for generated in tmp_path.iterdir():
        target = dir_tgt_fragments / generated.name
        with generated.open("r") as out_handle, target.open("r") as tgt_handle:
            produced = out_handle.read()
            expected = tgt_handle.read()
            assert produced == expected

    # No fragment may be longer than the requested fragment length
    for _outer_key, lengths_by_fragment in fragmented[-1].items():
        for _fragment_key, length in lengths_by_fragment.items():
            assert length <= fragment_length
tolerance_anib_lo,
threshold_anib_lo_hi,
fragment_length,
tmp_path,
):
"""Check ANIb results are concordant with JSpecies.
We expect ANIb results to be quite different, as the BLASTN
algorithm changed substantially between BLAST and BLAST+ (the
megaBLAST algorithm is now the default for BLASTN)
"""
# Get lengths of input genomes
orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)
# Build and run BLAST jobs
fragfiles, fraglengths = anib.fragment_fasta_files(
paths_concordance_fna, tmp_path, fragment_length
)
jobgraph = anib.make_job_graph(
paths_concordance_fna, fragfiles, anib.make_blastcmd_builder("ANIb", tmp_path)
)
assert 0 == run_mp.run_dependency_graph(jobgraph) # Jobs must run correctly
# Process BLAST output
result_pid = anib.process_blast(
tmp_path, orglengths, fraglengths, mode="ANIb"
).percentage_identity
# Compare JSpecies output to results. We do this in two blocks,
# masked according to whether the expected result is greater than
# a threshold separating "low" from "high" identity comparisons.
result_pid = result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0