# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# jobgroup generation in the anim.py module. That's a TODO.
ncmds, fcmds = anim.generate_nucmer_commands(paths_concordance_fna, tmp_path)
(tmp_path / "nucmer_output").mkdir(exist_ok=True, parents=True)
run_mp.multiprocessing_run(ncmds)
# delta-filter commands need to be treated with care for
# Travis-CI. Our cluster won't take redirection or semicolon
# separation in individual commands, but the wrapper we wrote
# for this (delta_filter_wrapper.py) can't be called under
# Travis-CI. So we must deconstruct the commands below
dfcmds = [
" > ".join([" ".join(fcmd.split()[1:-1]), fcmd.split()[-1]]) for fcmd in fcmds
]
run_mp.multiprocessing_run(dfcmds)
orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)
results = anim.process_deltadir(tmp_path / "nucmer_output", orglengths)
result_pid = results.percentage_identity
result_pid.to_csv(tmp_path / "pyani_anim.tab", sep="\t")
# Compare JSpecies output to results
result_pid = (result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0).values
tgt_pid = parse_jspecies(path_concordance_jspecies)["ANIm"].values
assert result_pid - tgt_pid == pytest.approx(0, abs=tolerance_anim)
def test_aniblastall_concordance(
    paths_concordance_fna,
    path_concordance_jspecies,
    tolerance_anib_hi,
    fragment_length,
    tmp_path,
):
    """Check ANIblastall results are concordant with JSpecies.

    Fragments the input FASTA files, builds and runs a legacy-BLAST job
    dependency graph, then processes the BLAST output into a
    percentage-identity matrix.

    NOTE(review): the ``path_concordance_jspecies`` and
    ``tolerance_anib_hi`` fixtures are requested but never used below, so
    no comparison against the JSpecies reference output is actually made
    — TODO: confirm whether the concluding assertion was lost and restore
    it (compare the ANIm concordance test in this module).
    """
    # Get lengths of input genomes; needed to normalise BLAST output
    # into percentage-identity values
    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)
    # Perform ANIblastall on the input directory contents: first split
    # each genome into fixed-size fragments for legacy BLAST
    fragfiles, fraglengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    jobgraph = anib.make_job_graph(
        paths_concordance_fna,
        fragfiles,
        anib.make_blastcmd_builder("ANIblastall", tmp_path),
    )
    assert 0 == run_mp.run_dependency_graph(jobgraph)  # Jobs must run correctly
    # Process BLAST output into a pairwise percentage-identity matrix
    result_pid = anib.process_blast(
        tmp_path, orglengths, fraglengths, mode="ANIblastall"
    ).percentage_identity
def test_parse_legacy_blastdir(anib_output_dir):
    """Parse a directory of legacy BLAST output and check the result.

    Organism and fragment lengths are required to convert raw legacy
    BLAST output into percentage-identity values; the parsed matrix is
    compared against a precomputed reference frame.
    """
    orglengths = pyani_files.get_sequence_lengths(anib_output_dir.infiles)
    fraglengths = anib.get_fraglength_dict(anib_output_dir.fragfiles)
    result = anib.process_blast(
        anib_output_dir.legacyblastdir, orglengths, fraglengths, mode="ANIblastall"
    )
    # Sort rows and columns of both frames so ordering differences do
    # not cause spurious failures. Pass axis by keyword: positional
    # axis arguments to sort_index are deprecated in modern pandas.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        anib_output_dir.legacyblastresult.sort_index(axis=1).sort_index(),
    )
paths_concordance_fna,
path_concordance_jspecies,
tolerance_anib_hi,
tolerance_anib_lo,
threshold_anib_lo_hi,
fragment_length,
tmp_path,
):
"""Check ANIb results are concordant with JSpecies.
We expect ANIb results to be quite different, as the BLASTN
algorithm changed substantially between BLAST and BLAST+ (the
megaBLAST algorithm is now the default for BLASTN)
"""
# Get lengths of input genomes
orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)
# Build and run BLAST jobs
fragfiles, fraglengths = anib.fragment_fasta_files(
paths_concordance_fna, tmp_path, fragment_length
)
jobgraph = anib.make_job_graph(
paths_concordance_fna, fragfiles, anib.make_blastcmd_builder("ANIb", tmp_path)
)
assert 0 == run_mp.run_dependency_graph(jobgraph) # Jobs must run correctly
# Process BLAST output
result_pid = anib.process_blast(
tmp_path, orglengths, fraglengths, mode="ANIb"
).percentage_identity
# Compare JSpecies output to results. We do this in two blocks,
def test_deltadir_parsing(delta_output_dir):
    """Process a test directory of .delta files into ANIResults.

    Sequence lengths of the source FASTA files are needed to normalise
    the nucmer alignment output; the parsed percentage-identity matrix
    is compared against a precomputed reference frame.
    """
    seqfiles = pyani_files.get_fasta_files(delta_output_dir.seqdir)
    orglengths = pyani_files.get_sequence_lengths(seqfiles)
    result = anim.process_deltadir(delta_output_dir.deltadir, orglengths)
    # Sort rows and columns of both frames so ordering differences do
    # not cause spurious failures. Pass axis by keyword: positional
    # axis arguments to sort_index are deprecated in modern pandas.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        delta_output_dir.deltaresult.sort_index(axis=1).sort_index(),
    )
def test_parse_blastdir(anib_output_dir):
    """Parse a directory of BLAST+ output and check the result.

    Organism and fragment lengths are required to convert raw BLAST+
    output into percentage-identity values; the parsed matrix is
    compared against a precomputed reference frame.
    """
    orglengths = pyani_files.get_sequence_lengths(anib_output_dir.infiles)
    fraglengths = anib.get_fraglength_dict(anib_output_dir.fragfiles)
    result = anib.process_blast(
        anib_output_dir.blastdir, orglengths, fraglengths, mode="ANIb"
    )
    # Sort rows and columns of both frames so ordering differences do
    # not cause spurious failures. Pass axis by keyword: positional
    # axis arguments to sort_index are deprecated in modern pandas.
    assert_frame_equal(
        result.percentage_identity.sort_index(axis=1).sort_index(),
        anib_output_dir.blastresult.sort_index(axis=1).sort_index(),
    )
else:
# Run ANI comparisons
logger.info("Identifying FASTA files in %s", args.indirname)
infiles = pyani_files.get_fasta_files(args.indirname)
logger.info("Input files:\n\t%s", "\n\t".join([str(_) for _ in infiles]))
# Are we subsampling? If so, make the selection here
if args.subsample:
infiles = subsample_input(args, logger, infiles)
logger.info(
"Sampled input files:\n\t%s", "\n\t".join([str(_) for _ in infiles])
)
# Get lengths of input sequences
logger.info("Processing input sequence lengths")
org_lengths = pyani_files.get_sequence_lengths(infiles)
seqlens = os.linesep.join(
["\t%s: %d" % (k, v) for k, v in list(org_lengths.items())]
)
logger.info("Sequence lengths:\n%s", seqlens)
# Run appropriate method on the contents of the input directory,
# and write out corresponding results.
logger.info("Carrying out %s analysis", args.method)
if args.method == "TETRA":
results = method_function(args, logger, infiles)
else:
results = method_function(args, logger, infiles, org_lengths)
write(args, logger, results)
# Do we want graphical output?
if args.graphics or args.rerender: