Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_mummer_single(tmp_path, path_file_two):
    """Check the NUCmer/delta-filter command pair built for one comparison.

    :param tmp_path: output directory handed to the command builder
    :param path_file_two: indexable pair of genome file paths
    """
    query, subject = path_file_two[0], path_file_two[1]
    generated = anim.construct_nucmer_cmdline(query, subject, outdir=tmp_path)

    # Every output file shares the "<query stem>_vs_<subject stem>" base name
    # inside the nucmer_output subdirectory of the requested output directory.
    nucmer_dir = tmp_path / "nucmer_output"
    pair_name = f"{query.stem}_vs_{subject.stem}"
    out_prefix = nucmer_dir / pair_name
    delta_file = nucmer_dir / f"{pair_name}.delta"
    filter_file = nucmer_dir / f"{pair_name}.filter"

    nucmer_cmd = f"nucmer --mum -p {out_prefix} {query} {subject}"
    filter_cmd = (
        f"delta_filter_wrapper.py delta-filter -1 {delta_file} {filter_file}"
    )
    assert generated == (nucmer_cmd, filter_cmd)
def test_anim_pairwise_basic(self):
    """Verify the default NUCmer and delta-filter commands for a file pair."""
    expected_nucmer = (
        "nucmer --mum -p nucmer_output/file1_vs_file2 file1.fna file2.fna"
    )
    expected_filter = (
        "delta_filter_wrapper.py delta-filter -1 "
        "nucmer_output/file1_vs_file2.delta "
        "nucmer_output/file1_vs_file2.filter"
    )
    query, subject = Path("file1.fna"), Path("file2.fna")
    nucmer_cmd, filter_cmd = anim.construct_nucmer_cmdline(query, subject)
    # The NUCmer command is checked first, then the delta-filter command.
    self.assertEqual(nucmer_cmd, expected_nucmer)
    self.assertEqual(filter_cmd, expected_filter)
def test_anim_pairwise_maxmatch(self):
    """Verify the NUCmer and delta-filter commands when maxmatch is enabled."""
    expected_nucmer = (
        "nucmer --maxmatch -p nucmer_output/file1_vs_file2 file1.fna file2.fna"
    )
    expected_filter = (
        "delta_filter_wrapper.py delta-filter -1 "
        "nucmer_output/file1_vs_file2.delta "
        "nucmer_output/file1_vs_file2.filter"
    )
    query, subject = Path("file1.fna"), Path("file2.fna")
    nucmer_cmd, filter_cmd = anim.construct_nucmer_cmdline(
        query, subject, maxmatch=True
    )
    # The NUCmer command is checked first, then the delta-filter command.
    self.assertEqual(nucmer_cmd, expected_nucmer)
    self.assertEqual(filter_cmd, expected_filter)
def test_maxmatch_single(tmp_path, path_file_two):
    """Check the NUCmer command built when maxmatch is requested.

    :param tmp_path: output directory handed to the command builder
    :param path_file_two: indexable pair of genome file paths
    """
    query, subject = path_file_two[0], path_file_two[1]
    # Only the NUCmer command is examined; the delta-filter command is dropped.
    nucmer_cmd, _ = anim.construct_nucmer_cmdline(
        query, subject, outdir=tmp_path, maxmatch=True
    )
    out_prefix = tmp_path / "nucmer_output" / f"{query.stem}_vs_{subject.stem}"
    assert nucmer_cmd == f"nucmer --maxmatch -p {out_prefix} {query} {subject}"
existingfiles: List[Path],
args: Namespace,
logger: Logger,
) -> List[ComparisonJob]:
"""Return list of ComparisonJobs.
:param comparisons: list of (Genome, Genome) tuples
:param existingfiles: list of pre-existing nucmer output files
:param args: Namespace of command-line arguments for the run
:param logger: logging object
"""
joblist = [] # will hold ComparisonJob structs
for idx, (query, subject) in enumerate(
tqdm(comparisons, disable=args.disable_tqdm)
):
ncmd, dcmd = anim.construct_nucmer_cmdline(
query.path,
subject.path,
args.outdir,
args.nucmer_exe,
args.filter_exe,
args.maxmatch,
)
logger.debug("Commands to run:\n\t%s\n\t%s", ncmd, dcmd)
outprefix = ncmd.split()[3] # prefix for NUCmer output
if args.nofilter:
outfname = Path(outprefix + ".delta")
else:
outfname = Path(outprefix + ".filter")
logger.debug("Expected output file for db: %s", outfname)
# If we're in recovery mode, we don't want to repeat a computational
logger.debug("Comparisons still to be performed:\n\t%s", comparison_ids)
logger.info("Total comparisons to be conducted: %d", len(comparison_ids))
if not len(comparison_ids):
logger.info("All comparison results already present in database " +
"(skipping comparisons)")
else:
# Create list of NUCmer jobs for each comparison still to be
# performed
logger.info("Creating NUCmer jobs for ANIm")
joblist, comparisons = [], []
jobprefix = "ANINUCmer"
for idx, (qid, sid) in enumerate(tqdm(comparison_ids)):
qpath = pyani_db.get_genome_path(args.dbpath, qid)
spath = pyani_db.get_genome_path(args.dbpath, sid)
ncmd, dcmd = anim.construct_nucmer_cmdline(qpath, spath,
args.outdir,
args.nucmer_exe,
args.filter_exe,
args.maxmatch)
logger.debug("Commands to run:\n\t%s\n\t%s", ncmd, dcmd)
outprefix = ncmd.split()[3] # prefix for NUCmer output
if args.nofilter:
outfname = outprefix + '.delta'
else:
outfname = outprefix + '.filter'
logger.debug("Expected output file for db: %s", outfname)
# If we're in recovery mode, we don't want to repeat a computational
# comparison that already exists, so we check whether the ultimate
# output is in the set of existing files and, if not, we add the jobs
# TODO: something faster than a list search (dict or set?)