How to use the pyani.anib.make_blastcmd_builder function in pyani

To help you get started, we’ve selected a few pyani examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github widdowquinn / pyani / tests / test_concordance.py View on Github external
tolerance_anib_hi,
    fragment_length,
    tmp_path,
):
    """Check ANIblastall results are concordant with JSpecies."""
    # Get lengths of input genomes
    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Perform ANIblastall on the input directory contents
    fragfiles, fraglengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    jobgraph = anib.make_job_graph(
        paths_concordance_fna,
        fragfiles,
        anib.make_blastcmd_builder("ANIblastall", tmp_path),
    )
    assert 0 == run_mp.run_dependency_graph(jobgraph)  # Jobs must run correctly

    # Process BLAST output
    result_pid = anib.process_blast(
        tmp_path, orglengths, fraglengths, mode="ANIblastall"
    ).percentage_identity

    # Compare JSpecies output to results
    result_pid = (result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0).values
    tgt_pid = parse_jspecies(path_concordance_jspecies)["ANIb"].values
    assert result_pid - tgt_pid == pytest.approx(0, abs=tolerance_anib_hi)
github widdowquinn / pyani / tests / test_multiprocessing.py View on Github external
def test_dependency_graph_run(self):
        """Run an ANIb job graph through the multiprocessing scheduler.

        Fragments the input files, builds the job graph with an "ANIb"
        command builder, and expects ``run_dependency_graph`` to return 0.
        Reads ``self.infiles``, ``self.outdir`` and ``self.fraglen``
        (presumably prepared by the test class's setup -- not visible here).
        """
        # Only the first element of the fragmentation result is needed to
        # build the job graph.
        fragfiles = anib.fragment_fasta_files(
            self.infiles, self.outdir, self.fraglen
        )[0]
        cmd_builder = anib.make_blastcmd_builder("ANIb", self.outdir)
        graph = anib.make_job_graph(self.infiles, fragfiles, cmd_builder)

        # A zero return value is the expected outcome of the run.
        self.assertEqual(0, run_multiprocessing.run_dependency_graph(graph))
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_blastn_dbjobdict(path_fna_all, tmp_path):
    """Check the database-build jobs generated for BLASTN+ ("ANIb").

    ``build_db_jobs`` is expected to return a mapping with one entry per
    input file: the key is the database path under ``tmp_path`` and the
    value is a job whose ``script`` is the matching ``makeblastdb`` command.

    :param path_fna_all:  iterable of path-like input files (``.name`` and
        ``.stem`` are used); presumably a pytest fixture
    :param tmp_path:  output directory; presumably pytest's ``tmp_path``
    """
    cmd_builder = anib.make_blastcmd_builder("ANIb", tmp_path)
    db_jobs = anib.build_db_jobs(path_fna_all, cmd_builder)

    # Build the (database path, command line) pair expected for each input.
    expected = []
    for fna in path_fna_all:
        db_path = tmp_path / fna.name
        command = f"makeblastdb -dbtype nucl -in {fna} -title {fna.stem} -out {db_path}"
        expected.append((db_path, command))

    # Sort both sides so the comparison does not depend on ordering.
    observed = [(db_path, job.script) for db_path, job in db_jobs.items()]
    assert sorted(observed) == sorted(expected)
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_blastall_graph(path_fna_all, tmp_path, fragment_length):
    """Check the job graph built for legacy BLAST ("ANIblastall").

    :param path_fna_all:  input sequence files; presumably a pytest fixture
    :param tmp_path:  output directory for fragments and BLAST commands
    :param fragment_length:  fragment size passed to ``fragment_fasta_files``
    """
    # Only the first element of the fragmentation result feeds the job graph.
    fragfiles = anib.fragment_fasta_files(path_fna_all, tmp_path, fragment_length)[0]
    cmd_builder = anib.make_blastcmd_builder("ANIblastall", tmp_path)
    graph = anib.make_job_graph(path_fna_all, fragfiles, cmd_builder)

    # Every job in the graph must be a legacy "blastall -p blastn" command
    # with exactly one dependency, and that dependency must be a formatdb
    # (database-building) command.
    for blast_job in graph:
        assert blast_job.script.startswith("blastall -p blastn")
        deps = blast_job.dependencies
        assert len(deps) == 1
        assert deps[0].script.startswith("formatdb")
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_blastall_dbjobdict(path_fna_all, tmp_path):
    """Check the database-build jobs generated for legacy BLAST.

    ``build_db_jobs`` is expected to return a mapping with one entry per
    input file: the key is the database path under ``tmp_path`` and the
    value is a job whose ``script`` is the matching ``formatdb`` command.

    :param path_fna_all:  iterable of path-like input files (``.name`` and
        ``.stem`` are used); presumably a pytest fixture
    :param tmp_path:  output directory; presumably pytest's ``tmp_path``
    """
    cmd_builder = anib.make_blastcmd_builder("ANIblastall", tmp_path)
    db_jobs = anib.build_db_jobs(path_fna_all, cmd_builder)

    # Build the (database path, command line) pair expected for each input.
    expected = []
    for fna in path_fna_all:
        db_path = tmp_path / fna.name
        expected.append((db_path, f"formatdb -p F -i {db_path} -t {fna.stem}"))

    # Sort both sides so the comparison does not depend on ordering.
    observed = [(db_path, job.script) for db_path, job in db_jobs.items()]
    assert sorted(observed) == sorted(expected)
github widdowquinn / pyani / tests / test_concordance.py View on Github external
):
    """Check ANIb results are concordant with JSpecies.

    We expect ANIb results to be quite different, as the BLASTN
    algorithm changed substantially between BLAST and BLAST+ (the
    megaBLAST algorithm is now the default for BLASTN)
    """
    # Get lengths of input genomes
    orglengths = pyani_files.get_sequence_lengths(paths_concordance_fna)

    # Build and run BLAST jobs
    fragfiles, fraglengths = anib.fragment_fasta_files(
        paths_concordance_fna, tmp_path, fragment_length
    )
    jobgraph = anib.make_job_graph(
        paths_concordance_fna, fragfiles, anib.make_blastcmd_builder("ANIb", tmp_path)
    )
    assert 0 == run_mp.run_dependency_graph(jobgraph)  # Jobs must run correctly

    # Process BLAST output
    result_pid = anib.process_blast(
        tmp_path, orglengths, fraglengths, mode="ANIb"
    ).percentage_identity

    # Compare JSpecies output to results. We do this in two blocks,
    # masked according to whether the expected result is greater than
    # a threshold separating "low" from "high" identity comparisons.
    result_pid = result_pid.sort_index(axis=0).sort_index(axis=1) * 100.0
    lo_result = result_pid.mask(result_pid >= threshold_anib_lo_hi).fillna(0).values
    hi_result = result_pid.mask(result_pid < threshold_anib_lo_hi).fillna(0).values

    tgt_pid = parse_jspecies(path_concordance_jspecies)["ANIb"]
github widdowquinn / pyani / tests / test_anib.py View on Github external
def test_blastn_graph(path_fna_all, tmp_path, fragment_length):
    """Check the job graph built for BLASTN+ ("ANIb").

    :param path_fna_all:  input sequence files; presumably a pytest fixture
    :param tmp_path:  output directory for fragments and BLAST commands
    :param fragment_length:  fragment size passed to ``fragment_fasta_files``
    """
    # Only the first element of the fragmentation result feeds the job graph.
    fragfiles = anib.fragment_fasta_files(path_fna_all, tmp_path, fragment_length)[0]
    cmd_builder = anib.make_blastcmd_builder("ANIb", tmp_path)
    graph = anib.make_job_graph(path_fna_all, fragfiles, cmd_builder)

    # Every job in the graph must be a blastn command with exactly one
    # dependency, and that dependency must be a makeblastdb command.
    for blast_job in graph:
        assert blast_job.script.startswith("blastn")
        deps = blast_job.dependencies
        assert len(deps) == 1
        assert deps[0].script.startswith("makeblastdb")
github widdowquinn / pyani / pyani / scripts / average_nucleotide_identity.py View on Github external
:param blastdir:  path of directory to fragment BLASTN databases

    Runs BLAST database creation and comparisons, returning the cumulative
    return values of the BLAST tool subprocesses, and the fragment sizes for
    each input file
    """
    if not args.skip_blastn:
        logger.info("Fragmenting input files, and writing to %s", args.outdirname)
        fragfiles, fraglengths = make_sequence_fragments(
            args, logger, infiles, blastdir
        )

        # Run BLAST database-building and executables from a jobgraph
        logger.info("Creating job dependency graph")
        jobgraph = anib.make_job_graph(
            infiles, fragfiles, anib.make_blastcmd_builder(args.method, blastdir)
        )
        if args.scheduler == "multiprocessing":
            logger.info("Running dependency graph with multiprocessing")
            cumval = run_mp.run_dependency_graph(jobgraph, logger=logger)
            if cumval > 0:
                logger.warning(
                    f"At least one BLAST run failed. {args.method} may fail. Please investigate."
                )
            else:
                logger.info("All multiprocessing jobs complete.")
        elif args.scheduler == "SGE":
            logger.info("Running dependency graph with SGE")
            run_sge.run_dependency_graph(jobgraph, logger=logger)
        else:
            logger.error(f"Scheduler {args.scheduler} not recognised (exiting)")
            raise SystemError(1)