Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Add dependency citations
logger.info(termcolor("DEPENDENCIES", bold=True))
dep_citations = [
"The authors of pyani gratefully acknowledge its dependence on",
"the following bioinformatics software:",
f"\t{termcolor('MUMmer3', 'cyan')}: S. Kurtz, A. Phillippy, A.L. Delcher, M. Smoot, M. Shumway,",
"\tC. Antonescu, and S.L. Salzberg (2004), 'Versatile and open software",
"\tfor comparing large genomes' Genome Biology 5:R12",
f"\t{termcolor('BLAST+', 'cyan')}: Camacho C., Coulouris G., Avagyan V., Ma N., Papadopoulos J.,",
"\tBealer K., & Madden T.L. (2008) 'BLAST+: architecture and applications.'",
"\tBMC Bioinformatics 10:421.",
f"\t{termcolor('BLAST', 'cyan')}: Altschul, S.F., Madden, T.L., Schäffer, A.A., Zhang, J.,",
"\tZhang, Z., Miller, W. & Lipman, D.J. (1997) 'Gapped BLAST and PSI-BLAST:",
"\ta new generation of protein database search programs.' Nucleic Acids Res.",
"\t25:3389-3402",
f"\t{termcolor('Biopython', 'cyan')}: Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A,",
"\tFriedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL",
"\t(2009) Biopython: freely available Python tools for computational",
"\tmolecular biology and bioinformatics. Bioinformatics, 25, 1422-1423",
]
for line in dep_citations:
logger.info(line)
# skipped downloads (and define a helper tuple for collating skipped
# genome information)
classes = []
labels = []
skippedlist = []
Skipped = namedtuple("Skipped", "taxon_id accession organism strain url dltype")
# Download contigs and hashes for each assembly UID in the list
# On completion of this loop, each assembly in the list will either be
# downloaded or skipped (with skipped genome information preserved in
# skippedlist), and class/label info will be collated, ready for writing
# to file.
# Summary information is reported to the logger for each eSummary that
# can be recovered
for tid, uids in asm_dict.items():
logger.info(termcolor("Downloading contigs for Taxon ID %s", "blue"), uids)
for uid in uids:
# Obtain eSummary
logger.info(
termcolor("Retrieving eSummary information for UID %s", "cyan"), uid
)
esummary, filestem = download.get_ncbi_esummary(uid, args.retries, api_key)
logger.debug("NCBI eSummary:\n%s\n%s", esummary, filestem)
uid_class = download.get_ncbi_classification(esummary)
# Report summary
outstr = "\n\t".join(
[
f"Species Taxid: {esummary['SpeciesTaxid']}",
f"TaxID: {esummary['Taxid']}",
f"Accession: {esummary['AssemblyAccession']}",
f"Name: {esummary['AssemblyName']}",
logger.info(termcolor("CITATION INFO", bold=True))
for line in CITATION_INFO:
logger.info(line)
# Add dependency citations
logger.info(termcolor("DEPENDENCIES", bold=True))
dep_citations = [
"The authors of pyani gratefully acknowledge its dependence on",
"the following bioinformatics software:",
f"\t{termcolor('MUMmer3', 'cyan')}: S. Kurtz, A. Phillippy, A.L. Delcher, M. Smoot, M. Shumway,",
"\tC. Antonescu, and S.L. Salzberg (2004), 'Versatile and open software",
"\tfor comparing large genomes' Genome Biology 5:R12",
f"\t{termcolor('BLAST+', 'cyan')}: Camacho C., Coulouris G., Avagyan V., Ma N., Papadopoulos J.,",
"\tBealer K., & Madden T.L. (2008) 'BLAST+: architecture and applications.'",
"\tBMC Bioinformatics 10:421.",
f"\t{termcolor('BLAST', 'cyan')}: Altschul, S.F., Madden, T.L., Schäffer, A.A., Zhang, J.,",
"\tZhang, Z., Miller, W. & Lipman, D.J. (1997) 'Gapped BLAST and PSI-BLAST:",
"\ta new generation of protein database search programs.' Nucleic Acids Res.",
"\t25:3389-3402",
f"\t{termcolor('Biopython', 'cyan')}: Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A,",
"\tFriedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL",
"\t(2009) Biopython: freely available Python tools for computational",
"\tmolecular biology and bioinformatics. Bioinformatics, 25, 1422-1423",
]
for line in dep_citations:
logger.info(line)
def add_log_headers():
    """Write the pyani citation and dependency acknowledgements to the log.

    All lines are emitted at INFO level through this module's logger, in
    this order: the "CITATION INFO" heading, each entry of CITATION_INFO,
    the "DEPENDENCIES" heading, and then the acknowledgement text with the
    references for MUMmer3, BLAST+, BLAST and Biopython.
    """
    log = logging.getLogger(__name__)

    # How to cite pyani itself
    log.info(termcolor("CITATION INFO", bold=True))
    for citation in CITATION_INFO:
        log.info(citation)

    # Acknowledge the third-party tools; the heading is logged before the
    # reference text is assembled, then the references follow one per line
    log.info(termcolor("DEPENDENCIES", bold=True))
    preamble = (
        "The authors of pyani gratefully acknowledge its dependence on",
        "the following bioinformatics software:",
    )
    mummer_ref = (
        f"\t{termcolor('MUMmer3', 'cyan')}: S. Kurtz, A. Phillippy, A.L. Delcher, M. Smoot, M. Shumway,",
        "\tC. Antonescu, and S.L. Salzberg (2004), 'Versatile and open software",
        "\tfor comparing large genomes' Genome Biology 5:R12",
    )
    blast_plus_ref = (
        f"\t{termcolor('BLAST+', 'cyan')}: Camacho C., Coulouris G., Avagyan V., Ma N., Papadopoulos J.,",
        "\tBealer K., & Madden T.L. (2008) 'BLAST+: architecture and applications.'",
        "\tBMC Bioinformatics 10:421.",
    )
    blast_ref = (
        f"\t{termcolor('BLAST', 'cyan')}: Altschul, S.F., Madden, T.L., Schäffer, A.A., Zhang, J.,",
        "\tZhang, Z., Miller, W. & Lipman, D.J. (1997) 'Gapped BLAST and PSI-BLAST:",
        "\ta new generation of protein database search programs.' Nucleic Acids Res.",
        "\t25:3389-3402",
    )
    biopython_ref = (
        f"\t{termcolor('Biopython', 'cyan')}: Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A,",
        "\tFriedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL",
        "\t(2009) Biopython: freely available Python tools for computational",
        "\tmolecular biology and bioinformatics. Bioinformatics, 25, 1422-1423",
    )
    for entry in (
        *preamble,
        *mummer_ref,
        *blast_plus_ref,
        *blast_ref,
        *biopython_ref,
    ):
        log.info(entry)
import logging
import sys
import time
from typing import List, Optional
from pyani.pyani_tools import termcolor
from .logger import config_logger
from .parsers import parse_cmdline
from .. import __version__
CITATION_INFO = [
termcolor(
"If you use pyani in your work, please cite the following publication:",
"green",
),
termcolor(
"\tPritchard, L., Glover, R. H., Humphris, S., Elphinstone, J. G.,", "yellow",
),
termcolor(
"\t& Toth, I.K. (2016) 'Genomics and taxonomy in diagnostics for", "yellow"
),
termcolor(
"\tfood security: soft-rotting enterobacterial plant pathogens.'", "yellow"
),
termcolor(
"\tAnalytical Methods, 8(1), 12–24. http://doi.org/10.1039/C5AY02550H",
"yellow",
),
The intended outcomes are:
outdir doesn't exist: create outdir
outdir exists: raise exception
outdir exists, --force only: remove the directory tree
outdir exists, --force --noclobber: continue with existing directory tree
So long as the outdir is created with this function, we need only check
for args.noclobber elsewhere to see how to proceed when a file exists.
"""
# Create logger
logger = logging.getLogger(__name__)
logger.info("Creating output directory %s", outdir)
if force:
logger.warning(termcolor("Output directory overwrite forced", "red"))
if outdir.is_dir() and noclobber is False:
logger.warning(termcolor("Clobbering existing directory %s", "red"), outdir)
shutil.rmtree(outdir)
outdir.mkdir(parents=True, exist_ok=force)
def add_log_headers():
"""Add headers to log output."""
logger = logging.getLogger(__name__)
# Add citation information to log
logger.info(termcolor("CITATION INFO", bold=True))
for line in CITATION_INFO:
logger.info(line)
# Add dependency citations
logger.info(termcolor("DEPENDENCIES", bold=True))
dep_citations = [
"The authors of pyani gratefully acknowledge its dependence on",
"the following bioinformatics software:",
f"\t{termcolor('MUMmer3', 'cyan')}: S. Kurtz, A. Phillippy, A.L. Delcher, M. Smoot, M. Shumway,",
"\tC. Antonescu, and S.L. Salzberg (2004), 'Versatile and open software",
"\tfor comparing large genomes' Genome Biology 5:R12",
f"\t{termcolor('BLAST+', 'cyan')}: Camacho C., Coulouris G., Avagyan V., Ma N., Papadopoulos J.,",
"\tBealer K., & Madden T.L. (2008) 'BLAST+: architecture and applications.'",
"\tBMC Bioinformatics 10:421.",
f"\t{termcolor('BLAST', 'cyan')}: Altschul, S.F., Madden, T.L., Schäffer, A.A., Zhang, J.,",
"\tZhang, Z., Miller, W. & Lipman, D.J. (1997) 'Gapped BLAST and PSI-BLAST:",
"\ta new generation of protein database search programs.' Nucleic Acids Res.",
"\t25:3389-3402",
f"\t{termcolor('Biopython', 'cyan')}: Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A,",
"\tFriedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL",
"\t(2009) Biopython: freely available Python tools for computational",
def subcmd_classify(args: Namespace) -> int:
"""Generate classifications for an analysis.
:param args: Namespace, command-line arguments
"""
logger = logging.getLogger(__name__)
# Tell the user what's going on
logger.info(
termcolor("Generating classification for ANI run: %s", "red"), args.run_id
)
logger.info("\tWriting output to: %s", args.outdir)
logger.info(termcolor("\tCoverage threshold: %s", "cyan"), args.cov_min)
logger.info(
termcolor("\tInitial minimum identity threshold: %s", "cyan"), args.id_min
)
# Get results data for the specified run
logger.info("Acquiring results for run: %s", args.run_id)
logger.debug("Connecting to database: %s", args.dbpath)
session = pyani_orm.get_session(args.dbpath)
logger.debug("Retrieving results matrices")
results = (
session.query(pyani_orm.Run).filter(pyani_orm.Run.run_id == args.run_id).first()
)
result_label_dict = pyani_orm.get_matrix_labels_for_run(session, args.run_id)
# Generate initial graph on basis of results
logger.info("Constructing graph from results.")
initgraph = pyani_classify.build_graph_from_results(
results, result_label_dict, args.cov_min, args.id_min
We attempt to gracefully skip genomes with download errors.
"""
# Create logger
logger = logging.getLogger(__name__)
if dltype == "GenBank":
filestem = re.sub("^GCF_", "GCA_", dlfiledata.filestem)
else:
filestem = dlfiledata.filestem
dlstatus = retrieve_genome_and_hash(
filestem, dlfiledata.suffix, dlfiledata.ftpstem, outdir, timeout, disable_tqdm,
)
# Pylint is confused by the content of dlstatus (a namedlist)
if dlstatus.error is not None: # pylint: disable=no-member
logger.warning(termcolor("%s download failed: skipping!", "magenta"), dltype)
logger.debug(
"Exception raised:\n%s", dlstatus.error
) # pylint: disable=no-member
dlstatus.skipped = True
return dlstatus # pylint: disable=no-member
logger = logging.getLogger(__name__)
# Add citation information to log
logger.info(termcolor("CITATION INFO", bold=True))
for line in CITATION_INFO:
logger.info(line)
# Add dependency citations
logger.info(termcolor("DEPENDENCIES", bold=True))
dep_citations = [
"The authors of pyani gratefully acknowledge its dependence on",
"the following bioinformatics software:",
f"\t{termcolor('MUMmer3', 'cyan')}: S. Kurtz, A. Phillippy, A.L. Delcher, M. Smoot, M. Shumway,",
"\tC. Antonescu, and S.L. Salzberg (2004), 'Versatile and open software",
"\tfor comparing large genomes' Genome Biology 5:R12",
f"\t{termcolor('BLAST+', 'cyan')}: Camacho C., Coulouris G., Avagyan V., Ma N., Papadopoulos J.,",
"\tBealer K., & Madden T.L. (2008) 'BLAST+: architecture and applications.'",
"\tBMC Bioinformatics 10:421.",
f"\t{termcolor('BLAST', 'cyan')}: Altschul, S.F., Madden, T.L., Schäffer, A.A., Zhang, J.,",
"\tZhang, Z., Miller, W. & Lipman, D.J. (1997) 'Gapped BLAST and PSI-BLAST:",
"\ta new generation of protein database search programs.' Nucleic Acids Res.",
"\t25:3389-3402",
f"\t{termcolor('Biopython', 'cyan')}: Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A,",
"\tFriedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL",
"\t(2009) Biopython: freely available Python tools for computational",
"\tmolecular biology and bioinformatics. Bioinformatics, 25, 1422-1423",
]
for line in dep_citations:
logger.info(line)