Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _read_taxonomy_files(self, options) -> Dict[str, Tuple[str, str, str, str, str, str, str]]:
"""Read and merge taxonomy files."""
self.logger.info('Reading GTDB taxonomy for representative genomes.')
taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)
if options.gtdbtk_classification_file:
# add and overwrite taxonomy for genomes specified in the
# GTDB-Tk classification file
check_file_exists(options.gtdbtk_classification_file)
self.logger.info('Reading GTDB-Tk classification file.')
gtdbtk_taxonomy = Taxonomy().read(options.gtdbtk_classification_file)
del gtdbtk_taxonomy['user_genome']
num_reassigned = 0
for gid, taxa in gtdbtk_taxonomy.items():
if gid in taxonomy:
num_reassigned += 1
taxonomy[gid] = taxa
self.logger.info(f'Read GTDB-Tk classifications for {len(gtdbtk_taxonomy):,} genomes.')
self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')
if options.custom_taxonomy_file:
# add and overwrite taxonomy for genomes specified in the
# custom taxonomy file
def decorate(self, options):
    """Annotate the input tree with GTDB taxonomy and write the result.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user.
    """
    tree_in = options.input_tree
    check_file_exists(tree_in)

    # Taxonomy merged from the files referenced by the CLI options.
    merged_taxonomy = self._read_taxonomy_files(options)

    Decorate().run(tree_in, merged_taxonomy, options.output_tree)
    self.logger.info('Done.')

    # When run independently there is no 'suffix' attribute and therefore
    # no decorated tree file to symlink.
    if not hasattr(options, 'suffix'):
        return

    # Only the 'bac120' suffix is handled by this block.
    if options.suffix == 'bac120':
        decorated_tree = PATH_BAC120_DECORATED_TREE.format(prefix=options.prefix)
        link_path = os.path.join(options.out_dir,
                                 os.path.basename(decorated_tree))
        symlink_f(decorated_tree, link_path)
def identify(self, options):
    """Run marker gene identification over the requested genomes.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user.
    """
    # Validate whichever genome sources were supplied before doing any work.
    genome_dir = options.genome_dir
    if genome_dir:
        check_dir_exists(genome_dir)
    batchfile = options.batchfile
    if batchfile:
        check_file_exists(batchfile)

    out_dir = options.out_dir
    make_sure_path_exists(out_dir)

    genomes, tln_tables = self._genomes_to_process(genome_dir,
                                                   batchfile,
                                                   options.extension)
    # Keep the resolved genome set on the instance.
    self.genomes_to_process = genomes

    Markers(options.cpus).identify(genomes,
                                   tln_tables,
                                   out_dir,
                                   options.prefix,
                                   options.force)
    self.logger.info('Done.')
def infer(self, options):
"""Infer a tree from a user specified MSA.
Parameters
----------
options : argparse.Namespace
The CLI arguments input by the user.
"""
check_file_exists(options.msa_file)
make_sure_path_exists(options.out_dir)
check_dependencies(['FastTree' + ('MP' if options.cpus > 1 else '')])
if hasattr(options, 'suffix'):
output_tree = os.path.join(options.out_dir,
PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
marker=options.suffix))
tree_log = os.path.join(options.out_dir,
PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
marker=options.suffix))
fasttree_log = os.path.join(options.out_dir,
PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
marker=options.suffix))
else:
output_tree = os.path.join(options.out_dir,
def infer_ranks(self, options):
    """Assign taxonomic ranks to internal tree nodes using RED.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user; `input_tree`,
        `ingroup_taxon` and `output_tree` are read.
    """
    tree_path = options.input_tree
    check_file_exists(tree_path)

    InferRanks().run(tree_path, options.ingroup_taxon, options.output_tree)
    self.logger.info('Done.')
self.logger.info('Reading GTDB-Tk classification file.')
gtdbtk_taxonomy = Taxonomy().read(options.gtdbtk_classification_file)
del gtdbtk_taxonomy['user_genome']
num_reassigned = 0
for gid, taxa in gtdbtk_taxonomy.items():
if gid in taxonomy:
num_reassigned += 1
taxonomy[gid] = taxa
self.logger.info(f'Read GTDB-Tk classifications for {len(gtdbtk_taxonomy):,} genomes.')
self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')
if options.custom_taxonomy_file:
# add and overwrite taxonomy for genomes specified in the
# custom taxonomy file
check_file_exists(options.custom_taxonomy_file)
self.logger.info('Reading custom taxonomy file.')
custom_taxonomy = Taxonomy().read(options.custom_taxonomy_file)
num_reassigned = 0
for gid, taxa in custom_taxonomy.items():
if gid in taxonomy:
num_reassigned += 1
taxonomy[gid] = taxa
self.logger.info(f'Read custom taxonomy for {len(custom_taxonomy):,} genomes.')
self.logger.info(f'Reassigned taxonomy for {num_reassigned:,} GTDB representative genomes.')
if options.gtdbtk_classification_file and options.custom_taxonomy_file:
dup_genomes = set(gtdbtk_taxonomy).intersection(custom_taxonomy)
if len(dup_genomes) > 0:
self.logger.error('GTDB-Tk classification and custom taxonomy '