Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
raise GenomeMarkerSetUnknown
shutil.copyfile(user_msa_file, t)
user_msa_file = t
# run pplacer to place bins in reference genome tree
num_genomes = sum([1 for _seq_id, _seq in read_seq(user_msa_file)])
# check if a scratch file is to be created
pplacer_mmap_file = None
if scratch_dir:
self.logger.info('Using a scratch file for pplacer allocations. '
'This decreases memory usage and performance.')
pplacer_mmap_file = os.path.join(
scratch_dir, prefix + ".pplacer.scratch")
make_sure_path_exists(scratch_dir)
# get path to pplacer reference package
if marker_set_id == 'bac120':
if levelopt is None:
self.logger.info(
f'Placing {num_genomes} bacterial genomes into reference tree with pplacer using {self.pplacer_cpus} cpus (be patient).')
pplacer_ref_pkg = os.path.join(
Config.PPLACER_DIR, Config.PPLACER_BAC120_REF_PKG)
elif levelopt == 'high':
self.logger.info(
f'Placing {num_genomes} bacterial genomes into high reference tree with pplacer using {self.pplacer_cpus} cpus (be patient).')
pplacer_ref_pkg = os.path.join(
Config.HIGH_PPLACER_DIR, Config.HIGH_PPLACER_REF_PKG)
elif levelopt == 'low':
self.logger.info(
f'Placing {num_genomes} bacterial genomes into low reference tree {tree_iter} with pplacer using {self.pplacer_cpus} cpus (be patient).')
"""
if marker_set_id == 'bac120':
marker_dict = Config.RED_DIST_BAC_DICT
out_path = os.path.join(
out_dir, PATH_BAC120_RED_DICT.format(prefix=prefix))
elif marker_set_id == 'ar122':
marker_dict = Config.RED_DIST_ARC_DICT
out_path = os.path.join(
out_dir, PATH_AR122_RED_DICT.format(prefix=prefix))
else:
self.logger.error('There was an error determining the marker set.')
raise GenomeMarkerSetUnknown
make_sure_path_exists(os.path.dirname(out_path))
with open(out_path, 'w') as reddictfile:
reddictfile.write('Phylum\t{}\n'.format(marker_dict.get('p__')))
reddictfile.write('Class\t{}\n'.format(marker_dict.get('c__')))
reddictfile.write('Order\t{}\n'.format(marker_dict.get('o__')))
reddictfile.write('Family\t{}\n'.format(marker_dict.get('f__')))
reddictfile.write('Genus\t{}\n'.format(marker_dict.get('g__')))
return marker_dict
timestamp_stream_logger.setFormatter(SpecialFormatter())
timestamp_logger.addHandler(timestamp_stream_logger)
no_timestamp_stream_logger = logging.StreamHandler(sys.stdout)
no_timestamp_stream_logger.setFormatter(None)
no_timestamp_logger.addHandler(no_timestamp_stream_logger)
timestamp_logger.is_silent = False
no_timestamp_stream_logger.is_silent = False
if silent:
timestamp_logger.is_silent = True
timestamp_stream_logger.setLevel(logging.ERROR)
no_timestamp_stream_logger.is_silent = True
if log_dir:
make_sure_path_exists(log_dir)
timestamp_file_logger = logging.FileHandler(os.path.join(log_dir,
log_file), 'a')
timestamp_file_logger.setFormatter(ColourlessFormatter())
timestamp_logger.addHandler(timestamp_file_logger)
no_timestamp_file_logger = logging.FileHandler(os.path.join(log_dir,
log_file), 'a')
no_timestamp_file_logger.setFormatter(None)
no_timestamp_logger.addHandler(no_timestamp_file_logger)
warning_fh = logging.FileHandler(os.path.join(log_dir,
log_file.replace('.log', '.warnings.log')), 'a')
warning_fh.setFormatter(ColourlessFormatter())
warning_logger.addHandler(warning_fh)
timestamp_logger.info('%s v%s' % (program_name, version))
def infer(self, options):
"""Infer a tree from a user specified MSA.
Parameters
----------
options : argparse.Namespace
The CLI arguments input by the user.
"""
check_file_exists(options.msa_file)
make_sure_path_exists(options.out_dir)
check_dependencies(['FastTree' + ('MP' if options.cpus > 1 else '')])
if hasattr(options, 'suffix'):
output_tree = os.path.join(options.out_dir,
PATH_MARKER_UNROOTED_TREE.format(prefix=options.prefix,
marker=options.suffix))
tree_log = os.path.join(options.out_dir,
PATH_MARKER_TREE_LOG.format(prefix=options.prefix,
marker=options.suffix))
fasttree_log = os.path.join(options.out_dir,
PATH_MARKER_FASTTREE_LOG.format(prefix=options.prefix,
marker=options.suffix))
else:
output_tree = os.path.join(options.out_dir,
PATH_UNROOTED_TREE.format(prefix=options.prefix))
def classify(self, options):
    """Assign a taxonomic classification to the input genomes.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user.
    """
    # See ticket #255... perhaps an upstream version/OS issue?
    # Default `pplacer_cpus` to None when the namespace does not carry it.
    try:
        options.pplacer_cpus
    except AttributeError:
        options.pplacer_cpus = None

    # The alignment directory must already exist; output (and optional
    # scratch) directories are created on demand.
    check_dir_exists(options.align_dir)
    make_sure_path_exists(options.out_dir)
    scratch_dir = options.scratch_dir
    if scratch_dir:
        make_sure_path_exists(scratch_dir)

    # Only the first returned value (the genomes) is used here.
    genomes, _unused = self._genomes_to_process(options.genome_dir,
                                                options.batchfile,
                                                options.extension)

    classifier = Classify(options.cpus, options.pplacer_cpus)
    classifier.run(
        genomes,
        options.align_dir,
        options.out_dir,
        options.prefix,
        scratch_dir,
        options.recalculate_red,
        options.debug,
        options.split_tree,
    )
def _place_in_low_tree(self, tree_iter, listg, msa_dict, marker_set_id, prefix, scratch_dir, out_dir):
    """Write a sub-MSA for a set of genomes and place them in a low-level tree.

    Parameters
    ----------
    tree_iter
        Value substituted for ``iter`` in the low-tree directory and sub-MSA
        path templates; also forwarded to ``place_genomes``.
    listg : iterable
        Genome identifiers to include in the sub-MSA.
    msa_dict : dict
        Maps a genome identifier to the sequence written for it.
    marker_set_id
        Forwarded unchanged to ``place_genomes``.
    prefix
        Forwarded unchanged to ``place_genomes``.
    scratch_dir
        Forwarded unchanged to ``place_genomes``.
    out_dir : str
        Output directory under which the low-tree directory and the sub-MSA
        file are created.

    Returns
    -------
    tuple
        ``(low_classify_tree, submsa_file_path)``: the value returned by
        ``place_genomes`` and the path of the sub-MSA file written here.
    """
    make_sure_path_exists(os.path.join(
        out_dir, DIR_LOW_PPLACER.format(iter=tree_iter)))
    submsa_file_path = os.path.join(
        out_dir, PATH_LOW_BAC120_SUBMSA.format(iter=tree_iter))

    # Context manager guarantees the handle is closed even if a write fails.
    with open(submsa_file_path, 'w') as submsa_file:
        for gid in listg:
            # NOTE(review): a gid missing from msa_dict is written as the
            # literal text "None"; kept as-is to preserve existing behaviour.
            submsa_file.write('>{}\n{}\n'.format(gid, msa_dict.get(gid)))

    # Hand over the path that was actually written (joined with out_dir);
    # the bare template path only resolves when the CWD equals out_dir.
    low_classify_tree = self.place_genomes(submsa_file_path,
                                           marker_set_id,
                                           out_dir,
                                           prefix,
                                           scratch_dir,
                                           'low', tree_iter)
    return low_classify_tree, submsa_file_path
def identify(self, options):
    """Run marker gene identification on the requested genomes.

    Parameters
    ----------
    options : argparse.Namespace
        The CLI arguments input by the user.
    """
    # Validate whichever genome sources were supplied.
    genome_dir = options.genome_dir
    if genome_dir:
        check_dir_exists(genome_dir)
    batchfile = options.batchfile
    if batchfile:
        check_file_exists(batchfile)
    make_sure_path_exists(options.out_dir)

    genomes, tln_tables = self._genomes_to_process(genome_dir,
                                                   batchfile,
                                                   options.extension)
    # Keep the resolved genomes on the instance.
    self.genomes_to_process = genomes

    Markers(options.cpus).identify(
        genomes,
        tln_tables,
        options.out_dir,
        options.prefix,
        options.force,
    )

    self.logger.info('Done.')
def align(self, options):
"""Create MSA from marker genes.
Parameters
----------
options : argparse.Namespace
The CLI arguments input by the user.
"""
check_dir_exists(options.identify_dir)
make_sure_path_exists(options.out_dir)
if not hasattr(options, 'outgroup_taxon'):
options.outgroup_taxon = None
markers = Markers(options.cpus, options.debug)
markers.align(options.identify_dir,
options.skip_gtdb_refs,
options.taxa_filter,
options.min_perc_aa,
options.custom_msa_filters,
options.skip_trimming,
options.rnd_seed,
options.cols_per_gene,
options.min_consensus,
options.max_consensus,
options.min_perc_taxa,