Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def split(self, options):
    """Run the split command.

    Checks the scaffold statistics file and the genome file, prepares the
    output directory, loads the scaffold statistics, and passes them with
    both criteria, the genome file and the output directory to
    ``Cluster.split``.
    """
    # Validation order is kept so the first reported problem is unchanged.
    check_file_exists(options.scaffold_stats_file)
    check_file_exists(options.genome_file)
    make_sure_path_exists(options.output_dir)

    self.logger.info('Reading scaffold statistics.')
    stats = ScaffoldStats()
    stats.read(options.scaffold_stats_file)

    partitioner = Cluster(1)
    partitioner.split(
        stats,
        options.criteria1,
        options.criteria2,
        options.genome_file,
        options.output_dir,
    )

    done_message = 'Partitioned sequences written to: ' + options.output_dir
    self.logger.info(done_message)
def taxon_profile(self, options):
"""Call genes command"""
# NOTE(review): the docstring says "Call genes", but the visible body
# validates inputs and runs a TaxonProfile over the protein files -- the
# docstring looks copied from another command; confirm and correct.
# Project helpers; presumably they create the output directory and verify
# that each input file is present -- confirm against their definitions.
make_sure_path_exists(options.output_dir)
check_file_exists(options.scaffold_stats_file)
check_file_exists(options.taxonomy_file)
check_file_exists(options.db_file)
# Collect the gene files from genome_prot_dir using the protein_ext option.
gene_files = self._genome_files(options.genome_prot_dir, options.protein_ext)
if not self._check_protein_seqs(gene_files):
self.logger.warning('All files must contain amino acid sequences.')
# NOTE(review): sys.exit() without an argument exits with status 0, so this
# failure is reported to the caller's shell as success.
sys.exit()
# build gene profile
# The local name below shadows this method's own name inside the body only.
taxon_profile = TaxonProfile(options.cpus, options.output_dir)
taxon_profile.run(gene_files,
options.scaffold_stats_file,
options.db_file,
options.taxonomy_file,
options.per_to_classify,
options.evalue,
# NOTE(review): the fragment ends here, in the middle of the run() call; the
# remaining arguments and the closing parenthesis are not visible.
def manual(self, options):
"""Manual command"""
# Project helpers; presumably they verify the inputs and create the output
# directory -- confirm against their definitions.
check_file_exists(options.cluster_file)
check_file_exists(options.genome_file)
make_sure_path_exists(options.output_dir)
genome_id = remove_extension(options.genome_file)
seqs = seq_io.read(options.genome_file)
# Empty dict; not used within the visible part of this fragment.
fout = {}
with open(options.cluster_file) as f:
# Read and discard the first line (presumably a header row -- confirm).
f.readline()
for line in f:
# Each remaining line is tab-separated: column 0 is used as the scaffold
# id and column 1 is parsed as an integer cluster id.
line_split = line.rstrip().split('\t')
scaffold_id = line_split[0]
cluster_id = int(line_split[1])
# NOTE(review): the fragment ends on this condition; the handling of
# negative cluster ids and the rest of the loop are not visible.
if cluster_id < 0:
def taxon_profile(self, options):
"""Call genes command"""
# NOTE(review): the docstring says "Call genes", but the visible body
# validates inputs and runs a TaxonProfile over the protein files -- the
# docstring looks copied from another command; confirm and correct.
# Project helpers; presumably they create the output directory and verify
# that each input file is present -- confirm against their definitions.
make_sure_path_exists(options.output_dir)
check_file_exists(options.scaffold_stats_file)
check_file_exists(options.taxonomy_file)
check_file_exists(options.db_file)
# Collect the gene files from genome_prot_dir using the protein_ext option.
gene_files = self._genome_files(options.genome_prot_dir, options.protein_ext)
if not self._check_protein_seqs(gene_files):
self.logger.warning('All files must contain amino acid sequences.')
# NOTE(review): sys.exit() without an argument exits with status 0, so this
# failure is reported to the caller's shell as success.
sys.exit()
# build gene profile
# The local name below shadows this method's own name inside the body only.
taxon_profile = TaxonProfile(options.cpus, options.output_dir)
taxon_profile.run(gene_files,
options.scaffold_stats_file,
options.db_file,
options.taxonomy_file,
options.per_to_classify,
options.evalue,
options.per_identity,
options.per_aln_len,
# NOTE(review): the fragment ends here, in the middle of the run() call; any
# further arguments and the closing parenthesis are not visible.
def aai(self, options):
    """Run the AAI command.

    Checks the sorted hit table, prepares the output directory, runs
    ``AAICalculator`` and logs where its output files were written.
    """
    check_file_exists(options.sorted_hit_table)
    make_sure_path_exists(options.output_dir)

    calculator = AAICalculator(options.cpus)
    # The second positional argument is passed as None, exactly as before.
    output_files = calculator.run(
        options.query_gene_file,
        None,
        options.sorted_hit_table,
        options.evalue,
        options.per_identity,
        options.per_aln_len,
        options.keep_rbhs,
        options.output_dir,
    )
    aai_path, rbh_path = output_files

    # The reciprocal-best-hit file is only reported when run() returned one.
    if rbh_path:
        self.logger.info('Identified reciprocal best hits written to: %s' % rbh_path)
    self.logger.info('AAI between genomes written to: %s' % aai_path)
def align(self, options):
"""Create MSA from marker genes."""
# genome_dir and batchfile are each validated only when they are set.
if options.genome_dir:
check_dir_exists(options.genome_dir)
if options.batchfile:
check_file_exists(options.batchfile)
check_dir_exists(options.identify_dir)
make_sure_path_exists(options.out_dir)
# Derive one marker set id from the three marker-set options.
marker_set_id = self._marker_set_id(options.bac120_ms,
options.ar122_ms,
options.rps23_ms)
markers = Markers(options.threads)
markers.align(options.genome_dir,
options.batchfile,
options.identify_dir,
marker_set_id,
options.taxa_filter,
options.min_perc_aa,
options.custom_msa_filters,
# NOTE(review): the fragment ends here, in the middle of the align() call; any
# further arguments and the closing parenthesis are not visible.
def identify(self, options):
    """Identify marker genes in genomes.

    Validates ``options.genome_dir`` and ``options.batchfile`` when they are
    set, prepares ``options.out_dir`` and runs ``Markers.identify``.

    Any exception raised along the way is caught here and not re-raised, so
    the method returns None on failure as well as on success. The failure is
    logged at ERROR level together with its traceback.
    """
    try:
        if options.genome_dir:
            check_dir_exists(options.genome_dir)

        if options.batchfile:
            check_file_exists(options.batchfile)

        make_sure_path_exists(options.out_dir)

        markers = Markers(options.cpus)
        markers.identify(options.genome_dir,
                         options.batchfile,
                         options.proteins,
                         options.out_dir,
                         options.prefix)

        self.logger.info('Done.')
    except Exception:
        # Previously the exception object was discarded and the failure was
        # only reported at INFO level, leaving no trace of what went wrong.
        # logger.exception keeps the same message but records it at ERROR
        # level and attaches the active traceback.
        self.logger.exception('GTDB-Tk has encountered an error.')
def root(self, options):
    """Root tree using outgroup.

    Reads the taxonomy from ``Config.TAXONOMY_FILE``, collects every genome
    whose taxa contain ``options.outgroup_taxon``, and passes that set to
    ``RerootTree.root_with_outgroup`` along with the input and output tree
    paths.
    """
    check_file_exists(options.input_tree)

    gtdb_taxonomy = Taxonomy().read(Config.TAXONOMY_FILE)

    self.logger.info('Identifying genomes from the specified outgroup.')
    # .items() replaces .iteritems(), which was removed in Python 3; .items()
    # is available on both Python 2 and Python 3 mappings.
    outgroup = {genome_id
                for genome_id, taxa in gtdb_taxonomy.items()
                if options.outgroup_taxon in taxa}

    reroot = RerootTree()
    reroot.root_with_outgroup(options.input_tree,
                              options.output_tree,
                              outgroup)

    self.logger.info('Done.')