Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Flag indicating if RBH should be written to file.
output_dir : str
Directory to store AAI results.
"""
# read taxonomic identification of each genome
taxonomy = {}
if taxonomy_file:
for line in open(taxonomy_file):
genome_id, taxa_str = line.rstrip().split('\t')
taxonomy[genome_id] = taxa_str
# calculate AAI between query and target genomes
aai_output_dir = os.path.join(output_dir, 'aai')
make_sure_path_exists(aai_output_dir)
aai_calculator = AAICalculator(self.cpus)
aai_output_file, rbh_output_file = aai_calculator.run(query_gene_file,
target_gene_file,
sorted_hit_table,
evalue_threshold,
per_iden_threshold,
per_aln_len_threshold,
keep_rbhs,
aai_output_dir)
# determine matches to each query genomes
aai_results_file = os.path.join(aai_output_dir, 'aai_summary.tsv')
with open(aai_results_file) as f:
f.readline()
hits = defaultdict(list)
for line in f:
def hclust(self, options):
"""Hierarchical clustering command"""
if options.similarity and not options.max_sim_value:
self.logger.error("The 'max_sim_value' must be specified for similarity values.")
sys.exit(-1)
self.logger.info('Performing hierarchical clustering.')
hclust = HierarchicalCluster()
hclust.run(options.pairwise_value_file,
options.method,
options.similarity,
options.max_sim_value,
options.name_col1,
options.name_col2,
options.value_col,
options.output_tree)
self.logger.info('Hierarchical cluster tree written to: %s' % options.output_tree)
def kmer_usage(self, options):
"""Kmer usage command"""
if options.k > 10 or options.k <= 0:
self.logger.warning('CompareM only support kmers with k <= 10.')
sys.exit(0)
genome_files = self._input_files(options.genome_files, options.file_ext)
# calculate amino acid usage
kmer_usage = KmerUsage(options.k, options.cpus)
genome_kmer_usage, kmer_set = kmer_usage.run(genome_files)
# write out results
self.logger.info('Writing kmer profiles to file (be patient!).')
self._write_usage_profile(genome_kmer_usage,
kmer_set,
options.counts,
options.output_file)
self.logger.info('Kmer usage written to: %s' % options.output_file)
def codon_usage(self, options):
"""Codon usage command"""
gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)
# calculate amino acid usage
codon_usage = CodonUsage(options.cpus, options.keep_ambiguous)
genome_codon_usage, codon_set, _mean_length = codon_usage.run(gene_files)
# write out results
self._write_usage_profile(genome_codon_usage,
codon_set,
options.counts,
options.output_file)
self.logger.info('Codon usage written to: %s' % options.output_file)
def stop_usage(self, options):
"""Stop codon usage command"""
gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)
# calculate amino acid usage
codon_usage = CodonUsage(options.cpus, keep_ambiguous=False, stop_codon_only=True)
genome_codon_usage, codon_set, mean_gene_length = codon_usage.run(gene_files)
# write out results
if not options.mean_gene_length:
self._write_usage_profile(genome_codon_usage,
codon_set,
options.counts,
options.output_file)
else:
fout = open(options.output_file, 'w')
for codon in codon_set:
fout.write('\t' + codon)
if mean_gene_length:
fout.write('\t' + codon + ': avg. seq. length')
fout.write('\n')
def aai(self, options):
"""AAI command"""
check_file_exists(options.sorted_hit_table)
make_sure_path_exists(options.output_dir)
aai_calculator = AAICalculator(options.cpus)
aai_output_file, rbh_output_file = aai_calculator.run(options.query_gene_file,
None,
options.sorted_hit_table,
options.evalue,
options.per_identity,
options.per_aln_len,
options.keep_rbhs,
options.output_dir)
if rbh_output_file:
self.logger.info('Identified reciprocal best hits written to: %s' % rbh_output_file)
self.logger.info('AAI between genomes written to: %s' % aai_output_file)
def aa_usage(self, options):
"""Amino acid usage command"""
gene_files = self._input_files(options.protein_gene_files, options.file_ext)
# calculate amino acid usage
amino_acid_usage = AminoAcidUsage(options.cpus)
genome_aa_usage, aa_set = amino_acid_usage.run(gene_files)
# write out results
self._write_usage_profile(genome_aa_usage,
aa_set,
options.counts,
options.output_file)
self.logger.info('Amino acid usage written to: %s' % options.output_file)
def classify(self, options):
"""Classify genomes based on AAI values."""
check_file_exists(options.sorted_hit_table)
make_sure_path_exists(options.output_dir)
classify = Classify(options.cpus)
results_file = classify.run(options.query_gene_file,
options.target_gene_file,
options.sorted_hit_table,
options.evalue,
options.per_identity,
options.per_aln_len,
options.num_top_targets,
options.taxonomy_file,
options.keep_rbhs,
options.output_dir)
self.logger.info('Classification results written to: %s' % results_file)
def lgt_codon(self, options):
"""LGT dinucleotide usage command"""
gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)
lgt_codon = LgtCodon(options.cpus)
lgt_codon.run(gene_files, options.output_dir)
self.logger.info('Codon usage written to directory: %s' % options.output_dir)
def lgt_di(self, options):
"""LGT dinucleotide usage command"""
gene_files = self._input_files(options.nucleotide_gene_files, options.file_ext)
lgt_dinucleotide = LgtDinucleotide(options.cpus)
lgt_dinucleotide.run(gene_files, options.crit_value, options.output_dir)
self.logger.info('Dinucleotide usage written to directory: %s' % options.output_dir)