Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def bin_compare(self, options):
    """Bin compare command.

    Checks that both genome directories exist, confirms that the genome
    files found in each pass the nucleotide check, and then runs the bin
    comparison over the two sets of genome files.

    Parameters
    ----------
    options : object
        Object exposing the attributes read here: ``genome_nt_dir1``,
        ``genome_ext1``, ``genome_nt_dir2``, ``genome_ext2``,
        ``scaffold_file`` and ``output_file``.

    Raises
    ------
    SystemExit
        With status 1 if either set of genome files fails the
        nucleotide sequence check.
    """
    check_dir_exists(options.genome_nt_dir1)
    check_dir_exists(options.genome_nt_dir2)

    genomes_files1 = self._genome_files(options.genome_nt_dir1, options.genome_ext1)
    if not self._check_nuclotide_seqs(genomes_files1):
        self.logger.warning('All files must contain nucleotide sequences.')
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)

    genomes_files2 = self._genome_files(options.genome_nt_dir2, options.genome_ext2)
    if not self._check_nuclotide_seqs(genomes_files2):
        self.logger.warning('All files must contain nucleotide sequences.')
        sys.exit(1)

    bin_comparer = BinComparer()
    bin_comparer.run(genomes_files1, genomes_files2, options.scaffold_file, options.output_file)

    self.logger.info('Detailed bin comparison written to: ' + options.output_file)
def ssu_erroneous(self, options):
    """Erroneous SSU command.

    Checks the external dependencies and input directories, then
    identifies, extracts and classifies 16S sequences in the genome files.

    Parameters
    ----------
    options : object
        Object exposing the attributes read here: ``genome_nt_dir``,
        ``genome_ext``, ``taxon_profile_dir``, ``output_dir``, ``cpus``,
        ``evalue``, ``concatenate``, ``ssu_db`` and ``ssu_taxonomy_file``.

    Raises
    ------
    SystemExit
        With status 1 if the genome files fail the nucleotide sequence
        check.
    """
    check_dependencies(('nhmmer', 'blastn'))

    check_dir_exists(options.genome_nt_dir)
    check_dir_exists(options.taxon_profile_dir)
    make_sure_path_exists(options.output_dir)

    genome_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
    if not self._check_nuclotide_seqs(genome_files):
        self.logger.warning('All files must contain nucleotide sequences.')
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)

    # identify scaffolds with 16S sequences
    ssu = SSU(options.cpus)
    ssu_hits = ssu.identify(genome_files, options.evalue, options.concatenate, options.output_dir)
    ssu_seq_files = ssu.extract(genome_files, ssu_hits, options.output_dir)
    # NOTE(review): the classifications are not consumed in the visible
    # part of this method; the reporting step appears to be missing here.
    ssu_classifications = ssu.classify(ssu_seq_files, options.ssu_db, options.ssu_taxonomy_file, options.evalue, options.output_dir)

    # report statistics for SSU scaffolds
def unbinned(self, options):
    """Unbinned command.

    Confirms that the genome files pass the nucleotide check, collects
    the unbinned sequences and writes them to a FASTA file.

    Parameters
    ----------
    options : object
        Object exposing the attributes read here: ``genome_nt_dir``,
        ``genome_ext``, ``scaffold_file``, ``min_seq_len`` and
        ``output_file``.

    Raises
    ------
    SystemExit
        With status 1 if the genome files fail the nucleotide sequence
        check.
    """
    check_dir_exists(options.genome_nt_dir)

    genomes_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
    if not self._check_nuclotide_seqs(genomes_files):
        self.logger.warning('All files must contain nucleotide sequences.')
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)

    # Named differently from the method itself to avoid confusion.
    unbinned_finder = Unbinned()
    unbinned_seqs = unbinned_finder.run(genomes_files, options.scaffold_file, options.min_seq_len)

    seq_io.write_fasta(unbinned_seqs, options.output_file)

    self.logger.info('Unbinned scaffolds written to: ' + options.output_file)
def ssu_erroneous(self, options):
    """Erroneous SSU command.

    Checks the external dependencies and input directories, then
    identifies, extracts and classifies 16S sequences in the genome files.

    Parameters
    ----------
    options : object
        Object exposing the attributes read here: ``genome_nt_dir``,
        ``genome_ext``, ``taxon_profile_dir``, ``output_dir``, ``cpus``,
        ``evalue``, ``concatenate``, ``ssu_db`` and ``ssu_taxonomy_file``.

    Raises
    ------
    SystemExit
        With status 1 if the genome files fail the nucleotide sequence
        check.
    """
    check_dependencies(('nhmmer', 'blastn'))

    check_dir_exists(options.genome_nt_dir)
    check_dir_exists(options.taxon_profile_dir)
    make_sure_path_exists(options.output_dir)

    genome_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
    if not self._check_nuclotide_seqs(genome_files):
        self.logger.warning('All files must contain nucleotide sequences.')
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)

    # identify scaffolds with 16S sequences
    ssu = SSU(options.cpus)
    ssu_hits = ssu.identify(genome_files, options.evalue, options.concatenate, options.output_dir)
    ssu_seq_files = ssu.extract(genome_files, ssu_hits, options.output_dir)
    # NOTE(review): the classifications are not consumed in the visible
    # part of this method; confirm the reporting step against the full file.
    ssu_classifications = ssu.classify(ssu_seq_files, options.ssu_db, options.ssu_taxonomy_file, options.evalue, options.output_dir)

    # report statistics for SSU scaffolds
    self.logger.info('Identifying scaffolds with 16S rRNA genes with divergent taxonomic classification.')
def call_genes(self, options):
    """Call genes command.

    Runs Prodigal over the genome files and, when an unbinned file is
    supplied, over that file as well with ``meta=True``.

    Parameters
    ----------
    options : object
        Object exposing the attributes read here: ``genome_nt_dir``,
        ``genome_ext``, ``output_dir``, ``cpus`` and ``unbinned_file``.

    Raises
    ------
    SystemExit
        With status 1 if the genome files fail the nucleotide sequence
        check.
    """
    check_dir_exists(options.genome_nt_dir)
    make_sure_path_exists(options.output_dir)

    genome_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
    if not self._check_nuclotide_seqs(genome_files):
        self.logger.warning('All files must contain nucleotide sequences.')
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)

    # call genes in genomes
    prodigal = Prodigal(options.cpus)
    prodigal.run(genome_files, options.output_dir)
    self.logger.info('Genes in genomes written to: %s' % options.output_dir)

    # call genes in unbinned scaffolds
    if options.unbinned_file:
        # Results for the unbinned file go in their own subdirectory.
        unbinned_output_dir = os.path.join(options.output_dir, 'unbinned')
        prodigal.run([options.unbinned_file], unbinned_output_dir, meta=True)
def _genome_files(self, genome_dir, genome_ext):
    """Identify genome files.

    Parameters
    ----------
    genome_dir : str
        Directory containing genomes of interest.
    genome_ext : str
        Extension of genome files.

    Returns
    -------
    list
        Path to genome files.

    Raises
    ------
    SystemExit
        With status 1 if no file in the directory has the given extension.
    """
    check_dir_exists(genome_dir)

    # Keep directory-listing order; only the extension filter is applied.
    genome_files = [os.path.join(genome_dir, f)
                    for f in os.listdir(genome_dir)
                    if f.endswith(genome_ext)]

    if not genome_files:
        self.logger.warning('No genomes found. Check the --genome_ext or --protein_ext flag used to identify genomes.')
        # A bare sys.exit() reports status 0 (success); this is a failure.
        sys.exit(1)

    return genome_files
def align(self, options):
"""Create MSA from marker genes."""
# genome_dir and batchfile are each validated only when supplied.
if options.genome_dir:
check_dir_exists(options.genome_dir)
if options.batchfile:
check_file_exists(options.batchfile)
# The identify directory must already exist; the output directory is
# passed to make_sure_path_exists.
check_dir_exists(options.identify_dir)
make_sure_path_exists(options.out_dir)
# Derive the marker set identifier from the three marker-set options.
marker_set_id = self._marker_set_id(options.bac120_ms,
options.ar122_ms,
options.rps23_ms)
markers = Markers(options.threads)
markers.align(options.genome_dir,
options.batchfile,
options.identify_dir,
marker_set_id,
options.taxa_filter,
options.min_perc_aa,
options.custom_msa_filters,
options.consensus,
options.min_perc_taxa,
# NOTE(review): the argument list is cut off here in this excerpt; the
# remaining arguments and the closing parenthesis are not visible.
def align(self, options):
"""Create MSA from marker genes."""
if options.genome_dir:
check_dir_exists(options.genome_dir)
if options.batchfile:
check_file_exists(options.batchfile)
check_dir_exists(options.identify_dir)
make_sure_path_exists(options.out_dir)
marker_set_id = self._marker_set_id(options.bac120_ms,
options.ar122_ms,
options.rps23_ms)
markers = Markers(options.threads)
markers.align(options.genome_dir,
options.batchfile,
options.identify_dir,
marker_set_id,