How to use the biolib.parallel.Parallel class in biolib

To help you get started, we’ve selected a few biolib examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dparks1134 / CompareM / comparem / dinucleotide_usage.py View on Github external
gene_files : list
            Fasta files containing called genes in nucleotide space.

        Returns
        -------
        dict of dict : d[genome_id][codon] -> count
           Codon usage of each genome.
        set
           Set with all identified codons.
        dict of dict : d[genome_id][codon] -> length
            Mean length of genes for each stop codon.
        """

        self.logger.info('  Calculating codon usage for each genome.')

        parallel = Parallel(self.cpus)
        parallel.run(self._producer, None, gene_files[0:1], self._progress)
github dparks1134 / CompareM / comparem / lgt_codon.py View on Github external
Fasta files containing called genes in nucleotide space.
        output_dir : str
            Directory to store results.
        """

        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        self.logger.info('Calculating codon usage for each genome.')
        
        progress_func = self._progress
        if self.logger.is_silent:
            progress_func = None

        parallel = Parallel(self.cpus)
        parallel.run(self._producer, None, gene_files, progress_func)
github dparks1134 / CompareM / comparem / aai_calculator.py View on Github external
genome_info_pairs = []
        genome_ids = genes_in_genomes.keys()
        for i in xrange(0, len(genome_ids)):
            genome_idI = genome_ids[i]
            genome_infoI = (genome_idI, genes_in_genomes[genome_idI])
            for j in xrange(i + 1, len(genome_ids)):
                genome_idJ = genome_ids[j]
                genome_infoJ = (genome_idJ, genes_in_genomes[genome_idJ])
                genome_info_pairs.append((genome_infoI, genome_infoJ))

        if len(genome_info_pairs) == 0:
            self.logger.warning('No genome pairs identified.')
            return

        parallel = Parallel(self.cpus)
        consumer_data = parallel.run(self._producer, self._consumer, genome_info_pairs, self._progress)

        # write results for each genome pair
        aai_summay_file = os.path.join(output_dir, 'aai_summary.tsv')
        fout = open(aai_summay_file, 'w')
        fout.write('Genome Id A\tGenes in A\tGenome Id B\tGenes in B\t# orthologous genes\tMean AAI\tStd AAI\n')

        for data in consumer_data:
            fout.write('%s\t%d\t%s\t%d\t%d\t%.2f\t%.2f\n' % data)

        fout.close()

        self.logger.info('Summary of AAI between genomes: %s' % aai_summay_file)
github dparks1134 / CompareM / comparem / aai_calculator.py View on Github external
target_genomes = list(target_genomes)
        for i in xrange(0, len(query_genomes)):
            genome_idI = query_genomes[i]
            
            if target_genomes:
                genome_id_list = target_genomes
            else:
                genome_id_list = []
                for j in xrange(i + 1, len(query_genomes)):
                    genome_idJ = query_genomes[j]
                    genome_id_list.append(genome_idJ)

            genome_id_lists.append((genome_idI, genome_id_list))

        self.processed_paired = 0
        parallel = Parallel(self.cpus)
        
        progress_func = self._progress
        if self.logger.is_silent:
            progress_func = None
        consumer_data = parallel.run(self._producer, self._consumer, genome_id_lists, progress_func)

        # write results for each genome pair
        self.logger.info('Summarizing AAI results.')
        aai_summay_file = os.path.join(output_dir, 'aai_summary.tsv')
        fout = open(aai_summay_file, 'w')
        fout.write('#Genome A\tGenes in A\tGenome B\tGenes in B\t# orthologous genes\tMean AAI\tStd AAI\tOrthologous fraction (OF)\n')

        for data in consumer_data:
            fout.write('%s\t%d\t%s\t%d\t%d\t%.2f\t%.2f\t%.2f\n' % data)

        fout.close()
github dparks1134 / CompareM / comparem / reciprocal_blast.py View on Github external
output_dir : str
            Directory to store blast results.
        """

        self.evalue = evalue
        self.output_dir = output_dir

        # set CPUs per producer process
        self.producer_cpus = 1
        if self.cpus > len(aa_gene_files):
            self.producer_cpus = self.cpus / len(aa_gene_files)

        # create the blast databases in serial
        self.logger.info('  Creating blast databases:')

        parallel = Parallel(self.cpus)
        parallel.run(self._producer_db, None, aa_gene_files, self._progress)

        # perform reciprocal blast between all genome pairs
        self.logger.info('')
        self.logger.info('  Identifying hits between all pairs of genomes:')

        genome_pairs = []
        for i in xrange(0, len(aa_gene_files)):
            for j in xrange(i, len(aa_gene_files)):
                genome_pairs.append((aa_gene_files[i], aa_gene_files[j]))

        parallel.run(self._producer_blast, None, genome_pairs, self._progress)
github dparks1134 / CompareM / comparem / kmer_usage.py View on Github external
Returns
        -------
        dict of dict : d[genome_id][kmer] -> count
           Kmer usage of each genome.
        set
           Set with all identified kmers.
        """

        self.logger.info('Calculating kmer usage for each genome.')
        
        progress_func = self._progress
        if self.logger.is_silent:
            progress_func = None

        parallel = Parallel(self.cpus)
        kmer_counts = parallel.run(self._producer, self._consumer, genome_files, progress_func)

        return kmer_counts, self.signatures.canonical_order()
github dparks1134 / RefineM / refinem / tetranucleotide.py View on Github external
"""Calculate tetranucleotide signatures of sequences.

        Parameters
        ----------
        seq_file : str
            Name of fasta/q file to read.

        Returns
        -------
        dict : d[seq_id] -> tetranucleotide signature in canonical order
            Count of each kmer.
        """

        self.logger.info('Calculating tetranucleotide signature for each sequence:')

        parallel = Parallel(self.cpus)
        seq_signatures = parallel.run_seqs_file(self._producer, self._consumer, seq_file, self._progress)

        return seq_signatures
github dparks1134 / CompareM / comparem / codon_usage.py View on Github external
-------
        dict of dict : d[genome_id][codon] -> count
           Codon usage of each genome.
        set
           Set with all identified codons.
        dict of dict : d[genome_id][codon] -> length
            Mean length of genes for each stop codon.
        """

        self.logger.info('Calculating codon usage for each genome.')
        
        progress_func = self._progress
        if self.logger.is_silent:
            progress_func = None

        parallel = Parallel(self.cpus)
        consumer_data = parallel.run(self._producer, self._consumer, gene_files, progress_func)

        return consumer_data.genome_codon_usage, consumer_data.codon_set, consumer_data.mean_gene_length
github dparks1134 / CompareM / comparem / lgt_dinucleotide.py View on Github external
Directory to store results.
        """

        self.output_dir = output_dir
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir)

        self.critical_value = critical_value

        self.logger.info('Calculating dinucleotide usage for each genome.')
        
        progress_func = self._progress
        if self.logger.is_silent:
            progress_func = None

        parallel = Parallel(self.cpus)
        parallel.run(self._producer, None, gene_files, progress_func)
github dparks1134 / CompareM / comparem / amino_acid_usage.py View on Github external
Returns
        -------
        dict of dict : dict[genome_id][aa] -> count
           Amino acid usage of each genome.
        set
           Set with all identified amino acids.
        """

        self.logger.info('Calculating amino acid usage for each genome:')
        
        progress_func = self._progress
        if self.logger.is_silent:
            progress_func = None

        parallel = Parallel(self.cpus)
        consumer_data = parallel.run(self._producer, self._consumer, gene_files, progress_func)

        return consumer_data.genome_aa_usage, consumer_data.aa_set