How to use the gtdbtk.tools.add_ncbi_prefix function in gtdbtk

To help you get started, we’ve selected a few examples of gtdbtk.tools.add_ncbi_prefix, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Ecogenomics / GTDBTk / tests / test_gtdbtk / test_tools.py View on Github external
def test_add_ncbi_prefix(self):
        # Each pair is (name passed to add_ncbi_prefix, value it must return).
        # The last pair expects the name to come back unchanged.
        expectations = (
            ('GCF_123.1', 'RS_GCF_123.1'),
            ('GCA_456.1', 'GB_GCA_456.1'),
            ('genome_1', 'genome_1'),
        )
        # Checked in order; the first mismatch fails the test immediately.
        for accession, prefixed in expectations:
            self.assertEqual(tools.add_ncbi_prefix(accession), prefixed)
github Ecogenomics / GTDBTk / tests / test_gtdbtk / test_tools.py View on Github external
def test_add_ncbi_prefix(self):
        # Each pair is (name passed to add_ncbi_prefix, value it must return).
        # The last pair expects the name to come back unchanged.
        expectations = (
            ('GCF_123.1', 'RS_GCF_123.1'),
            ('GCA_456.1', 'GB_GCA_456.1'),
            ('genome_1', 'genome_1'),
        )
        # Checked in order; the first mismatch fails the test immediately.
        for accession, prefixed in expectations:
            self.assertEqual(tools.add_ncbi_prefix(accession), prefixed)
github Ecogenomics / GTDBTk / tests / test_gtdbtk / test_tools.py View on Github external
def test_add_ncbi_prefix(self):
        # Each pair is (name passed to add_ncbi_prefix, value it must return).
        # The last pair expects the name to come back unchanged.
        expectations = (
            ('GCF_123.1', 'RS_GCF_123.1'),
            ('GCA_456.1', 'GB_GCA_456.1'),
            ('genome_1', 'genome_1'),
        )
        # Checked in order; the first mismatch fails the test immediately.
        for accession, prefixed in expectations:
            self.assertEqual(tools.add_ncbi_prefix(accession), prefixed)
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
sorted_dict : sorted dictionary listing reference genomes, ani and alignment fraction for a specific user genome
                    (genomeid, {ani: value, af: value})
        labels : array of label that are removed from the note field

        Returns
        -------
        string
            note field

        """
        note_list = []
        for element in sorted_dict:
            if element[0] not in labels:
                note_str = "{}, {}, {}, {}, {}".format(element[0],
                                                       self.gtdb_taxonomy.get(
                                                           add_ncbi_prefix(element[0]))[6],
                                                       self.species_radius.get(
                                                           element[0]),
                                                       round(
                                                           element[1].get('ani'), 2),
                                                       element[1].get('af'))
                note_list.append(note_str)
        return note_list
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
summary_list[8] = summary_list[3]
                            summary_list[9] = summary_list[4]
                            summary_list[10] = summary_list[5]
                            summary_list[11] = summary_list[6]
                            summary_list[14] = 'topological placement and ANI have congruent species assignments'
                            if len(sorted_dict) > 0:
                                other_ref = '; '.join(self._formatnote(
                                    sorted_dict, [fastani_matching_reference]))
                                if len(other_ref) == 0:
                                    summary_list[15] = None
                                else:
                                    summary_list[15] = other_ref

                        else:
                            taxa_str = ";".join(self.gtdb_taxonomy.get(
                                add_ncbi_prefix(fastani_matching_reference)))
                            summary_list[1] = self.standardise_taxonomy(
                                taxa_str)
                            summary_list[7] = pplacer_leafnode
                            summary_list[8] = str(
                                self.species_radius.get(pplacer_leafnode))
                            summary_list[9] = ";".join(self.gtdb_taxonomy.get(
                                add_ncbi_prefix(pplacer_leafnode)))
                            if pplacer_leafnode in all_fastani_dict.get(userleaf.taxon.label):
                                summary_list[10] = round(all_fastani_dict.get(
                                    userleaf.taxon.label).get(pplacer_leafnode).get('ani'), 2)
                                summary_list[11] = all_fastani_dict.get(
                                    userleaf.taxon.label).get(pplacer_leafnode).get('af')
                            summary_list[14] = 'topological placement and ANI have incongruent species assignments'
                            summary_list[13] = 'ANI'

                            if len(sorted_dict) > 0:
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
summary_list[12] = pplacer_taxonomy_dict.get(
                        userleaf.taxon.label)
                    summary_list[13] = 'taxonomic classification defined by topology and ANI'
                    summary_list[16] = self.aa_percent_msa(
                        msa_dict.get(summary_list[0]))
                    summary_list[17] = trans_table_dict.get(summary_list[0])
                    if len(notes) > 0:
                        summary_list[19] = ';'.join(notes)

                    if fastani_matching_reference is not None:
                        summary_list[2] = fastani_matching_reference
                        summary_list[3] = str(
                            self.species_radius.get(fastani_matching_reference))
                        summary_list[4] = ";".join(self.gtdb_taxonomy.get(
                            add_ncbi_prefix(fastani_matching_reference)))
                        summary_list[5] = round(current_ani, 2)
                        summary_list[6] = current_af
                        if pplacer_leafnode == fastani_matching_reference:
                            if taxa_str.endswith("s__"):
                                taxa_str = taxa_str + pplacer_leafnode
                            summary_list[1] = self.standardise_taxonomy(
                                taxa_str)
                            summary_list[7] = summary_list[2]
                            summary_list[8] = summary_list[3]
                            summary_list[9] = summary_list[4]
                            summary_list[10] = summary_list[5]
                            summary_list[11] = summary_list[6]
                            summary_list[14] = 'topological placement and ANI have congruent species assignments'
                            if len(sorted_dict) > 0:
                                other_ref = '; '.join(self._formatnote(
                                    sorted_dict, [fastani_matching_reference]))
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
sorted_dict, [fastani_matching_reference]))
                                if len(other_ref) == 0:
                                    summary_list[15] = None
                                else:
                                    summary_list[15] = other_ref

                        else:
                            taxa_str = ";".join(self.gtdb_taxonomy.get(
                                add_ncbi_prefix(fastani_matching_reference)))
                            summary_list[1] = self.standardise_taxonomy(
                                taxa_str)
                            summary_list[7] = pplacer_leafnode
                            summary_list[8] = str(
                                self.species_radius.get(pplacer_leafnode))
                            summary_list[9] = ";".join(self.gtdb_taxonomy.get(
                                add_ncbi_prefix(pplacer_leafnode)))
                            if pplacer_leafnode in all_fastani_dict.get(userleaf.taxon.label):
                                summary_list[10] = round(all_fastani_dict.get(
                                    userleaf.taxon.label).get(pplacer_leafnode).get('ani'), 2)
                                summary_list[11] = all_fastani_dict.get(
                                    userleaf.taxon.label).get(pplacer_leafnode).get('af')
                            summary_list[14] = 'topological placement and ANI have incongruent species assignments'
                            summary_list[13] = 'ANI'

                            if len(sorted_dict) > 0:
                                other_ref = '; '.join(self._formatnote(
                                    sorted_dict, [fastani_matching_reference, pplacer_leafnode]))
                                if len(other_ref) == 0:
                                    summary_list[15] = None
                                else:
                                    summary_list[15] = other_ref
github Ecogenomics / GTDBTk / gtdbtk / classify.py View on Github external
summary_list[19] = ';'.join(notes)

                    if sorted_prefilter_af_dict[0][1].get('ani') >= self.species_radius.get(sorted_prefilter_af_dict[0][0]):
                        fastani_matching_reference = sorted_prefilter_af_dict[0][0]
                        exception_genomes.append(fastani_matching_reference)

                        taxa_str = ";".join(self.gtdb_taxonomy.get(
                            add_ncbi_prefix(fastani_matching_reference)))
                        summary_list[1] = self.standardise_taxonomy(
                            taxa_str)

                        summary_list[2] = fastani_matching_reference
                        summary_list[3] = str(
                            self.species_radius.get(fastani_matching_reference))
                        summary_list[4] = ";".join(self.gtdb_taxonomy.get(
                            add_ncbi_prefix(fastani_matching_reference)))
                        current_ani = all_fastani_dict.get(userleaf.taxon.label).get(
                            fastani_matching_reference).get('ani')
                        summary_list[5] = round(current_ani, 2)
                        current_af = all_fastani_dict.get(userleaf.taxon.label).get(
                            fastani_matching_reference).get('af')
                        summary_list[6] = current_af
                        summary_list[14] = 'topological placement and ANI have incongruent species assignments'
                        if len(notes) > 0:
                            summary_list[19] = ';'.join(notes)

                        summaryfout.write("{}\n".format(
                            '\t'.join(['N/A' if x is None else str(x) for x in summary_list])))
                        classified_user_genomes.append(userleaf.taxon.label)
                    else:
                        notes.append(
                            "Genome not assigned to closest species as it falls outside its pre-defined ANI radius")