Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _find_ingroup_taxon(self, ingroup_taxon, tree):
    """Return the unique node of `tree` whose label names the ingroup taxon.

    Parameters
    ----------
    ingroup_taxon : str
        Taxon name searched for among the ';'-separated taxa of each
        node label.
    tree : Tree
        Tree whose nodes are visited with ``postorder_node_iter()``.

    Returns
    -------
    Node
        The single node whose label contains `ingroup_taxon`.

    Raises
    ------
    GTDBTkExit
        If the taxon labels more than one node, or no node at all.
    """
    match = None
    for candidate in tree.postorder_node_iter():
        # only the taxon portion of the parsed label is needed here
        _support, label_taxa, _aux_info = parse_label(candidate.label)
        if not label_taxa:
            continue

        # a single label may carry several taxa separated by ';'
        names = [name.strip() for name in label_taxa.split(';')]
        if ingroup_taxon not in names:
            continue

        # fail as soon as a second matching node turns up
        if match is not None:
            raise GTDBTkExit(f'Ingroup taxon {ingroup_taxon} '
                             f'identified multiple times.')
        match = candidate

    if match is None:
        raise GTDBTkExit(f'Ingroup taxon {ingroup_taxon} not found in tree.')

    return match
Returns
-------
dict : d[rank_index][taxon] -> relative divergence
"""
# calculate relative distance for all nodes
self.decorate_rel_dist(tree)
# record the relative divergence of each named internal node under its most-specific rank
rel_dists = defaultdict(dict)
for node in tree.preorder_node_iter(lambda n: n != tree.seed_node):
if not node.label or node.is_leaf():
continue
# check for support value
_support, taxon_name, _auxiliary_info = parse_label(node.label)
if not taxon_name:
continue
# get most-specific rank if a node represents multiple ranks
if ';' in taxon_name:
taxon_name = taxon_name.split(';')[-1].strip()
most_specific_rank = taxon_name[0:3]
rel_dists[Taxonomy.rank_index[most_specific_rank]
][taxon_name] = node.rel_dist
return rel_dists
Parameters
----------
leaf : Node
Node in tree.
Returns
-------
list
Taxa for leaf in rank order.
"""
leaf_taxa = []
parent = leaf
while parent:
_support, taxon, _aux_info = parse_label(parent.label)
if taxon:
for t in taxon.split(';')[::-1]:
leaf_taxa.append(t.strip())
parent = parent.parent_node
ordered_taxa = leaf_taxa[::-1]
# fill in missing ranks
last_rank = ordered_taxa[-1][0:3]
for i in range(Taxonomy.rank_prefixes.index(last_rank) + 1, len(Taxonomy.rank_prefixes)):
ordered_taxa.append(Taxonomy.rank_prefixes[i])
return ordered_taxa
count += 1
taxa = []
cur_node = leaf
current_rel_dist = 1.0
while cur_node.parent_node:
if hasattr(cur_node, 'rel_dist') and current_rel_dist == 1.0 and cur_node.rel_dist < 1.0:
current_rel_dist = cur_node.rel_dist
if cur_node.is_internal():
child_genomes = [nd.taxon.label for nd in cur_node.leaf_nodes(
) if nd.taxon.label not in user_genome_ids]
if len(child_genomes) == 1:
is_on_terminal_branch = True
term_branch_taxonomy = self.gtdb_taxonomy.get(
child_genomes[0])
_support, taxon, _aux_info = parse_label(
cur_node.label)
if taxon:
for t in taxon.split(';')[::-1]:
taxa.append(t.strip())
cur_node = cur_node.parent_node
taxa_str = ';'.join(taxa[::-1])
pplacer_tax = str(taxa_str)
if is_on_terminal_branch:
tax_of_leaf = term_branch_taxonomy[term_branch_taxonomy.index(
taxa_str.split(';')[-1]) + 1:-1]
#print ('tax_of_leaf', tax_of_leaf)
taxa_str = self._classify_on_terminal_branch(
tax_of_leaf, current_rel_dist, taxa_str.split(';')[-1][0:3], term_branch_taxonomy, marker_dict)
else:
Parameters
----------
tree : Dendropy Tree
Phylogenetic tree.
Returns
-------
list
List of phyla level lineages.
"""
phyla = []
for node in tree.preorder_node_iter():
if not node.label or node.is_leaf():
continue
_support, taxon_name, _auxiliary_info = parse_label(node.label)
if taxon_name:
taxa = [x.strip() for x in taxon_name.split(';')]
if taxa[-1].startswith('p__'):
phyla.append(taxa[-1])
return phyla
for r in rel_dists.keys():
rel_dists[r].pop(p, None)
for t in children:
for r in rel_dists.keys():
rel_dists[r].pop(t, None)
phylum_rel_dists[phylum] = rel_dists
# calculate relative distance to all nodes
rd.decorate_rel_dist(cur_tree)
# determine which lineage represents the 'ingroup'
ingroup_subtree = None
for c in cur_tree.seed_node.child_node_iter():
_support, taxon_name, _auxiliary_info = parse_label(c.label)
if not taxon_name or p not in taxon_name:
ingroup_subtree = c
break
# do a preorder traversal of 'ingroup' and record relative
# divergence to nodes
for n in ingroup_subtree.preorder_iter():
rel_node_dists[n.id].append(n.rel_dist)
sys.stdout.write(
'==> Inference for RED distributions finished. ')
sys.stdout.flush()
sys.stdout.write('\n')
return phylum_rel_dists, rel_node_dists
taxa_for_dist_inference.intersection_update(valid_taxa)
# explicitly add in the species since they have no
# children and thus would be absent from the taxon_child dictionary
taxa_for_dist_inference.update(species)
# restrict taxa used for inferring distribution to those with
# sufficient support
if min_support > 0:
for node in tree.preorder_node_iter():
if not node.label or node.is_leaf():
continue
# check for support value
support, taxon_name, _auxiliary_info = parse_label(node.label)
if not taxon_name:
continue
if support and float(support) < min_support:
taxa_for_dist_inference.difference_update([taxon_name])
elif not support and min_support > 0:
# no support value, so inform user if they were trying to
# filter on this property
print(
'[Error] Tree does not contain support values. As such, --min_support should be set to 0.')
continue
# restrict taxa used for inferring distribution to the trusted set
if trusted_taxa:
taxa_for_dist_inference = trusted_taxa.intersection(