I have a phylogenetic tree in Newick format. I would like to collapse each group of species listed in a renaming table into a single tip and give that tip the new clade name from the table.
This is the tree:
((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);
This is the table to rename:
| species | clade_renaming |
|------------|----------------|
| A, B | X |
| F, G, H | Y |
Expected result:
(((X:0.3, C:0.3):0.15, (D:0.3, (E:0.1, Y:0.1):0.1):0.1):0.15, I:0.2);
This is the current code that can collapse nodes:
from Bio import Phylo
import io
# Example tree as a Newick string
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
# Parse the Newick string from an in-memory text buffer
tree = Phylo.read(io.StringIO(tree_structure), 'newick')
# Names of the clades that should be collapsed
nodes_to_collapse = ["A", "B", "F", "G", "H"]
def collapse_nodes(tree, nodes_to_collapse):
    """Collapse every clade of *tree* whose name is in *nodes_to_collapse*.

    Args:
        tree: A Bio.Phylo tree (any object exposing ``find_elements`` and
            ``collapse``). It is modified in place.
        nodes_to_collapse: Iterable of clade names to collapse.

    Returns:
        None.
    """
    wanted = set(nodes_to_collapse)
    # Snapshot the matches before touching the tree: find_elements() yields
    # its results lazily while walking the child lists, and collapse() edits
    # those same lists, so collapsing inside the live traversal skips the
    # sibling that follows each collapsed clade.
    matches = list(
        tree.find_elements(
            target=lambda clade: clade.name in wanted, order="postorder"
        )
    )
    for clade in matches:
        tree.collapse(clade)
# Collapse the listed clades in place, then plot the resulting tree
collapse_nodes(tree, nodes_to_collapse)
Phylo.draw(tree)
I created a function to collapse nodes and remove their direct children based on the renaming table. Within this function, I iterated through the rename table, identified the common ancestor for each group of species to be renamed, updated its name, and removed its direct children.
from Bio import Phylo
import io
# Example tree as a Newick string
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
# Maps a comma-separated group of species names to the name that replaces the group
rename_table = {"A, B": "X", "F, G, H": "Y"}
# Parse the Newick string from an in-memory text buffer
tree = Phylo.read(io.StringIO(tree_structure), 'newick')
# Function to collapse each listed group of species into one renamed tip
def collapse_and_remove_children(tree, rename_table):
    """Replace each group of species in *rename_table* with one renamed tip.

    For every entry, the common ancestor of the listed species is renamed and
    all of its descendants are dropped; the ancestor keeps its own branch
    length. The tree is modified in place.

    Args:
        tree: A Bio.Phylo tree (any object exposing ``common_ancestor`` whose
            clades expose ``get_terminals``, ``name`` and ``clades``).
        rename_table: Mapping from a comma-separated string of species names
            (e.g. ``"A, B"``) to the name given to the collapsed clade.

    Raises:
        ValueError: If a group lists no species, or if the group's common
            ancestor also contains species that are not in the group.
            Groups are processed in order, so earlier groups stay collapsed
            when a later one is rejected. Errors raised by
            ``tree.common_ancestor`` propagate unchanged.
    """
    for group, new_name in rename_table.items():
        # Tolerate stray whitespace and empty fields such as a trailing comma.
        names = [name.strip() for name in group.split(",") if name.strip()]
        if not names:
            raise ValueError(f"no species listed for {new_name!r}")

        ancestor = tree.common_ancestor(names)

        # Emptying the ancestor removes every tip below it, so refuse when
        # that would silently delete species the table did not mention.
        stray = {leaf.name for leaf in ancestor.get_terminals()} - set(names)
        if stray:
            raise ValueError(
                f"cannot collapse {names!r} into {new_name!r}: their common "
                f"ancestor also contains {sorted(map(str, stray))!r}"
            )

        ancestor.name = new_name
        ancestor.clades = []
# Collapse each group in rename_table into its renamed common ancestor (modifies tree in place)
collapse_and_remove_children(tree, rename_table)
# Plot the resulting tree
Phylo.draw(tree)
Hi, after some experimentation, I found a suitable solution, shown below:
from Bio import Phylo
import io
# Input tree (Newick string) and renaming table
# (comma-separated group of species names -> name that replaces the group)
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
rename_table = {"A, B": "X", "F, G, H": "Y"}
# Parse the Newick string from an in-memory text buffer
tree = Phylo.read(io.StringIO(tree_structure), 'newick')
# Function to collapse and rename nodes
def collapse_and_rename(tree, rename_table):
    """Collapse each group of species in *rename_table* into one renamed tip.

    For every entry, the common ancestor of the listed species is renamed and
    its descendants are removed, so the group appears as a single tip. The
    tree is modified in place.

    Args:
        tree: A Bio.Phylo tree (any object exposing ``common_ancestor`` whose
            clades expose ``get_terminals``, ``name`` and ``clades``).
        rename_table: Mapping from a comma-separated string of species names
            (e.g. ``"F, G, H"``) to the name given to the collapsed clade.

    Raises:
        ValueError: If a group lists no species, or if the group's common
            ancestor also contains species that are not in the group.
            Groups are processed in order, so earlier groups stay collapsed
            when a later one is rejected. Errors raised by
            ``tree.common_ancestor`` propagate unchanged.
    """
    for group, new_name in rename_table.items():
        # Tolerate stray whitespace and empty fields such as a trailing comma.
        species = [name.strip() for name in group.split(",") if name.strip()]
        if not species:
            raise ValueError(f"no species listed for {new_name!r}")

        common_ancestor = tree.common_ancestor(species)

        # Pruning the ancestor removes every tip below it, so refuse when
        # that would silently delete species the table did not mention.
        stray = {
            leaf.name for leaf in common_ancestor.get_terminals()
        } - set(species)
        if stray:
            raise ValueError(
                f"cannot collapse {species!r} into {new_name!r}: their common "
                f"ancestor also contains {sorted(map(str, stray))!r}"
            )

        common_ancestor.name = new_name
        # The ancestor's own branch length is deliberately left untouched:
        # it is the distance from the rest of the tree to the collapsed
        # clade (e.g. Y:0.1 in the example), which summing or subtracting
        # the children's lengths would destroy.
        common_ancestor.clades = []
# Apply rename_table to the tree (modifies tree in place)
collapse_and_rename(tree, rename_table)
# Function to collapse the clades whose names are listed
def collapse_nodes(tree, nodes_to_collapse):
    """Collapse every clade of *tree* whose name is in *nodes_to_collapse*.

    Args:
        tree: A Bio.Phylo tree (any object exposing ``find_elements`` and
            ``collapse``). It is modified in place.
        nodes_to_collapse: Iterable of clade names to collapse.

    Returns:
        None.
    """
    wanted = set(nodes_to_collapse)
    # Snapshot the matches before touching the tree: find_elements() yields
    # its results lazily while walking the child lists, and collapse() edits
    # those same lists, so collapsing inside the live traversal skips the
    # sibling that follows each collapsed clade.
    matches = list(
        tree.find_elements(
            target=lambda clade: clade.name in wanted, order="postorder"
        )
    )
    for clade in matches:
        tree.collapse(clade)
# Names of the species that make up the groups in rename_table
nodes_to_collapse = ["A", "B", "F", "G", "H"]
# Collapse the listed clades in place
collapse_nodes(tree, nodes_to_collapse)
# Plot the resulting tree
Phylo.draw(tree)