Search code examples
biopython, phylogeny

Biopython Phylo: collapse the clades whose species are in a list and rename each collapsed node


I have a phylogenetic tree in Newick format. For each group of species listed in a table, I would like to collapse the group into a single tip and give that tip the new name from the table.

This is the tree:

((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);

This is the renaming table:

| species | clade_renaming |
|------------|----------------|
| A, B       | X              |
| F, G, H    | Y              |

Expected result:

(((X:0.3, C:0.3):0.15, (D:0.3, (E:0.1, Y:0.1):0.1):0.1):0.15, I:0.2);

This is my current code, which collapses the listed nodes but does not rename anything:

from Bio import Phylo
import io

# Example tree from the question, written as a Newick string with branch lengths
tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
# Parse the Newick text from an in-memory buffer into a Bio.Phylo tree
tree = Phylo.read(io.StringIO(tree_structure), 'newick')

# Names of the tree elements that collapse_nodes() should collapse
nodes_to_collapse = ["A", "B", "F", "G", "H"]

def collapse_nodes(tree, nodes_to_collapse):
    """Collapse every element of ``tree`` whose name is in ``nodes_to_collapse``.

    Matching elements are requested from ``tree.find_elements`` in
    post-order, and each one is handed to ``tree.collapse`` as it is
    produced. Nothing is returned; all changes happen through
    ``tree.collapse``.
    """
    def is_listed(element):
        # Membership test against the caller-supplied collection of names.
        return element.name in nodes_to_collapse

    matches = tree.find_elements(target=is_listed, order="postorder")
    for match in matches:
        tree.collapse(match)

# Collapse the listed nodes in place, then plot the resulting tree
collapse_nodes(tree, nodes_to_collapse)
Phylo.draw(tree)

Solution

  • I created a function to collapse nodes and remove their direct children based on the renaming table. Within this function, I iterated through the rename table, identified the common ancestor for each group of species to be renamed, updated its name, and removed its direct children.

    from Bio import Phylo
    import io
    
    # Newick string of the example tree from the question
    tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
    # Each key is a comma-separated group of species names; the value is the
    # name given to the node that replaces that group
    rename_table = {"A, B": "X", "F, G, H": "Y"}
    
    tree = Phylo.read(io.StringIO(tree_structure), 'newick')
    
    # Function to collapse specified nodes and remove their direct children
    def collapse_and_remove_children(tree, rename_table):
        """Turn each species group's common ancestor into one renamed tip.

        ``rename_table`` maps a comma-separated string of species names to
        the replacement name. For every entry the names are split on commas
        and stripped, their common ancestor is looked up with
        ``tree.common_ancestor``, its child list is emptied and it is given
        the replacement name. The ancestor's own branch length is not
        touched.

        Note that emptying the child list discards everything below the
        ancestor, not only the species named in the entry.
        """
        for species_key in rename_table:
            # "A, B" -> ["A", "B"]
            members = [piece.strip() for piece in species_key.split(',')]
            ancestor = tree.common_ancestor(members)
            # Dropping the children makes the ancestor a terminal node
            ancestor.clades = []
            ancestor.name = rename_table[species_key]
    
    # Collapse specified nodes and remove their direct children
    collapse_and_remove_children(tree, rename_table)
    
    # Draw the tree
    Phylo.draw(tree)
    

    enter image description here

    After some experimenting, I found a suitable solution, shown below:

    from Bio import Phylo
    import io
    
    # Input tree and table
    tree_structure = "((((A:0.1, B:0.2):0.3, C:0.3):0.15, (D:0.3, (E:0.1, (F:0.15, (G:0.1, H:0.1):0.1):0.1):0.1):0.1):0.15, I:0.2);"
    rename_table = {"A, B": "X", "F, G, H": "Y"}
    
    # Read the tree
    tree = Phylo.read(io.StringIO(tree_structure), 'newick')
    
    # Function to collapse and rename nodes
    def collapse_and_rename(tree, rename_table):
        """Replace each listed group of species with a single renamed tip.

        ``rename_table`` maps a comma-separated string of species names
        (for example ``"F, G, H"``) to the name of the tip that replaces
        the group. For every entry, the group's common ancestor is looked
        up with ``tree.common_ancestor``, renamed, and turned into a
        terminal node by removing its children. The tree is modified in
        place and nothing is returned.

        Note that the whole subtree below the common ancestor is removed,
        including any species that were not named in the entry.
        """
        for species_key, new_name in rename_table.items():
            # "F, G, H" -> ["F", "G", "H"]
            species = [name.strip() for name in species_key.split(',')]
            common_ancestor = tree.common_ancestor(species)

            # The collapsed tip takes the place of the ancestor, so it keeps
            # the ancestor's own branch length (X:0.3 and Y:0.1 for the
            # example tree). Summing the children's lengths into it would
            # give Y:0.25, and subtracting that sum from the children would
            # make their branch lengths negative.
            common_ancestor.name = new_name

            # Removing the children is what actually collapses the clade;
            # the ancestor becomes a terminal node.
            common_ancestor.clades = []
    
    
    
    # Collapse and rename nodes
    collapse_and_rename(tree, rename_table)
    
    # Function to collapse nodes and update branch lengths
    def collapse_nodes(tree, nodes_to_collapse):
        """Collapse each element of ``tree`` named in ``nodes_to_collapse``.

        Elements are requested from ``tree.find_elements`` in post-order;
        every element whose ``name`` is in ``nodes_to_collapse`` is passed
        to ``tree.collapse``. Returns nothing.
        """
        def name_is_listed(candidate):
            return candidate.name in nodes_to_collapse

        to_collapse = tree.find_elements(target=name_is_listed, order="postorder")
        for element in to_collapse:
            tree.collapse(element)
    nodes_to_collapse = ["A", "B", "F", "G", "H"]
    # Collapse nodes and update branch lengths
    collapse_nodes(tree, nodes_to_collapse)
    
    # Draw the tree
    Phylo.draw(tree)
    

    enter image description here