Search code examples
python xml list recursion

Recursive loop on a list to print xml with indent


I have a list in Python and I want to transform it into an XML-like result. The input is:

# Each row has six columns, in this order:
#   taskId, taskIdType, taskIdDescription,
#   taskIdRelated, taskIdRelatedType, taskIdRelatedDescription
tasks = [
    ('task1', 'Type1', 'Description1', 'task11', 'Type11', 'Description11'),
    ('task2', 'Type2', 'Description2', 'task22', 'Type22', 'Description22'),
    ('task11', 'Type11', 'Description11', 'task33', 'Type33', 'Description33'),
    ('task33', 'Type33', 'Description33', 'task3', 'Type3', 'Description3'),
    ('task3', 'Type3', 'Description3', 'task5', 'Type5', 'Description5'),
    ('task4', 'Type4', 'Description4', 'task6', 'Type6', 'Description6'),
    ('task6', 'Type6', 'Description6', 'task7', 'Type7', 'Description7'),
    ('task7', 'Type7', 'Description7', 'task8', 'Type8', 'Description8'),
    ('taskX', 'TypeX', 'DescriptionX', 'task33', 'Type33', 'Description33'),
]

and the desired output is:

<task taskId="task1" taskIdType="Type1" taskIdDescription="Description1">
    <task taskIdRelated="task11" taskIdRelatedType="Type11" taskIdRelatedDescription="Description11">
        <task taskIdRelated="task33" taskIdRelatedType="Type33" taskIdRelatedDescription="Description33">
            <task taskIdRelated="task3" taskIdRelatedType="Type3" taskIdRelatedDescription="Description3">
                <task taskIdRelated="task5" taskIdRelatedType="Type5" taskIdRelatedDescription="Description5" />
            </task>
        </task>
    </task>
</task>
<task taskId="task2" taskIdType="Type2" taskIdDescription="Description2">
    <task taskIdRelated="task22" taskIdRelatedType="Type22" taskIdRelatedDescription="Description22" />
</task>
<task taskId="task4" taskIdType="Type4" taskIdDescription="Description4">
    <task taskIdRelated="task6" taskIdRelatedType="Type6" taskIdRelatedDescription="Description6">
        <task taskIdRelated="task7" taskIdRelatedType="Type7" taskIdRelatedDescription="Description7">
            <task taskIdRelated="task8" taskIdRelatedType="Type8" taskIdRelatedDescription="Description8" />
        </task>
    </task>
</task>
<task taskId="taskX" taskIdType="TypeX" taskIdDescription="DescriptionX">
    <task taskIdRelated="task33" taskIdRelatedType="Type33" taskIdRelatedDescription="Description33">
        <task taskIdRelated="task3" taskIdRelatedType="Type3" taskIdRelatedDescription="Description3">
            <task taskIdRelated="task5" taskIdRelatedType="Type5" taskIdRelatedDescription="Description5" />
        </task>
    </task>
</task>

I am trying the following code, but it never prints the last child of each chain: under task1 I cannot show task5, under task2 I cannot show task22, under task4 I cannot show task8, and under taskX I cannot show task5.

import xml.etree.ElementTree as ET

# Sample list of tasks. Each tuple holds six columns, in this order:
#   taskId, taskIdType, taskIdDescription,
#   taskIdRelated, taskIdRelatedType, taskIdRelatedDescription
# Columns 0-2 describe a task; columns 3-5 describe the task it is related to.
tasks = [
    ('task1', 'Type1', 'Description1', 'task11', 'Type11', 'Description11'),
    ('task2', 'Type2', 'Description2', 'task22', 'Type22', 'Description22'),
    ('task11', 'Type11', 'Description11', 'task33', 'Type33', 'Description33'),
    ('task33', 'Type33', 'Description33', 'task3', 'Type3', 'Description3'),
    ('task3', 'Type3', 'Description3', 'task5', 'Type5', 'Description5'),
    ('task4', 'Type4', 'Description4', 'task6', 'Type6', 'Description6'),
    ('task6', 'Type6', 'Description6', 'task7', 'Type7', 'Description7'),
    ('task7', 'Type7', 'Description7', 'task8', 'Type8', 'Description8'),
    ('taskX', 'TypeX', 'DescriptionX', 'task33', 'Type33', 'Description33'),
]

def build_xml(tasks, task_id):
    """Build a <task> element for one row and nest the row it relates to.

    ``task_id`` is a whole row (not just an id). Its first three columns
    become the ``taskId``, ``taskIdType`` and ``taskIdDescription``
    attributes. The first row of ``tasks`` whose column 0 equals this row's
    column 3 is built recursively and appended as the only child; when no
    such row exists the element is returned without children.
    """
    attribute_names = ('taskId', 'taskIdType', 'taskIdDescription')
    node = ET.Element('task')
    for column, attribute in enumerate(attribute_names):
        node.set(attribute, task_id[column])

    # Only rows that themselves appear in column 0 can be followed further.
    next_row = next((row for row in tasks if row[0] == task_id[3]), None)
    if next_row is None:
        return node

    node.append(build_xml(tasks, next_row))
    return node

def find_root_task(tasks):
    """Return the ids found in column 0 that never occur in column 3.

    The result is a list of ids (not rows); its order follows set
    iteration order, so callers should not rely on it.
    """
    own_ids, referenced_ids = set(), set()
    for row in tasks:
        own_ids.add(row[0])
        referenced_ids.add(row[3])
    return [candidate for candidate in own_ids if candidate not in referenced_ids]

# The set of root ids does not change while printing, so compute it once
# instead of rescanning the whole task list for every row.
root_ids = set(find_root_task(tasks))

# Walk the rows in input order so the trees are printed in that order.
for task in tasks:
    task_id = task[0]
    if task_id in root_ids:
        task_element = build_xml(tasks, task)
        # Convert the XML element to string
        xml_str = ET.tostring(task_element, encoding='unicode')
        print(xml_str)

Can you help me? Any help in any language is accepted. Thank you


Solution

  • The main issue is that your current output does not include the deepest elements. That happens because their related task is not an entry in your input list, and so no XML element is created for it. But in that case you should still create the element with the information available.

    To indent your output, you can use the ET.indent() function (available since Python 3.9).

    You can also avoid repeatedly iterating through your input list by first creating a dictionary from it, keyed by the task ids.

    Here is the adapted code:

    def create_element(taskId, taskType, taskDescription):
        """Return a childless <task> element with the three values as attributes."""
        attributes = {
            'taskId': taskId,
            'taskIdType': taskType,
            'taskIdDescription': taskDescription,
        }
        return ET.Element('task', attributes)
    
    def build_xml(task_dict, task):
        """Build the nested <task> element for ``task`` and its chain of related tasks.

        ``task_dict`` maps a task id to its full row; ``task`` is one such row
        (or None). Columns 0-2 of the row describe the task itself, columns
        3-5 describe the related task.

        Returns the element for ``task`` with exactly one child (the related
        task), or None when ``task`` is None or empty.
        """
        if not task:
            return None

        task_element = create_element(*task[:3])
        related_task = task_dict.get(task[3])
        related_element = build_xml(task_dict, related_task)
        # Compare against None explicitly: the truth value of an Element
        # depends on its child count and testing it is deprecated.
        if related_element is None:
            # The related task has no row of its own (it is a leaf), so
            # create its element from the columns available on this row.
            related_element = create_element(*task[3:])
        task_element.append(related_element)
        return task_element
        
    def find_root_tasks(task_dict):
        """Return the rows whose id is never referenced as a related task.

        ``task_dict`` maps a task id to its row; column 3 of a row is the id
        of the related task. Rows are returned in the dictionary's insertion
        order, so the result is the same on every run instead of depending
        on set iteration order.
        """
        related_task_ids = {t[3] for t in task_dict.values()}
        return [
            task
            for task_id, task in task_dict.items()
            if task_id not in related_task_ids
        ]
    
    # Index every row by its own task id once, so later lookups of a
    # related task do not have to scan the whole list.
    task_dict = {row[0]: row for row in tasks}
    for root_task in find_root_tasks(task_dict):
        root_element = build_xml(task_dict, root_task)
        # indent() modifies the tree in place; call it before serialising.
        ET.indent(root_element, "\t")
        print(ET.tostring(root_element, encoding='unicode'))