I have a list in Python and I want to transform it into an XML-like result. The input is:
tasks = [
('task1', 'Type1', 'Description1', 'task11', 'Type11', 'Description11'),
('task2', 'Type2', 'Description2', 'task22', 'Type22', 'Description22'),
('task11', 'Type11', 'Description11', 'task33', 'Type33', 'Description33'),
('task33', 'Type33', 'Description33', 'task3', 'Type3', 'Description3'),
('task3', 'Type3', 'Description3', 'task5', 'Type5', 'Description5'),
('task4', 'Type4', 'Description4', 'task6', 'Type6', 'Description6'),
('task6', 'Type6', 'Description6', 'task7', 'Type7', 'Description7'),
('task7', 'Type7', 'Description7', 'task8', 'Type8', 'Description8'),
('taskX', 'TypeX', 'DescriptionX', 'task33', 'Type33', 'Description33'),
]
and the desired output is:
<task taskId="task1" taskIdType="Type1" taskIdDescription="Description1">
<task taskIdRelated="task11" taskIdRelatedType="Type11" taskIdRelatedDescription="Description11">
<task taskIdRelated="task33" taskIdRelatedType="Type33" taskIdRelatedDescription="Description33">
<task taskIdRelated="task3" taskIdRelatedType="Type3" taskIdRelatedDescription="Description3">
<task taskIdRelated="task5" taskIdRelatedType="Type5" taskIdRelatedDescription="Description5" />
</task>
</task>
</task>
</task>
<task taskId="task2" taskIdType="Type2" taskIdDescription="Description2">
<task taskIdRelated="task22" taskIdRelatedType="Type22" taskIdRelatedDescription="Description22" />
</task>
<task taskId="task4" taskIdType="Type4" taskIdDescription="Description4">
<task taskIdRelated="task6" taskIdRelatedType="Type6" taskIdRelatedDescription="Description6">
<task taskIdRelated="task7" taskIdRelatedType="Type7" taskIdRelatedDescription="Description7">
<task taskIdRelated="task8" taskIdRelatedType="Type8" taskIdRelatedDescription="Description8" />
</task>
</task>
</task>
<task taskId="taskX" taskIdType="TypeX" taskIdDescription="DescriptionX">
<task taskIdRelated="task33" taskIdRelatedType="Type33" taskIdRelatedDescription="Description33">
<task taskIdRelated="task3" taskIdRelatedType="Type3" taskIdRelatedDescription="Description3">
<task taskIdRelated="task5" taskIdRelatedType="Type5" taskIdRelatedDescription="Description5" />
</task>
</task>
</task>
I am trying the following code but I cannot print the last child. At node task1 I cannot show task5 as the last child, at node task2 I cannot show task22 as the last child, at node task4 I cannot show task8 as the last child and at node taskX I cannot show task5 as the last child.
import xml.etree.ElementTree as ET
# Sample rows. Each tuple has six columns: the task's own
# (taskId, taskIdType, taskIdDescription) followed by the related task's
# (taskIdRelated, taskIdRelatedType, taskIdRelatedDescription).
tasks = [
('task1', 'Type1', 'Description1', 'task11', 'Type11', 'Description11'),
('task2', 'Type2', 'Description2', 'task22', 'Type22', 'Description22'),
('task11', 'Type11', 'Description11', 'task33', 'Type33', 'Description33'),
('task33', 'Type33', 'Description33', 'task3', 'Type3', 'Description3'),
('task3', 'Type3', 'Description3', 'task5', 'Type5', 'Description5'),
('task4', 'Type4', 'Description4', 'task6', 'Type6', 'Description6'),
('task6', 'Type6', 'Description6', 'task7', 'Type7', 'Description7'),
('task7', 'Type7', 'Description7', 'task8', 'Type8', 'Description8'),
('taskX', 'TypeX', 'DescriptionX', 'task33', 'Type33', 'Description33'),
]
def build_xml(tasks, task_id):
    """Build a <task> element for one row and nest its related task inside.

    ``tasks`` is the full list of six-column rows and ``task_id`` is the row
    to render (despite its name it is a whole row, not just an id).

    The related task is looked up by searching ``tasks`` for the first row
    whose own id equals column 3 of ``task_id``.  When no such row exists,
    nothing is nested, so a related task that never has a row of its own is
    not emitted at all.
    """
    node = ET.Element(
        'task',
        {
            'taskId': task_id[0],
            'taskIdType': task_id[1],
            'taskIdDescription': task_id[2],
        },
    )

    matching_rows = [row for row in tasks if row[0] == task_id[3]]
    if not matching_rows:
        return node

    # Only the first matching row is followed.
    node.append(build_xml(tasks, matching_rows[0]))
    return node
def find_root_task(tasks):
    """Return the ids of rows that are never another row's related task.

    The result is a list built by iterating a set, so its order is not
    defined.
    """
    own_ids = {row[0] for row in tasks}
    referenced_ids = {row[3] for row in tasks}
    return [candidate for candidate in own_ids if candidate not in referenced_ids]
# The set of root ids does not change while looping, so compute it once
# instead of rescanning the whole list for every row.
root_ids = set(find_root_task(tasks))

for task in tasks:
    task_id = task[0]
    if task_id not in root_ids:
        continue
    task_element = build_xml(tasks, task)
    # Convert the XML element to string
    xml_str = ET.tostring(task_element, encoding='unicode')
    print(xml_str)
Can you help me? Any help in any language is accepted. Thank you
The main issue is that your current output does not include the deepest elements. That happens because those tasks only ever appear as a related task (columns 4-6) and never have a row of their own in your input list, so the lookup finds nothing and no XML element is created for them. In that case you should still create the element from the information that is available in the parent's row.
To indent your output, you can use the `ET.indent()` function (available since Python 3.9).
You can also avoid repeatedly iterating through your input list by first creating a dictionary from it, keyed by the task ids.
Here is the adapted code:
def create_element(taskId, taskType, taskDescription):
    """Return a childless <task> element carrying the three given values.

    The values are stored, in this order, as the ``taskId``, ``taskIdType``
    and ``taskIdDescription`` attributes.
    """
    attributes = {
        'taskId': taskId,
        'taskIdType': taskType,
        'taskIdDescription': taskDescription,
    }
    return ET.Element('task', attributes)
def build_xml(task_dict, task):
    """Build the nested <task> element for ``task`` and its chain of related tasks.

    ``task_dict`` maps a task id to its six-column row; ``task`` is the row
    to render, or a falsy value (e.g. ``None``) when there is no row.

    Returns the element for ``task`` with exactly one child (its related
    task), or ``None`` when ``task`` is falsy.
    """
    if not task:
        return None

    task_element = create_element(*task[:3])
    related_task = task_dict.get(task[3])

    # Compare against None explicitly: the truth value of an Element depends
    # on whether it has children, and testing it is deprecated.
    related_element = build_xml(task_dict, related_task)
    if related_element is None:
        # The related task has no row of its own (it is a leaf), so build it
        # from the related columns of the current row.
        related_element = create_element(*task[3:])

    task_element.append(related_element)
    return task_element
def find_root_tasks(task_dict):
    """Return the rows whose id is never another row's related task.

    ``task_dict`` maps a task id to its six-column row.  The roots are
    returned in the dictionary's insertion order, so the result is the same
    on every run instead of following arbitrary set ordering.
    """
    related_task_ids = {row[3] for row in task_dict.values()}
    return [
        row
        for task_id, row in task_dict.items()
        if task_id not in related_task_ids
    ]
# Index the rows by their own task id once; the rest of the algorithm
# works with this dictionary instead of the list
task_dict = {row[0]: row for row in tasks}

for root_task in find_root_tasks(task_dict):
    root_element = build_xml(task_dict, root_task)
    # Indent the tree in place, one tab per nesting level, before serialising
    ET.indent(root_element, space="\t", level=0)
    print(ET.tostring(root_element, encoding='unicode'))