I would to convert XML file into pandas dataframe.
Afterwards I would like to convert the dataframe back to exactly the same XML.
[First step]: save the XLM file from string:
xml = '''<system>
<station id="1" type="DeviceName Functional Test">
<stationId>Functional_Test</stationId>
</station>
<software>
<component version="1.0.0.0" date="01/01/1889">Functional_Test.exe</component>
</software>
</system>'''
element = ET.XML(xml)
ET.indent(element)
pretty_xml = ET.tostring(element, encoding='unicode')
xml_file_name = r"save_xml.xml"
xml_file=open(xml_file_name,"w")
xml_file.write(str(pretty_xml))
xml_file.close()
[Second step]: read the xml and save it as csv using pandas:
import pandas as pd
df = pd.read_xml(xml_file_name)
df.to_csv("xml_to_csv.csv")
[Third step]: convert dataframe back to XML:
back_to_xml = df.to_xml(index = False, root_name = "system", xml_declaration = False)
remove_rows = back_to_xml.replace("<row>", "").replace("</row>", "")
print(remove_rows)
Unfortunately the output XML is not the same as the input XML.
<system>
<id>1.0</id>
<type>DeviceName Functional Test</type>
<stationId>Functional_Test</stationId>
<component/>
<id/>
<type/>
<stationId/>
<component>Functional_Test.exe</component>
</system>
I would much apricate any support and guidance.
[Second step]: Save XML as DataFrame Using James code in How to convert an XML string to a dictionary? XML can be converted to well structured df.
In my case:
import xml.etree.ElementTree as ET
class XmlListConfig(list):
def __init__(self, aList):
for element in aList:
if element:
if len(element) == 1 or element[0].tag != element[1].tag:
self.append(XmlDictConfig(element))
elif element[0].tag == element[1].tag:
self.append(XmlListConfig(element))
elif element.text:
text = element.text.strip()
if text:
self.append(text)
class XmlDictConfig(dict):
def __init__(self, parent_element):
if parent_element.items():
self.update(dict(parent_element.items()))
for element in parent_element:
if element:
if len(element) == 1 or element[0].tag != element[1].tag:
aDict = XmlDictConfig(element)
else:
aDict = {element[0].tag: XmlListConfig(element)}
if element.items():
aDict.update(dict(element.items()))
self.update({element.tag: aDict})
elif element.items():
self.update({element.tag: dict(element.items())})
else:
self.update({element.tag: element.text})
path = r"save_xml.xml"
tree = ET.parse(path)
root = tree.getroot()
xml_dict = XmlDictConfig(root)
df = pd.DataFrame.from_dict(xml_dict)
csv_path = rf"{path[:-4]}.csv"
df.to_csv(csv_path)
[Third step]: using ElementTree structure convert dataframe back to XML:
def parse_df_to_xml(csv_path):
df = pd.read_csv(csv_path, index_col=0)
root = ET.Element("system")
for column in df.columns:
row = df[column]
if column == "station":
station = ET.SubElement(root, str(column), id = row.id, type = row.type)
ET.SubElement(station, "stationId").text = row.stationId
elif column == "software":
software = ET.SubElement(root, str(column))
component_dict = eval(row.component)
ET.SubElement(software, "component", version = component_dict["version"], date = component_dict["date"]).text = "Functional_Test.exe"
return root
root = parse_df_to_xml(csv_path)
tree = ET.ElementTree(root)
ET.indent(tree, space="\t", level=0)
tree.write("df_to_xml.xml")