I know there are already several questions about sorting XML, but none of them seems to work for my case. I have the following XML file, which represents an excerpt of the data schema of an Esri file geodatabase:
import xml.etree.ElementTree as ET
from operator import attrgetter
# Sample workspace document used for the sorting experiments below.
# It declares the esri, xsi and xs namespaces on the root element; the
# children are deliberately not in alphabetical order.
data = """<esri:Workspace xmlns:esri='http://www.esri.com/schemas/ArcGIS/10.8' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema'>
<WorkspaceDefinition xsi:type='esri:WorkspaceDefinition'>
<WorkspaceType>esriLocalDatabaseWorkspace</WorkspaceType>
<Version/>
<Domains xsi:type='esri:ArrayOfDomain'/>
<Sequences xsi:type='esri:ArrayOfSequence'/>
<DatasetDefinitions xsi:type='esri:ArrayOfDataElement'>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureDataset'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
</DatasetDefinitions>
</WorkspaceDefinition>
<WorkspaceData xsi:type='esri:WorkspaceData'/>
</esri:Workspace>"""
# Parse the XML text; root_1 is the root <esri:Workspace> element.
root_1 = ET.fromstring(data)
I want to sort it by tag and by the DataElement type, so that it is sorted like this:
WorkspaceData {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:WorkspaceData'}
WorkspaceDefinition {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:WorkspaceDefinition'}
DatasetDefinitions {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDataElement'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureDataset'}
Domains {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDomain'}
Sequences {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfSequence'}
Version {}
WorkspaceType {}
So far, I managed to sort by tag, but how can I sort by the DataElement type? Here is my code so far:
# One shared key function: every level below is ordered by element tag.
by_tag = attrgetter("tag")

# Top level: WorkspaceData, WorkspaceDefinition
root_1[:] = sorted(root_1, key=by_tag)
for node in root_1.findall("*"):
    # Children here: DatasetDefinitions, Domains, Sequences, Version, WorkspaceType
    node[:] = sorted(node, key=by_tag)
    print(node)
    for subnode in node.findall("*"):
        # Children here: DataElement, Domain
        subnode[:] = sorted(subnode, key=by_tag)
        # Attempt to sort by the type attribute instead -- not working!
        # subnode[:] = sorted(subnode, key=subnode.get['xsi:type'])
        print("\t", subnode.tag, subnode.attrib)
        for subsubnode in subnode.findall("*"):
            print("\t\t", subsubnode.tag, subsubnode.attrib)
            subsubnode[:] = sorted(subsubnode, key=by_tag)
If I understand correctly, you can slightly change the `key=` parameter in `sorted()`:
import xml.etree.ElementTree as ET
from operator import attrgetter

data = """<esri:Workspace xmlns:esri='http://www.esri.com/schemas/ArcGIS/10.8' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xmlns:xs='http://www.w3.org/2001/XMLSchema'>
<WorkspaceDefinition xsi:type='esri:WorkspaceDefinition'>
<WorkspaceType>esriLocalDatabaseWorkspace</WorkspaceType>
<Version/>
<Domains xsi:type='esri:ArrayOfDomain'/>
<Sequences xsi:type='esri:ArrayOfSequence'/>
<DatasetDefinitions xsi:type='esri:ArrayOfDataElement'>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureDataset'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
<DataElement xsi:type='esri:DEFeatureClass'/>
</DatasetDefinitions>
</WorkspaceDefinition>
<WorkspaceData xsi:type='esri:WorkspaceData'/>
</esri:Workspace>"""

# ElementTree stores the namespaced ``xsi:type`` attribute under its
# ``{namespace-uri}localname`` form, so that is the key to look up.
XSI_TYPE = "{http://www.w3.org/2001/XMLSchema-instance}type"


def tag_and_type(element):
    """Return the sort key ``(tag, xsi:type)`` for *element*.

    An element without an ``xsi:type`` attribute contributes an empty string,
    so it sorts before same-tag siblings that have one. Without the default,
    ``Element.get`` returns ``None`` and comparing ``None`` with a ``str``
    raises ``TypeError`` as soon as two siblings share a tag.
    """
    return element.tag, element.get(XSI_TYPE, "")


root_1 = ET.fromstring(data)
root_1[:] = sorted(root_1, key=attrgetter("tag"))  # WorkspaceData, WorkspaceDefinition
for node in root_1.findall("*"):
    # Children here: DatasetDefinitions, Domains, Sequences, Version, WorkspaceType
    node[:] = sorted(node, key=attrgetter("tag"))
    print(node)
    for subnode in node.findall("*"):  # DataElement, Domain
        # <--- change key= here: order by tag first, then by xsi:type
        subnode[:] = sorted(subnode, key=tag_and_type)
        print("\t", subnode.tag, subnode.attrib)
        for subsubnode in subnode.findall("*"):
            print("\t\t", subsubnode.tag, subsubnode.attrib)
            subsubnode[:] = sorted(subsubnode, key=attrgetter("tag"))
Prints:
<Element 'WorkspaceData' at 0x7f5ff630bec0>
<Element 'WorkspaceDefinition' at 0x7f5ff6316610>
DatasetDefinitions {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDataElement'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureClass'}
DataElement {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:DEFeatureDataset'}
Domains {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfDomain'}
Sequences {'{http://www.w3.org/2001/XMLSchema-instance}type': 'esri:ArrayOfSequence'}
Version {}
WorkspaceType {}