Search code examples
pythonxml

python merge xml files


Say input a.xml:

<?xml version="1.0" encoding="utf-8"?>
<permissions>
  <privapp-permissions package="a">
        <permission name="x"/>
        <permission name="y"/>
  </privapp-permissions>
</permissions>

And b.xml:

<?xml version="1.0" encoding="utf-8"?>
<permissions>
  <privapp-permissions package="a">
        <permission name="x"/>
        <permission name="z"/>
  </privapp-permissions>
</permissions>

Expected output c.xml:

<?xml version="1.0" encoding="utf-8"?>
<permissions>
  <privapp-permissions package="a">
        <permission name="x"/>
        <permission name="y"/>
        <permission name="z"/>
  </privapp-permissions>
</permissions>

I wrote the code below, but it just appends the two documents one after the other instead of merging them:

import xml.etree.ElementTree as ET

# Two input documents and the file that receives the combined result.
fname1 = "a.xml"
fname2 = "b.xml"
fname3 = "c.xml"

# Parse each input and keep a handle on its root element.
tree1 = ET.parse(fname1)
root1 = tree1.getroot()
tree2 = ET.parse(fname2)
root2 = tree2.getroot()

# Hang both original roots, untouched, under a new synthetic <root> element.
# NOTE: this only places the two documents side by side; nothing is
# de-duplicated, so every <permission> from both inputs is kept.
wrapper = ET.Element('root')
for document_root in (root1, root2):
    wrapper.append(document_root)
merged_tree = ET.ElementTree(wrapper)

merged_tree.write(fname3, encoding='utf-8')

Solution

  • You can collect the permissions from both files and filter out the duplicates by name:

    import xml.etree.ElementTree as ET
    
    
    a= """<?xml version="1.0" encoding="utf-8"?>
    <permissions>
      <privapp-permissions package="a">
            <permission name="x"/>
            <permission name="y"/>
      </privapp-permissions>
    </permissions>"""

    b= """<?xml version="1.0" encoding="utf-8"?>
    <permissions>
      <privapp-permissions package="a">
            <permission name="x"/>
            <permission name="z"/>
      </privapp-permissions>
    </permissions>"""

    # Empty skeleton of the result; the merged permissions are attached to it.
    c= """<?xml version="1.0" encoding="utf-8"?>
    <permissions>
      <privapp-permissions package="a" />
    </permissions>"""


    def merge_unique_permissions(target, *sources):
        """Append each <permission> of *sources* to *target*, once per name.

        Permissions are taken in document order, source by source, and a
        permission is skipped when one with the same ``name`` attribute is
        already present in *target* or was appended earlier in this call.

        Args:
            target: Element that receives the permissions.
            *sources: Elements (typically document roots) searched for
                <permission> descendants.

        Returns:
            *target*, to allow chaining.
        """
        seen = {elem.get('name') for elem in target.findall('.//permission')}
        for source in sources:
            for elem in source.findall('.//permission'):
                name = elem.get('name')
                if name in seen:
                    continue
                seen.add(name)
                target.append(elem)
        return target


    tree = ET.fromstring(a)
    tree1 = ET.fromstring(b)
    tree2 = ET.fromstring(c)

    # Every permission ends up in the single group defined by the skeleton `c`
    # (find() returns the first <privapp-permissions> only).
    new = tree2.find('privapp-permissions')

    # Duplicates are filtered while collecting rather than removed afterwards:
    # removing children from a tree that is being iterated makes the iterator
    # skip siblings, so some duplicates would survive.
    merge_unique_permissions(new, tree, tree1)

    root = ET.ElementTree(tree2)
    # The appended elements still carry the whitespace of their source files;
    # indent() rewrites it so the output is uniformly formatted.
    ET.indent(root, space="  ", level=0)
    # To save to a file instead of printing:
    #   root.write(file_name, xml_declaration=True, encoding="utf-8")
    ET.dump(root)
    

    Output:

    <permissions>
      <privapp-permissions package="a">
        <permission name="x" />
        <permission name="y" />
        <permission name="z" />
      </privapp-permissions>
    </permissions>