Search code examples
python json xml xml-parsing xmltodict

How to access keys and values in a dictionary with Python after parsing XML using xmltodict


Given XML file:

<?xml version="1.0" standalone="yes"?>
<!--WORLD is the root element-->
<WORLD>
    <country name="ABCDEF">
        <event day="323" name="$abcd"> </event>
        <event day="23" name="$aklm"> </event>

        <neighbor name="B" direction="W" friend="T"></neighbor>
        <neighbor name="C" direction="E"></neighbor>
        <neighbor name="D" direction="W"></neighbor>
    </country>
    <country name="KLMNOP">
        <event day="825" name="$nmre"> </event>
        <event day="329" name="$lpok"> </event>
        <event day="145" name="$dswq"> </event>
        <event day="256" name="$tyul"> </event>

        <neighbor name="D" direction="S"/>
        <neighbor name="E" direction="N" friend="T"/>
    </country>
</WORLD>

I then parsed this XML file using the "xmltodict" library in Python:

import xmltodict

class XMLParser:
    """Load an XML file from disk and convert it to a dictionary via xmltodict."""

    def __init__(self, xml_file_path):
        """Read the XML document at *xml_file_path* into memory.

        Args:
            xml_file_path: Path of the XML file to read.

        Raises:
            ValueError: If *xml_file_path* is empty or otherwise falsy.
            OSError: If the file cannot be opened or read.
        """
        # Validate first so a rejected path never ends up stored on the instance.
        if not xml_file_path:
            raise ValueError("XML file path is not found.")

        # Path the document is loaded from.
        self.xml_file_path = xml_file_path

        # XML without an encoding declaration is UTF-8 by specification, so
        # decode explicitly instead of relying on the platform's locale default.
        with open(self.xml_file_path, 'r', encoding='utf-8') as f:
            # Raw XML text, kept for parse_xml_to_json().
            self.xml_file = f.read()

    def parse_xml_to_json(self):
        """Parse the loaded XML text and return the resulting dictionary.

        Despite the method name, the return value is the mapping produced by
        ``xmltodict.parse`` rather than a JSON string. ``attr_prefix=''`` is
        passed so that XML attributes are stored under their plain names.

        As a side effect, when the second ``country`` entry under ``WORLD``
        has a ``name`` key, the top-level keys and values of the parsed
        document are printed.

        Returns:
            The parsed document as returned by ``xmltodict.parse``.

        Raises:
            KeyError, IndexError: If the document has no ``WORLD`` root, no
                ``country`` children, or no second ``country`` entry.
        """
        # Named ``parsed`` rather than ``dict`` so the builtin is not shadowed.
        parsed = xmltodict.parse(self.xml_file, attr_prefix='')

        for key in parsed['WORLD']['country'][1]:
            if key == "name":
                print(parsed.keys())
                print(parsed.values())

        return parsed

# Path of the XML file to parse ("file_path" is presumably a placeholder to replace).
xml_file_path = "file_path"
# The constructor reads the whole file into memory.
xml_parser = XMLParser(xml_file_path)
# Convert the loaded XML text to a dictionary and display it.
data = xml_parser.parse_xml_to_json()
print(data)

However, I am getting an error and am unable to access the keys and values of this dictionary.

Error: AttributeError: 'str' object has no attribute 'keys'

I want to get the output in the below format:

Required Output:

    { "neighbor":
[
{
  "Name": "B",
  "direction": "W",
  "Type": "ABCDEF"
},
{  
  "Name": "C",
  "direction": "E",
  "Type": "ABCDEF"
},
{  
  "Name": "D",
  "direction": "W",
  "Type": "ABCDEF"
},
{  
  "Name": "D",
  "direction": "S",
  "Type": "KLMNOP"
},
{  
  "Name": "E",
  "direction": "N",
  "Type": "KLMNOP"
}
]
}

Please suggest a solution; I am new to XML and Python.


Solution

  • A simple nested loop can work here:

    import xml.etree.ElementTree as ET
    
    # Sample document: each <country> holds <event> and <neighbor> children.
    xml = '''<WORLD>
        <country name="ABCDEF">
            <event day="323" name="$abcd"> </event>
            <event day="23" name="$aklm"> </event>
    
            <neighbor name="B" direction="W" friend="T"></neighbor>
            <neighbor name="C" direction="E"></neighbor>
            <neighbor name="D" direction="W"></neighbor>
        </country>
        <country name="KLMNOP">
            <event day="825" name="$nmre"> </event>
            <event day="329" name="$lpok"> </event>
            <event day="145" name="$dswq"> </event>
            <event day="256" name="$tyul"> </event>
    
            <neighbor name="D" direction="S"/>
            <neighbor name="E" direction="N" friend="T"/>
        </country>
    </WORLD>'''
    root = ET.fromstring(xml)
    # Gather one record per <neighbor>, labelled with its parent country's name.
    neighbors = []
    for country_el in root.findall('.//country'):
        type_label = country_el.attrib['name']
        neighbors.extend(
            {
                'Type': type_label,
                'Name': neighbor_el.attrib['name'],
                'direction': neighbor_el.attrib['direction'],
            }
            for neighbor_el in country_el.findall('neighbor')
        )
    data = {'neighbor': neighbors}
    print(data)
    

    output

    {
      'neighbor': [
        {
          'Type': 'ABCDEF',
          'Name': 'B',
          'direction': 'W'
        },
        {
          'Type': 'ABCDEF',
          'Name': 'C',
          'direction': 'E'
        },
        {
          'Type': 'ABCDEF',
          'Name': 'D',
          'direction': 'W'
        },
        {
          'Type': 'KLMNOP',
          'Name': 'D',
          'direction': 'S'
        },
        {
          'Type': 'KLMNOP',
          'Name': 'E',
          'direction': 'N'
        }
      ]
    }