Search code examples
pythonjsoncsvxlsx

python nested json to csv/xlsx with specified headers


With json like below, which is an array of objects at the outer most level with further nested arrays with objects.

data = [{"a": [{"a1": [{"id0": [{"aa": [{"aaa": 97}, {"aab": "one"}], "ab": [{"aba": 97}, {"abb": ["one", "two"]}]}]}, {"id1": [{"aa": [{"aaa": 23}]}]}]}, {"a2": []}]}, {"b": [{"b1": [{"Common": [{"bb": [{"value": 4}]}]}]}]}]

I need to write this to a csv (or .xlsx file)

what I've tried so far ?

data_file = open('data_file.csv', 'w')
csv_writer = csv.writer(data_file)
for row in data:
  csv_writer.writerow(row)
data_file.close() 

This gives an empty file 'data_file.csv'.

Also how do I add headers to the CSV. I have the headers stored in a list as below

hdrs = ['Section', 'Subsection', 'pId', 'Group', 'Parameter', 'Value'] 

- this corresponds to the five levels of keys

Expected CSV output

+---------+------------+--------+-------+-----------+----------+
| Section | Subsection |  pId   | Group | Parameter |  Value   |
+---------+------------+--------+-------+-----------+----------+
| a       | a1         | id0    | aa    | aaa       | 97       |
| a       | a1         | id0    | aa    | aab       | one      |
| a       | a1         | id0    | ab    | aba       | 97       |
| a       | a1         | id0    | ab    | abb       | one, two |
| a       | a1         | id1    | aa    | aaa       | 23       |
| a       | a2         |        |       |           |          |
| b       | b1         | Common | bb    | value     | 4        |
+---------+------------+--------+-------+-----------+----------+

Expected XLSX output enter image description here


Solution

  • Following code is able to parse the provided data as per expected format.

    from typing import List
    
    def parse_recursive(dat)->List[List]:
        ret=[]
        if type(dat) is list:
            for item in dat:
                if type(item)==dict:
                    for k in item:
                        #print(k, item[k], sep=" # ")#debug print
                        if item[k]==[]: #empty list
                            ret.append([k])
                        else:
                            for l in parse_recursive(item[k]):
                                #print(k,l,sep=" : ") #debug print
                                ret.append([k]+l) #always returns List of List
                else: #Right now only possibility is string eg. "one", "two"
                    return [[",".join(dat)]]
        else: #can be int or string eg. 97, "23"
            return [[dat]]
    
        return ret
    
    
    def write_to_csv(file_name:str, fields:List, row_data:List[List]):
        import csv
        with open(file_name, 'w') as csvfile:  
            # creating a csv writer object  
            csvwriter = csv.writer(csvfile)  
            # writing the fields  
            csvwriter.writerow(fields)  
            # writing the data rows  
            csvwriter.writerows(row_data)
    
    
    if __name__=="__main__":
        org_data = [{"a": [
            {"a1": [
                {"id0": [
                    {
                        "aa": [
                            {"aaa": 97},
                            {"aab": "one"}],
                        "ab": [
                            {"aba": 97},
                            {"abb": ["one", "two"]}
                            ]
                    }
                ]
                },
                {"id1": [
                    {"aa": [
                        {"aaa": 23}]}]}
                ]
            },
            {"a2": []}
            ]},
            {"b": [{"b1": [{"Common": [{"bb": [{"value": 4}]}]}]}]}]
        print(parse_recursive(org_data)) #Debug
    
        file_name="data_file.csv"
        fields=['Section', 'Subsection', 'pId', 'Group', 'Parameter', 'Value']
        write_to_csv(file_name, fields, parse_recursive(org_data))
    

    parse_recursive tries to parse arbitrary depth dictionary as per rule i tried deducing from your input and output formats.

    Following is the output of parse_recursive for your provided input -

    mahorir@mahorir-Vostro-3446:~/Desktop$ python3 so.py 
    [['a', 'a1', 'id0', 'aa', 'aaa', 97], ['a', 'a1', 'id0', 'aa', 'aab', 'one'], ['a', 'a1', 'id0', 'ab', 'aba', 97], ['a', 'a1', 'id0', 'ab', 'abb', 'one,two'], ['a', 'a1', 'id1', 'aa', 'aaa', 23], ['a', 'a2'], ['b', 'b1', 'Common', 'bb', 'value', 4]]
    

    write_to_csv is a trivial function that write to a csv file.