I want to convert nested json into csv format including sub rows for grouped list/dict.
Here my json
data =\
{
"id": "1",
"name": "HIGHLEVEL",
"description": "HLD",
"item": {
"id": "11",
"description": "description"
},
"packages": [{
"id": "1",
"label": "Package 1",
"products": [{
"id": "1",
"price": 5
}, {
"id": "2",
"price": 3
}
]
}, {
"id": "2",
"label": "Package 3",
"products": [{
"id": "1",
"price": 5
}, {
"id": "2",
"price": 3
}
]
}
]
}
import pandas as pd
df = pd.json_normalize(data)
# display(df)
description id name packages item.description item.id
0 HLD 1 HIGHLEVEL [{'id': '1', 'label': 'Package 1', 'products': [{'id': '1', 'price': 5}, {'id': '2', 'price': 3}]}, {'id': '2', 'label': 'Package 3', 'products': [{'id': '1', 'price': 5}, {'id': '2', 'price': 3}]}] description 11
Output of JSON to CSV Converter
"id","name","description","item__id","item__description","packages__id","packages__label","packages__products__id","packages__products__price"
"1","HIGHLEVEL","HLD","11","description","1","Package 1","1","5"
"","","","","","","","2","3"
"","","","","","2","Package 3","1","5"
"","","","","","","","2","3"
I tried pandas normalization but the results are not the same as wanted. JSON Array are not converted into sub rows in csv. I want to keep empty string in the csv.
I want to do the same but with a Python Script.
This should work for you:
from copy import deepcopy
import pandas
def cross_join(left, right):
new_rows = [] if right else left
for left_row in left:
for right_row in right:
temp_row = deepcopy(left_row)
for key, value in right_row.items():
temp_row[key] = value
new_rows.append(deepcopy(temp_row))
return new_rows
def flatten_list(data):
for elem in data:
if isinstance(elem, list):
yield from flatten_list(elem)
else:
yield elem
def json_to_dataframe(data_in):
def flatten_json(data, prev_heading=''):
if isinstance(data, dict):
rows = [{}]
for key, value in data.items():
rows = cross_join(rows, flatten_json(value, prev_heading + '_' + key))
elif isinstance(data, list):
rows = []
if(len(data) != 0):
for i in range(len(data)):
[rows.append(elem) for elem in flatten_list(flatten_json(data[i], prev_heading))]
else:
data.append("")
[rows.append(elem) for elem in flatten_list(flatten_json(data[0], prev_heading))]
else:
rows = [{prev_heading[1:]: data}]
return rows
return pandas.DataFrame(flatten_json(data_in))
def remove_duplicates(df):
columns = list(df)[:7]
for c in columns:
df[c] = df[c].mask(df[c].duplicated(), "")
return df
if __name__ == '__main__':
df = json_to_dataframe(data)
df = remove_duplicates(df)
print(df)
df.to_csv('data.csv', index=False)
Input 01:
data = {
"id": "1",
"name": "HIGHLEVEL",
"description": "HLD",
"item": {
"id": "11",
"description": "description"
},
"packages": [{
"id": "1",
"label": "Package 1",
"products": [{
"id": "1",
"price": 5
}, {
"id": "2",
"price": 3
}, {
"id": "3",
"price": 9
}
]
}, {
"id": "2",
"label": "Package 3",
"products": [{
"id": "1",
"price": 5
}, {
"id": "2",
"price": 3
}, {
"id": "3",
"price": 9
}
]
}
]
}
Output 01:
Input 02:
data = {
"id": "1",
"name": "HIGHLEVEL",
"description": "HLD",
"item": {
"id": "11",
"description": "description"
},
"packages": [{
"id": "1",
"label": "Package 1",
"products": []
}, {
"id": "2",
"label": "Package 3",
"products": []
}
]
}
Hope it will resolve your issue. If you need any explanation then please let me know.
Thanks