Python Converting Nested array of objects in JSON to flattened excel
Current Code
def read_json(filename): jsonData = {} try: with open(filename, "r", encoding="utf-8") as f: jsonData = json.loads(f.read()) except: raise Exception(f"Reading {filename} file encountered an error") return jsonData json_data = read_json(filename=".json")
df = pd.json_normalize(json_data,meta=['Documents']).explode('Documents').reset_index(drop=True)
df = df.join(pd.json_normalize(df.pop('Documents')))
df = df.join(pd.json_normalize(df.pop("lossInfo").apply(pd.Series).stack().reset_index(drop=True)))
Expected:
metaData.form metaData.userNm report.date policy.effdate policy.efftime year.current year.previous
0 1 1 1 11 11 11 11
1 1 1 1 12 12 12 12
3 2 2 2 21 21 21 21
4 2 2 2 22 22 22 22
How to flatten below JSON file. Collection of 2 or more array of objects with nested data in a single json:
{
"Documents": [
{
"metaData": {
"form": "1",
"userNm": "1"
},
"report": {
"date": "1"
},
"lossInfo": [
{
"policy": {
"effdate": "11",
"efftime": "11"
},
"year": {
"current": "11",
"previous": "11"
}
},
{
"policy": {
"effdate": "12",
"efftime": "12"
},
"year": {
"current": "12",
"previous": "12"
}
}
]
},
{
"metaData": {
"form": "2",
"userNm": "2"
},
"report": {
"date": "2"
},
"lossInfo": [
{
"policy": {
"effdate": "21",
"efftime": "21"
},
"year": {
"current": "22",
"previous": "22"
}
},
{
"policy": {
"effdate": "21",
"efftime": "21"
},
"year": {
"current": "22",
"previous": "22"
}
}
]
}
]
}
@Chetan
For your specific case, this should do the trick.
import pandas as pd
data = {'Documents':[{'metaData':{'form':'1','userNm':'1'},'report':{'date':'1'},'lossInfo':[{'policy':{'effdate':'11','efftime':'11'},'year':{'current':'11','previous':'11'}},{'policy':{'effdate':'12','efftime':'12'},'year':{'current':'12','previous':'12'}}]},{'metaData':{'form':'2','userNm':'2'},'report':{'date':'2'},'lossInfo':[{'policy':{'effdate':'21','efftime':'21'},'year':{'current':'22','previous':'22'}},{'policy':{'effdate':'21','efftime':'21'},'year':{'current':'22','previous':'22'}}]}]}
def flattened_dataframe(data: dict) -> pd.DataFrame:
metadata_cols = list(pd.json_normalize(data["Documents"]).columns)[1:]
df = pd.json_normalize(
data["Documents"], record_path="lossInfo", meta=["metaData", "report"]
)
data_pending_right_cols = pd.concat(
[df, df["metaData"].apply(pd.Series), df["report"].apply(pd.Series)], axis=1
).drop(columns=["metaData", "report"])
lossinfo_cols = list(data_pending_right_cols.columns)[:-3]
return data_pending_right_cols.set_axis(
lossinfo_cols + metadata_cols, axis="columns"
)
flattened_dataframe(data)