Search code examples
pythonjsonpandasdictionarynested-lists

Convert dictionary containing multiple nested lists to a pandas dataframe


I have a dictionary that looks like this:

my_dict = {
    "end_time": "2022-10-21T20:00:00",
    "time_samples": [
        "2022-10-21T15:00:00+00:00",
        "2022-10-21T16:00:00+00:00",
        "2022-10-21T17:00:00+00:00",
        "2022-10-21T18:00:00+00:00",
        "2022-10-21T19:00:00+00:00",
    ],
    "groupings": [
        {
            "grouping": "day_of_week",
            "groupings": ["day_of_week"],
            "grouping_value": "5",
            "grouping_values": [5],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        17.533546325878596,
                        17.19327731092437,
                        13.44502617801047,
                        11.5010395010395,
                        9.649484536082475,
                    ],
                }
            ],
        },
        {
            "grouping": "device_type",
            "groupings": ["device_type"],
            "grouping_value": "bicycle",
            "grouping_values": ["bicycle"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        2.3736263736263736,
                        0.5454545454545454,
                        None,
                        None,
                        None,
                    ],
                }
            ],
        },
        {
            "grouping": "device_type",
            "groupings": ["device_type"],
            "grouping_value": "moped",
            "grouping_values": ["moped"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        19.160377358490564,
                        18.88888888888889,
                        14.799076212471132,
                        12.702640642939151,
                        10.669714285714285,
                    ],
                }
            ],
        },
        {
            "grouping": "geo_fence",
            "groupings": ["geo_fence"],
            "grouping_value": "/geo_features/1448150377",
            "grouping_values": ["/geo_features/1448150377"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        17.533546325878596,
                        17.19327731092437,
                        13.44502617801047,
                        11.5010395010395,
                        9.649484536082475,
                    ],
                }
            ],
        },
        {
            "grouping": "hour_of_day",
            "groupings": ["hour_of_day"],
            "grouping_value": "17",
            "grouping_values": [17],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [17.533546325878596, None, None, None, None],
                }
            ],
        },
        {
            "grouping": "hour_of_day",
            "groupings": ["hour_of_day"],
            "grouping_value": "18",
            "grouping_values": [18],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [None, 17.19327731092437, None, None, None],
                }
            ],
        },
        {
            "grouping": "hour_of_day",
            "groupings": ["hour_of_day"],
            "grouping_value": "19",
            "grouping_values": [19],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [None, None, 13.44502617801047, None, None],
                }
            ],
        },
        {
            "grouping": "hour_of_day",
            "groupings": ["hour_of_day"],
            "grouping_value": "20",
            "grouping_values": [20],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [None, None, None, 11.5010395010395, None],
                }
            ],
        },
        {
            "grouping": "hour_of_day",
            "groupings": ["hour_of_day"],
            "grouping_value": "21",
            "grouping_values": [21],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [None, None, None, None, 9.649484536082475],
                }
            ],
        },
        {
            "grouping": "no_grouping",
            "groupings": ["no_grouping"],
            "grouping_value": "all",
            "grouping_values": ["all"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        17.533546325878596,
                        17.19327731092437,
                        13.44502617801047,
                        11.5010395010395,
                        9.649484536082475,
                    ],
                }
            ],
        },
        {
            "grouping": "provider",
            "groupings": ["provider"],
            "grouping_value": "/providers/12",
            "grouping_values": ["/providers/12"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        15.849056603773583,
                        16.383561643835616,
                        13.254545454545454,
                        10.914027149321267,
                        10.232142857142858,
                    ],
                }
            ],
        },
        {
            "grouping": "provider",
            "groupings": ["provider"],
            "grouping_value": "/providers/180",
            "grouping_values": ["/providers/180"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        2.3736263736263736,
                        0.5454545454545454,
                        None,
                        None,
                        None,
                    ],
                }
            ],
        },
        {
            "grouping": "provider",
            "groupings": ["provider"],
            "grouping_value": "/providers/19",
            "grouping_values": ["/providers/19"],
            "metrics": [
                {
                    "metric_type": "vehicle_rotation",
                    "units": {
                        "numerators": [{"unit": "metric_count"}],
                        "denominators": [
                            {"unit": "vehicles"},
                            {"unit": "seconds", "value": 86400.0},
                        ],
                    },
                    "values": [
                        22.471698113207548,
                        21.464788732394368,
                        16.394366197183096,
                        14.545454545454547,
                        11.1288056206089,
                    ],
                }
            ],
        },
    ],
}

I would like to convert this dictionary to a pandas dataframe with the timestamps in the time_samples key as a column and the values in the values key (which are grouped by the grouping_value key) as other columns (and with the grouping_value as column names), like so:

time_samples               5                   bicycle              ...
2022-10-21T15:00:00+00:00  17.533546325878596  2.3736263736263736   ...
2022-10-21T16:00:00+00:00  17.19327731092437   0.5454545454545454   ...
2022-10-21T17:00:00+00:00  13.44502617801047   None            ...
2022-10-21T18:00:00+00:00  11.5010395010395    None            ...
2022-10-21T19:00:00+00:00  9.649484536082475   None            ...

I started as follows, but struggle how to proceed. Any ideas?

df = pd.json_normalize(response_dict)
df['groupings'].explode().apply(pd.Series)

Solution

  • Here is one way to do it using Pandas concat:

    import pandas as pd
    
    
    df = pd.concat(
        [
            pd.DataFrame({"time_samples": my_dict["time_samples"]}),
            pd.DataFrame(
                {
                    grouping["grouping_value"]: grouping["metrics"][0]["values"]
                    for grouping in my_dict["groupings"]
                }
            ),
        ],
        axis=1,
    )
    

    Then:

    print(df)
    # Output
                    time_samples          5   bicycle      moped  \
    0  2022-10-21T15:00:00+00:00  17.533546  2.373626  19.160377   
    1  2022-10-21T16:00:00+00:00  17.193277  0.545455  18.888889   
    2  2022-10-21T17:00:00+00:00  13.445026       NaN  14.799076   
    3  2022-10-21T18:00:00+00:00  11.501040       NaN  12.702641   
    4  2022-10-21T19:00:00+00:00   9.649485       NaN  10.669714   
    
       /geo_features/1448150377         17         18         19        20  \
    0                 17.533546  17.533546        NaN        NaN       NaN   
    1                 17.193277        NaN  17.193277        NaN       NaN   
    2                 13.445026        NaN        NaN  13.445026       NaN   
    3                 11.501040        NaN        NaN        NaN  11.50104   
    4                  9.649485        NaN        NaN        NaN       NaN   
    
             21        all  /providers/12  /providers/180  /providers/19  
    0       NaN  17.533546      15.849057        2.373626      22.471698  
    1       NaN  17.193277      16.383562        0.545455      21.464789  
    2       NaN  13.445026      13.254545             NaN      16.394366  
    3       NaN  11.501040      10.914027             NaN      14.545455  
    4  9.649485   9.649485      10.232143             NaN      11.128806