Search code examples
pythonjsoncsvcsvtojsonnested-json

How to convert a CSV into a nested JSON using Python


So i have read alot of previous questions about this topic but non were really able to help me get to the result I want so here's the CSV file sample

tripId,scooterId,userId,totalDuration,pickUpTime,dropOffTime,userLocationAtBooking.0,userLocationAtBooking.1,userLocationAtDropOff.0,userLocationAtDropOff.1,totalFare
18721,927,38579,45,2021-08-22 03:00:49,2021-08-22 03:45:39,24.76412,46.6493,24.76409833,46.64934,9.58
18722,434,38563,45,2021-08-22 03:01:16,2021-08-22 03:45:39,24.76412,46.64933333,24.76407,46.64933333,13.53
18723,876,38554,33,2021-08-22 03:05:57,2021-08-22 03:38:55,24.71392833,46.660645,24.7097,46.66272,0.67
18724,476,32291,65,2021-08-22 03:14:37,2021-08-22 04:18:56,24.77137833,46.64568667,24.7722,46.64523167,32.35

and here's the desired output

{
    "38579": {
        "18721": {
        "scooterId": "927",
        "userId": "38579",
        "totalDuration": "45",
        "pickUpTime": "2021-08-22 03:00:49",
        "dropOffTime": "2021-08-22 03:45:39",
        "userLocationAtBooking.0": "24.76412",
        "userLocationAtBooking.1": "46.6493",
        "userLocationAtDropOff.0": "24.76409833",
        "userLocationAtDropOff.1": "46.64934",
        "totalFare": "9.58"
        }
    },
    "38563": {
        "18722" : {
        "scooterId": "434",
        "userId": "38563",
        "totalDuration": "45",
        "pickUpTime": "2021-08-22 03:01:16",
        "dropOffTime": "2021-08-22 03:45:39",
        "userLocationAtBooking.0": "24.76412",
        "userLocationAtBooking.1": "46.64933333",
        "userLocationAtDropOff.0": "24.76407",
        "userLocationAtDropOff.1": "46.64933333",
        }
    }
}

but here's what I'm getting

{
    "38563": {
        "tripId": "18722",
        "scooterId": "434",
        "userId": "38563",
        "totalDuration": "45",
        "pickUpTime": "2021-08-22 03:01:16",
        "dropOffTime": "2021-08-22 03:45:39",
        "userLocationAtBooking.0": "24.76412",
        "userLocationAtBooking.1": "46.64933333",
        "userLocationAtDropOff.0": "24.76407",
        "userLocationAtDropOff.1": "46.64933333",
        "totalFare": "13.53"
    }
}

and here's the code I'm currently using

import csv, json
csvFilePath = 'tripsData.csv'
jsonFilePath = 'tripsData.json'

data = {}
with open(csvFilePath) as csvFile:
    csvReader = csv.DictReader(csvFile)
    for rows in csvReader:
        id = rows['userId']
        data[id] = rows
    for rows in csvReader:
        tripId = rows[tripId]
        data[tripId] = rows

with open(jsonFilePath, 'w') as jsonFile:
    jsonFile.write(json.dumps(data, indent=4))

ANy help would be highly appreciated and please keep in mind I'm really new to this


Solution

  • You're reading from the file two times, but first for-loop already exhausted the csvReader. To get desired output, you can use next example:

    import csv, json
    
    csvFilePath = "tripsData.csv"
    jsonFilePath = "tripsData.json"
    
    data = {}
    with open(csvFilePath, "r") as csvFile:
        csvReader = csv.DictReader(csvFile)
        for rows in csvReader:
            id_ = rows["userId"]
            data[id_] = rows
    
        for user_id, row in data.items():
            data[user_id] = {row["tripId"]: row}
            del row["tripId"]
    
    
    with open(jsonFilePath, "w") as jsonFile:
        jsonFile.write(json.dumps(data, indent=4))
    

    Creates json:

    {
        "38579": {
            "18721": {
                "scooterId": "927",
                "userId": "38579",
                "totalDuration": "45",
                "pickUpTime": "2021-08-22 03:00:49",
                "dropOffTime": "2021-08-22 03:45:39",
                "userLocationAtBooking.0": "24.76412",
                "userLocationAtBooking.1": "46.6493",
                "userLocationAtDropOff.0": "24.76409833",
                "userLocationAtDropOff.1": "46.64934",
                "totalFare": "9.58"
            }
        },
        "38563": {
            "18722": {
                "scooterId": "434",
                "userId": "38563",
                "totalDuration": "45",
                "pickUpTime": "2021-08-22 03:01:16",
                "dropOffTime": "2021-08-22 03:45:39",
                "userLocationAtBooking.0": "24.76412",
                "userLocationAtBooking.1": "46.64933333",
                "userLocationAtDropOff.0": "24.76407",
                "userLocationAtDropOff.1": "46.64933333",
                "totalFare": "13.53"
            }
        },
        "38554": {
            "18723": {
                "scooterId": "876",
                "userId": "38554",
                "totalDuration": "33",
                "pickUpTime": "2021-08-22 03:05:57",
                "dropOffTime": "2021-08-22 03:38:55",
                "userLocationAtBooking.0": "24.71392833",
                "userLocationAtBooking.1": "46.660645",
                "userLocationAtDropOff.0": "24.7097",
                "userLocationAtDropOff.1": "46.66272",
                "totalFare": "0.67"
            }
        },
        "32291": {
            "18724": {
                "scooterId": "476",
                "userId": "32291",
                "totalDuration": "65",
                "pickUpTime": "2021-08-22 03:14:37",
                "dropOffTime": "2021-08-22 04:18:56",
                "userLocationAtBooking.0": "24.77137833",
                "userLocationAtBooking.1": "46.64568667",
                "userLocationAtDropOff.0": "24.7722",
                "userLocationAtDropOff.1": "46.64523167",
                "totalFare": "32.35"
            }
        }
    }