Search code examples
pythonjsonpandasdataframereadfile

How to normalize a json file to pandas dataframe?


I have a json file like as shown below. I already tried the solutions from this post,post

[{
    "answersData": {
        "employeeId": "0923a",
        "answers": {
            "Address_2": "Address_Line_2_1",
            "Address_2_CC": "Address_2_CC_1",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1971-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "First_Name_1",
            "Gender": "M",
            "Last_Name": "Last_Name_1",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "English"
            ],
            "SecondaryLanguage": [
                "Hindi"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "husband",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "123456",
            "profilePicture": null,
            "provinceCity": "Province_1"
        },
        "createdBy": "MAM_1@123.com",
        "dateCreated": "2021-02-23T17:20:33.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "27l23t",
        "answers": {
            "Address_2": "Address_Line_2_2",
            "Address_2_CC": "Address_2_CC_2",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1980-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "First_Name_2",
            "Gender": "M",
            "Last_Name": "Last_Name_2",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "English"
            ],
            "SecondaryLanguage": [
                "Kannada"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "wife",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "621",
            "profilePicture": null,
            "provinceCity": "Province_2"
        },
        "createdBy": "MAM_2@123.com",
        "dateCreated": "2021-02-23T17:20:33.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "290p",
        "answers": {
            "Address_2": "Address_Line_2_3",
            "Address_2_CC": "Address_2_CC_3",
            "CellphoneNumberConsent": "NO",
            "Consent_Given": "N",
            "DoB": "1991-10-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "First_Name_3",
            "Gender": "M",
            "Last_Name": "Last_Name_3",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "German"
            ],
            "SecondaryLanguage": [
                "Telugu"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "Father",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "123456",
            "profilePicture": null,
            "provinceCity": "Province_3"
        },
        "createdBy": "MAM_3@123.com",
        "dateCreated": "2021-01-11T19:11:20.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "17mk9i",
        "answers": {
            "Address_2": "Address_Line_2_4",
            "Address_2_CC": "Address_2_CC_4",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1947-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "First_Name_4",
            "Gender": "M",
            "Last_Name": "Last_Name_4",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "English"
            ],
            "SecondaryLanguage": [
                "Hindi"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "mother",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "123456",
            "profilePicture": null,
            "provinceCity": "Province_4"
        },
        "createdBy": "MAM_4@123.com",
        "dateCreated": "2021-05-23T17:20:33.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "17lo8i",
        "answers": {
            "Address_2": "Address_Line_2_5",
            "Address_2_CC": "Address_2_CC_5",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1993-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "First_Name_5",
            "Gender": "M",
            "Last_Name": "Last_Name_5",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "English"
            ],
            "SecondaryLanguage": [
                "Hindi"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "child",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "123456",
            "profilePicture": null,
            "provinceCity": "Province_5"
        },
        "createdBy": "MAM_5@123.com",
        "dateCreated": "2021-01-01T17:20:33.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "17k9i",
        "answers": {
            "Address_2": "Address_Line_2_6",
            "Address_2_CC": "Address_2_CC_6",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1983-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "First_Name_6",
            "Gender": "M",
            "Last_Name": "Last_Name_6",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "Spanish"
            ],
            "SecondaryLanguage": [
                "Tagalog"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "husband",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "123456",
            "profilePicture": null,
            "provinceCity": "Province_6"
        },
        "createdBy": "MAM_6@123.com",
        "dateCreated": "2021-01-16T17:20:33.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "87p",
        "answers": {
            "Address_2": "TEST123",
            "Address_2_CC": "Test",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1801-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "Test123",
            "Gender": "M",
            "Last_Name": "Test123",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "English"
            ],
            "SecondaryLanguage": [
                "Kannada"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "wife",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "654321",
            "profilePicture": null,
            "provinceCity": "Province_2"
        },
        "createdBy": "jo@test.com",
        "dateCreated": "2021-02-23T17:20:33.134Z",
        "type": "profile"
    }
}, {
    "answersData": {
        "employeeId": "09l07ytw",
        "answers": {
            "Address_2": "TEST123",
            "Address_2_CC": "Test",
            "CellphoneNumberConsent": "YES",
            "Consent_Given": "Y",
            "DoB": "1801-07-10T16:00:00.000Z",
            "EmailaddressConsent": "NotApplicable",
            "First_Name": "Test123",
            "Gender": "M",
            "Last_Name": "Test123",
            "Middle_Name": null,
            "PrimaryLanguage": [
                "English"
            ],
            "SecondaryLanguage": [
                "Kannada"
            ],
            "addionalIdentificationType": "NO",
            "cellphoneNumber": "1234567890",
            "countryName": "IND",
            "householdResponsibility": "wife",
            "poorCardHas": "N",
            "poorCardReason": "OTH",
            "postalCode": "654321",
            "profilePicture": null,
            "provinceCity": "Province_2"
        },
        "createdBy": "jo@test.com",
        "dateCreated": "2021-02-23T17:20:33.134Z",
        "type": "profile"
    }
}

]

When I try to read the json file using pandas, I get the below error

ValueError: Expected object or value

I already tried the below approaches

df = pd.read_json(open(r"test_data.json", "r",encoding="utf8"))
df = pd.read_json(r'test_data.json', encoding='utf-8-sig')
basepath = 'C:\\Users\\test\\Downloads' 
pd.read_json(basePath + '\\test_data.json') 

I expect my output to be like as shown below

enter image description here


Solution

  • Use json_normalize to flatten the nested data.

    Code

    # Load json data from file
    with open('test.json') as f:
        adict = json.load(f)
    
        # Use json normalize for nested dictionaries
        df = pd.json_normalize(adict)
    

    df

        answersData.employeeId  answersData.answers.Address_2   answersData.answers.Address_2_CC    answersData.answers.CellphoneNumberConsent  answersData.answers.Consent_Given   answersData.answers.DoB answersData.answers.EmailaddressConsent answersData.answers.First_Name  answersData.answers.Gender  answersData.answers.Last_Name   ... answersData.answers.countryName answersData.answers.householdResponsibility answersData.answers.poorCardHas answersData.answers.poorCardReason  answersData.answers.postalCode  answersData.answers.profilePicture  answersData.answers.provinceCity    answersData.createdBy   answersData.dateCreated answersData.type
    0   0923a   Address_Line_2_1    Address_2_CC_1  YES Y   1971-07-10T16:00:00.000Z    NotApplicable   First_Name_1    M   Last_Name_1 ... IND husband N   OTH 123456  None    Province_1  MAM_1@123.com   2021-02-23T17:20:33.134Z    profile
    1   27l23t  Address_Line_2_2    Address_2_CC_2  YES Y   1980-07-10T16:00:00.000Z    NotApplicable   First_Name_2    M   Last_Name_2 ... IND wife    N   OTH 621 None    Province_2  MAM_2@123.com   2021-02-23T17:20:33.134Z    profile
    2   290p    Address_Line_2_3    Address_2_CC_3  NO  N   1991-10-10T16:00:00.000Z    NotApplicable   First_Name_3    M   Last_Name_3 ... IND Father  N   OTH 123456  None    Province_3  MAM_3@123.com   2021-01-11T19:11:20.134Z    profile
    3   17mk9i  Address_Line_2_4    Address_2_CC_4  YES Y   1947-07-10T16:00:00.000Z    NotApplicable   First_Name_4    M   Last_Name_4 ... IND mother  N   OTH 123456  None    Province_4  MAM_4@123.com   2021-05-23T17:20:33.134Z    profile
    4   17lo8i  Address_Line_2_5    Address_2_CC_5  YES Y   1993-07-10T16:00:00.000Z    NotApplicable   First_Name_5    M   Last_Name_5 ... IND child   N   OTH 123456  None    Province_5  MAM_5@123.com   2021-01-01T17:20:33.134Z    profile
    5   17k9i   Address_Line_2_6    Address_2_CC_6  YES Y   1983-07-10T16:00:00.000Z    NotApplicable   First_Name_6    M   Last_Name_6 ... IND husband N   OTH 123456  None    Province_6  MAM_6@123.com   2021-01-16T17:20:33.134Z    profile
    6   87p TEST123 Test    YES Y   1801-07-10T16:00:00.000Z    NotApplicable   Test123 M   Test123 ... IND wife    N   OTH 654321  None    Province_2  jo@test.com 2021-02-23T17:20:33.134Z    profile
    7   09l07ytw    TEST123 Test    YES Y   1801-07-10T16:00:00.000Z    NotApplicable   Test123 M   Test123 ... IND wife    N   OTH 654321  None    Province_2  jo@test.com 2021-02-23T17:20:33.134Z    profile