Search code examples
arraysjsonstructhivehdfs

Hive data loading using complex json structure


I am trying to load an external table via a JSON file having free schema arrays and structures.

create external table longtimedata_v4
(measurementPointName string,measurementPointId string,dataTypeId string,dataTypeName string,channels ARRAY<            struct<name:string,unit:string,dvalues:ARRAY<struct<atimedt:string,dvalue:string>>>>)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION '/warehouse/tablespace/external/hive/longtimedata'
{
    "measurementPointName": "Saab Scania, BBY2908, Sibbhult",
    "measurementPointId": 35,
    "dataTypeId": 1,
    "dataTypeName": "Voltage",
    "channels": [
        {
            "name": "U1 RMS",
            "unit": "V",
            "values": [
                {
                    "time": "2022-10-24T00:00:00Z",
                    "value": 21348.7695
                },
                {
                    "time": "2022-10-24T00:10:00Z",
                    "value": 21342.7656
                },
                {
                    "time": "2022-10-24T00:20:00Z",
                    "value": 21401.0957
                },
                {
                    "time": "2022-10-24T00:30:00Z",
                    "value": 21415.2129
                },
                {
                    "time": "2022-10-24T00:40:00Z",
                    "value": 21456.9062
                },
                {
                    "time": "2022-10-24T00:50:00Z",
                    "value": 21501.541
                },
                {
                    "time": "2022-10-24T01:00:00Z",
                    "value": 21527.4355
                },
                {
                    "time": "2022-10-24T01:10:00Z",
                    "value": 21510.9512
                },
                {
                    "time": "2022-10-24T01:20:00Z",
                    "value": 21436.2266
                },
                {
                    "time": "2022-10-24T01:30:00Z",
                    "value": 21423.5176
                },
                {
                    "time": "2022-10-24T01:40:00Z",
                    "value": 21415.6211
                },
                {
                    "time": "2022-10-24T01:50:00Z",
                    "value": 21416.8008
                },
                {
                    "time": "2022-10-24T02:00:00Z",
                    "value": 21460.6016
                },
                {
                    "time": "2022-10-24T02:10:00Z",
                    "value": 21502.0195
                },
                {
                    "time": "2022-10-24T02:20:00Z",
                    "value": 21405.748
                },
                {
                    "time": "2022-10-24T02:30:00Z",
                    "value": 21321.9062
                },
                {
                    "time": "2022-10-24T02:40:00Z",
                    "value": 21394.6074
                },
                {
                    "time": "2022-10-24T02:50:00Z",
                    "value": 21392.3496
                },
                {
                    "time": "2022-10-24T03:00:00Z",
                    "value": 21412.6875
                },
                {
                    "time": "2022-10-24T03:10:00Z",
                    "value": 21320.5469
                },
                {
                    "time": "2022-10-24T03:20:00Z",
                    "value": 21309.9746
                },
                {
                    "time": "2022-10-24T03:30:00Z",
                    "value": 21229.5879
                },
                {
                    "time": "2022-10-24T03:40:00Z",
                    "value": 21174.2891
                },
                {
                    "time": "2022-10-24T03:50:00Z",
                    "value": 21100.6406
                },
                {
                    "time": "2022-10-24T04:00:00Z",
                    "value": 21188.2207
                },
                {
                    "time": "2022-10-24T04:10:00Z",
                    "value": 21146.1191
                },
                {
                    "time": "2022-10-24T04:20:00Z",
                    "value": 21328.709
                },
                {
                    "time": "2022-10-24T04:30:00Z",
                    "value": 21216.4844
                },
                {
                    "time": "2022-10-24T04:40:00Z",
                    "value": 21104.7871
                },
                {
                    "time": "2022-10-24T04:50:00Z",
                    "value": 21096.8398
                },
                {
                    "time": "2022-10-24T05:00:00Z",
                    "value": 21147.1895
                },
                {
                    "time": "2022-10-24T05:10:00Z",
                    "value": 21330.084
                },
                {
                    "time": "2022-10-24T05:20:00Z",
                    "value": 21205.3965
                },
                {
                    "time": "2022-10-24T05:30:00Z",
                    "value": 21116.1309
                },
                {
                    "time": "2022-10-24T05:40:00Z",
                    "value": 21189.4648
                },
                {
                    "time": "2022-10-24T05:50:00Z",
                    "value": 21113.0703
                },
                {
                    "time": "2022-10-24T06:00:00Z",
                    "value": 21074.2109
                },
                {
                    "time": "2022-10-24T06:10:00Z",
                    "value": 21093.0605
                },
                {
                    "time": "2022-10-24T06:20:00Z",
                    "value": 21117.6934
                },
                {
                    "time": "2022-10-24T06:30:00Z",
                    "value": 21087.8496
                },
                {
                    "time": "2022-10-24T06:40:00Z",
                    "value": 21077.8047
                }
            ]
        }
    ]
}

My data got loaded, but not fully. The values array channels.dvalues.atimedt or dvalue is coming with null only. I tried to load data via a load statement and other ways to populate the values array and underlying structure, but nothing worked out.

SELECT channels[5].name,channels.unit,channels[5].dvalues.atimedt
FROM observematare.longtimedata_v4; 
U2 max  ["V","V","V","V","V","V","V","V","V","V","V","V"]   NULL
2   U2 max  ["V","V","V","V","V","V","V","V","V","V","V","V"]   NULL

Solution

  • There is no hive version mentioned in the question.

    Also you have not mentioned the reason for using different column names in table for the field names in data file. I assume it because 'values' and 'time' are reserved keywords.

    The failure (values coming as null) reason - column names are different from the field names in json data

    There are two ways to fix this issue

    1. Change the field names in data file(s) to match the column names in CREATE EXTERNAL TABLE statement
    2. Override the reserved keywords using back tick (`) - also known as quoted identifiers

    About using Reserved keywords from Hive documentation

    Reserved keywords are permitted as identifiers if you quote them as described in Supporting Quoted Identifiers in Column Names (version 0.13.0 and later, see HIVE-6013). Most of the keywords are reserved through HIVE-6617 in order to reduce the ambiguity in grammar (version 1.2.0 and later). There are two ways if the user still would like to use those reserved keywords as identifiers: (1) use quoted identifiers, (2) set hive.support.sql11.reserved.keywords=false. (version 2.1.0 and earlier)

    1. Changing the field names in data file
        {
            "measurementPointName": "Saab Scania, BBY2908, Sibbhult",
            "measurementPointId": 35,
            "dataTypeId": 1,
            "dataTypeName": "Voltage",
            "channels": [
                {
                    "name": "U1 RMS",
                    "unit": "V",
                    "dvalues": [
                        {
                            "atimedt": "2022-10-24T00:00:00Z",
                            "dvalue": 21348.7695
                        },
                        {
                            "atimedt": "2022-10-24T00:10:00Z",
                            "dvalue": 21342.7656
                        },
                        {
                            "atimedt": "2022-10-24T00:20:00Z",
                            "dvalue": 21401.0957
                        },
                        {
                            "atimedt": "2022-10-24T00:30:00Z",
                            "dvalue": 21415.2129
                        },
                        {
                            "atimedt": "2022-10-24T00:40:00Z",
                            "dvalue": 21456.9062
                        },
                        {
                            "atimedt": "2022-10-24T00:50:00Z",
                            "dvalue": 21501.541
                        },
                        {
                            "atimedt": "2022-10-24T01:00:00Z",
                            "dvalue": 21527.4355
                        },
                        {
                            "atimedt": "2022-10-24T01:10:00Z",
                            "dvalue": 21510.9512
                        },
                        {
                            "atimedt": "2022-10-24T01:20:00Z",
                            "dvalue": 21436.2266
                        },
                        {
                            "atimedt": "2022-10-24T01:30:00Z",
                            "dvalue": 21423.5176
                        },
                        {
                            "atimedt": "2022-10-24T01:40:00Z",
                            "dvalue": 21415.6211
                        },
                        {
                            "atimedt": "2022-10-24T01:50:00Z",
                            "dvalue": 21416.8008
                        },
                        {
                            "atimedt": "2022-10-24T02:00:00Z",
                            "dvalue": 21460.6016
                        },
                        {
                            "atimedt": "2022-10-24T02:10:00Z",
                            "dvalue": 21502.0195
                        },
                        {
                            "atimedt": "2022-10-24T02:20:00Z",
                            "dvalue": 21405.748
                        },
                        {
                            "atimedt": "2022-10-24T02:30:00Z",
                            "dvalue": 21321.9062
                        },
                        {
                            "atimedt": "2022-10-24T02:40:00Z",
                            "dvalue": 21394.6074
                        },
                        {
                            "atimedt": "2022-10-24T02:50:00Z",
                            "dvalue": 21392.3496
                        },
                        {
                            "atimedt": "2022-10-24T03:00:00Z",
                            "dvalue": 21412.6875
                        },
                        {
                            "atimedt": "2022-10-24T03:10:00Z",
                            "dvalue": 21320.5469
                        },
                        {
                            "atimedt": "2022-10-24T03:20:00Z",
                            "dvalue": 21309.9746
                        },
                        {
                            "atimedt": "2022-10-24T03:30:00Z",
                            "dvalue": 21229.5879
                        },
                        {
                            "atimedt": "2022-10-24T03:40:00Z",
                            "dvalue": 21174.2891
                        },
                        {
                            "atimedt": "2022-10-24T03:50:00Z",
                            "dvalue": 21100.6406
                        },
                        {
                            "atimedt": "2022-10-24T04:00:00Z",
                            "dvalue": 21188.2207
                        },
                        {
                            "atimedt": "2022-10-24T04:10:00Z",
                            "dvalue": 21146.1191
                        },
                        {
                            "atimedt": "2022-10-24T04:20:00Z",
                            "dvalue": 21328.709
                        },
                        {
                            "atimedt": "2022-10-24T04:30:00Z",
                            "dvalue": 21216.4844
                        },
                        {
                            "atimedt": "2022-10-24T04:40:00Z",
                            "dvalue": 21104.7871
                        },
                        {
                            "atimedt": "2022-10-24T04:50:00Z",
                            "dvalue": 21096.8398
                        },
                        {
                            "atimedt": "2022-10-24T05:00:00Z",
                            "dvalue": 21147.1895
                        },
                        {
                            "atimedt": "2022-10-24T05:10:00Z",
                            "dvalue": 21330.084
                        },
                        {
                            "atimedt": "2022-10-24T05:20:00Z",
                            "dvalue": 21205.3965
                        },
                        {
                            "atimedt": "2022-10-24T05:30:00Z",
                            "dvalue": 21116.1309
                        },
                        {
                            "atimedt": "2022-10-24T05:40:00Z",
                            "dvalue": 21189.4648
                        },
                        {
                            "atimedt": "2022-10-24T05:50:00Z",
                            "dvalue": 21113.0703
                        },
                        {
                            "atimedt": "2022-10-24T06:00:00Z",
                            "dvalue": 21074.2109
                        },
                        {
                            "atimedt": "2022-10-24T06:10:00Z",
                            "dvalue": 21093.0605
                        },
                        {
                            "atimedt": "2022-10-24T06:20:00Z",
                            "dvalue": 21117.6934
                        },
                        {
                            "atimedt": "2022-10-24T06:30:00Z",
                            "dvalue": 21087.8496
                        },
                        {
                            "atimedt": "2022-10-24T06:40:00Z",
                            "dvalue": 21077.8047
                        }
                    ]
                }
            ]
        }
    

    Select statement:

    SELECT channels[0].name,channels.unit,channels[0].dvalues.atimedt FROM longtimedata_v4;
    

    output: Please note the column name

    INFO  : OK
    +---------+--------+----------------------------------------------------+
    |  name   |  unit  |                      atimedt                       |
    +---------+--------+----------------------------------------------------+
    | U1 RMS  | ["V"]  | ["2022-10-24T00:00:00Z","2022-10-24T00:10:00Z","2022-10-24T00:20:00Z","2022-10-24T00:30:00Z","2022-10-24T00:40:00Z","2022-10-24T00:50:00Z","2022-10-24T01:00:00Z","2022-10-24T01:10:00Z","2022-10-24T01:20:00Z","2022-10-24T01:30:00Z","2022-10-24T01:40:00Z","2022-10-24T01:50:00Z","2022-10-24T02:00:00Z","2022-10-24T02:10:00Z","2022-10-24T02:20:00Z","2022-10-24T02:30:00Z","2022-10-24T02:40:00Z","2022-10-24T02:50:00Z","2022-10-24T03:00:00Z","2022-10-24T03:10:00Z","2022-10-24T03:20:00Z","2022-10-24T03:30:00Z","2022-10-24T03:40:00Z","2022-10-24T03:50:00Z","2022-10-24T04:00:00Z","2022-10-24T04:10:00Z","2022-10-24T04:20:00Z","2022-10-24T04:30:00Z","2022-10-24T04:40:00Z","2022-10-24T04:50:00Z","2022-10-24T05:00:00Z","2022-10-24T05:10:00Z","2022-10-24T05:20:00Z","2022-10-24T05:30:00Z","2022-10-24T05:40:00Z","2022-10-24T05:50:00Z","2022-10-24T06:00:00Z","2022-10-24T06:10:00Z","2022-10-24T06:20:00Z","2022-10-24T06:30:00Z","2022-10-24T06:40:00Z"] |
    +---------+--------+----------------------------------------------------+
    
    1. Using quoted identifiers without changing column names while creating table
    create external table longtimedata_v4 (
      measurementPointName string,
      measurementPointId string, 
      dataTypeId string,
      dataTypeName string, 
      channels ARRAY < struct < name : string, 
      unit : string, `values` : ARRAY < struct < `time` : string, 
      `value` : string >>>>
    )
    ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
    STORED AS TEXTFILE
    LOCATION '/warehouse/tablespace/external/hive/longtimedata'
    

    and while running select statement

    SELECT channels[0].name,channels.unit,channels[0].`values`.`time` FROM longtimedata_v4;
    

    output: Please note the column name

    INFO  : OK
    +---------+--------+----------------------------------------------------+
    |  name   |  unit  |                        time                        |
    +---------+--------+----------------------------------------------------+
    | U1 RMS  | ["V"]  | ["2022-10-24T00:00:00Z","2022-10-24T00:10:00Z","2022-10-24T00:20:00Z","2022-10-24T00:30:00Z","2022-10-24T00:40:00Z","2022-10-24T00:50:00Z","2022-10-24T01:00:00Z","2022-10-24T01:10:00Z","2022-10-24T01:20:00Z","2022-10-24T01:30:00Z","2022-10-24T01:40:00Z","2022-10-24T01:50:00Z","2022-10-24T02:00:00Z","2022-10-24T02:10:00Z","2022-10-24T02:20:00Z","2022-10-24T02:30:00Z","2022-10-24T02:40:00Z","2022-10-24T02:50:00Z","2022-10-24T03:00:00Z","2022-10-24T03:10:00Z","2022-10-24T03:20:00Z","2022-10-24T03:30:00Z","2022-10-24T03:40:00Z","2022-10-24T03:50:00Z","2022-10-24T04:00:00Z","2022-10-24T04:10:00Z","2022-10-24T04:20:00Z","2022-10-24T04:30:00Z","2022-10-24T04:40:00Z","2022-10-24T04:50:00Z","2022-10-24T05:00:00Z","2022-10-24T05:10:00Z","2022-10-24T05:20:00Z","2022-10-24T05:30:00Z","2022-10-24T05:40:00Z","2022-10-24T05:50:00Z","2022-10-24T06:00:00Z","2022-10-24T06:10:00Z","2022-10-24T06:20:00Z","2022-10-24T06:30:00Z","2022-10-24T06:40:00Z"] |
    +---------+--------+----------------------------------------------------+