I am trying to load an external table via a JSON file having free schema arrays and structures.
create external table longtimedata_v4
(measurementPointName string,measurementPointId string,dataTypeId string,dataTypeName string,channels ARRAY< struct<name:string,unit:string,dvalues:ARRAY<struct<atimedt:string,dvalue:string>>>>)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION '/warehouse/tablespace/external/hive/longtimedata'
{
"measurementPointName": "Saab Scania, BBY2908, Sibbhult",
"measurementPointId": 35,
"dataTypeId": 1,
"dataTypeName": "Voltage",
"channels": [
{
"name": "U1 RMS",
"unit": "V",
"values": [
{
"time": "2022-10-24T00:00:00Z",
"value": 21348.7695
},
{
"time": "2022-10-24T00:10:00Z",
"value": 21342.7656
},
{
"time": "2022-10-24T00:20:00Z",
"value": 21401.0957
},
{
"time": "2022-10-24T00:30:00Z",
"value": 21415.2129
},
{
"time": "2022-10-24T00:40:00Z",
"value": 21456.9062
},
{
"time": "2022-10-24T00:50:00Z",
"value": 21501.541
},
{
"time": "2022-10-24T01:00:00Z",
"value": 21527.4355
},
{
"time": "2022-10-24T01:10:00Z",
"value": 21510.9512
},
{
"time": "2022-10-24T01:20:00Z",
"value": 21436.2266
},
{
"time": "2022-10-24T01:30:00Z",
"value": 21423.5176
},
{
"time": "2022-10-24T01:40:00Z",
"value": 21415.6211
},
{
"time": "2022-10-24T01:50:00Z",
"value": 21416.8008
},
{
"time": "2022-10-24T02:00:00Z",
"value": 21460.6016
},
{
"time": "2022-10-24T02:10:00Z",
"value": 21502.0195
},
{
"time": "2022-10-24T02:20:00Z",
"value": 21405.748
},
{
"time": "2022-10-24T02:30:00Z",
"value": 21321.9062
},
{
"time": "2022-10-24T02:40:00Z",
"value": 21394.6074
},
{
"time": "2022-10-24T02:50:00Z",
"value": 21392.3496
},
{
"time": "2022-10-24T03:00:00Z",
"value": 21412.6875
},
{
"time": "2022-10-24T03:10:00Z",
"value": 21320.5469
},
{
"time": "2022-10-24T03:20:00Z",
"value": 21309.9746
},
{
"time": "2022-10-24T03:30:00Z",
"value": 21229.5879
},
{
"time": "2022-10-24T03:40:00Z",
"value": 21174.2891
},
{
"time": "2022-10-24T03:50:00Z",
"value": 21100.6406
},
{
"time": "2022-10-24T04:00:00Z",
"value": 21188.2207
},
{
"time": "2022-10-24T04:10:00Z",
"value": 21146.1191
},
{
"time": "2022-10-24T04:20:00Z",
"value": 21328.709
},
{
"time": "2022-10-24T04:30:00Z",
"value": 21216.4844
},
{
"time": "2022-10-24T04:40:00Z",
"value": 21104.7871
},
{
"time": "2022-10-24T04:50:00Z",
"value": 21096.8398
},
{
"time": "2022-10-24T05:00:00Z",
"value": 21147.1895
},
{
"time": "2022-10-24T05:10:00Z",
"value": 21330.084
},
{
"time": "2022-10-24T05:20:00Z",
"value": 21205.3965
},
{
"time": "2022-10-24T05:30:00Z",
"value": 21116.1309
},
{
"time": "2022-10-24T05:40:00Z",
"value": 21189.4648
},
{
"time": "2022-10-24T05:50:00Z",
"value": 21113.0703
},
{
"time": "2022-10-24T06:00:00Z",
"value": 21074.2109
},
{
"time": "2022-10-24T06:10:00Z",
"value": 21093.0605
},
{
"time": "2022-10-24T06:20:00Z",
"value": 21117.6934
},
{
"time": "2022-10-24T06:30:00Z",
"value": 21087.8496
},
{
"time": "2022-10-24T06:40:00Z",
"value": 21077.8047
}
]
}
]
}
My data got loaded, but not fully. The values array channels.dvalues.atimedt
or dvalue
is coming with null
only.
I tried to load data via a load statement and other ways to populate the values array and underlying structure, but nothing worked out.
SELECT channels[5].name,channels.unit,channels[5].dvalues.atimedt
FROM observematare.longtimedata_v4;
U2 max ["V","V","V","V","V","V","V","V","V","V","V","V"] NULL
2 U2 max ["V","V","V","V","V","V","V","V","V","V","V","V"] NULL
There is no hive version mentioned in the question.
Also you have not mentioned the reason for using different column names in table for the field names in data file. I assume it because 'values' and 'time' are reserved keywords.
The failure (values coming as null) reason - column names are different from the field names in json data
There are two ways to fix this issue
CREATE EXTERNAL TABLE
statementquoted identifiers
About using Reserved keywords from Hive documentation
Reserved keywords are permitted as identifiers if you quote them as described in Supporting Quoted Identifiers in Column Names (version 0.13.0 and later, see HIVE-6013). Most of the keywords are reserved through HIVE-6617 in order to reduce the ambiguity in grammar (version 1.2.0 and later). There are two ways if the user still would like to use those reserved keywords as identifiers: (1) use quoted identifiers, (2) set hive.support.sql11.reserved.keywords=false. (version 2.1.0 and earlier)
{
"measurementPointName": "Saab Scania, BBY2908, Sibbhult",
"measurementPointId": 35,
"dataTypeId": 1,
"dataTypeName": "Voltage",
"channels": [
{
"name": "U1 RMS",
"unit": "V",
"dvalues": [
{
"atimedt": "2022-10-24T00:00:00Z",
"dvalue": 21348.7695
},
{
"atimedt": "2022-10-24T00:10:00Z",
"dvalue": 21342.7656
},
{
"atimedt": "2022-10-24T00:20:00Z",
"dvalue": 21401.0957
},
{
"atimedt": "2022-10-24T00:30:00Z",
"dvalue": 21415.2129
},
{
"atimedt": "2022-10-24T00:40:00Z",
"dvalue": 21456.9062
},
{
"atimedt": "2022-10-24T00:50:00Z",
"dvalue": 21501.541
},
{
"atimedt": "2022-10-24T01:00:00Z",
"dvalue": 21527.4355
},
{
"atimedt": "2022-10-24T01:10:00Z",
"dvalue": 21510.9512
},
{
"atimedt": "2022-10-24T01:20:00Z",
"dvalue": 21436.2266
},
{
"atimedt": "2022-10-24T01:30:00Z",
"dvalue": 21423.5176
},
{
"atimedt": "2022-10-24T01:40:00Z",
"dvalue": 21415.6211
},
{
"atimedt": "2022-10-24T01:50:00Z",
"dvalue": 21416.8008
},
{
"atimedt": "2022-10-24T02:00:00Z",
"dvalue": 21460.6016
},
{
"atimedt": "2022-10-24T02:10:00Z",
"dvalue": 21502.0195
},
{
"atimedt": "2022-10-24T02:20:00Z",
"dvalue": 21405.748
},
{
"atimedt": "2022-10-24T02:30:00Z",
"dvalue": 21321.9062
},
{
"atimedt": "2022-10-24T02:40:00Z",
"dvalue": 21394.6074
},
{
"atimedt": "2022-10-24T02:50:00Z",
"dvalue": 21392.3496
},
{
"atimedt": "2022-10-24T03:00:00Z",
"dvalue": 21412.6875
},
{
"atimedt": "2022-10-24T03:10:00Z",
"dvalue": 21320.5469
},
{
"atimedt": "2022-10-24T03:20:00Z",
"dvalue": 21309.9746
},
{
"atimedt": "2022-10-24T03:30:00Z",
"dvalue": 21229.5879
},
{
"atimedt": "2022-10-24T03:40:00Z",
"dvalue": 21174.2891
},
{
"atimedt": "2022-10-24T03:50:00Z",
"dvalue": 21100.6406
},
{
"atimedt": "2022-10-24T04:00:00Z",
"dvalue": 21188.2207
},
{
"atimedt": "2022-10-24T04:10:00Z",
"dvalue": 21146.1191
},
{
"atimedt": "2022-10-24T04:20:00Z",
"dvalue": 21328.709
},
{
"atimedt": "2022-10-24T04:30:00Z",
"dvalue": 21216.4844
},
{
"atimedt": "2022-10-24T04:40:00Z",
"dvalue": 21104.7871
},
{
"atimedt": "2022-10-24T04:50:00Z",
"dvalue": 21096.8398
},
{
"atimedt": "2022-10-24T05:00:00Z",
"dvalue": 21147.1895
},
{
"atimedt": "2022-10-24T05:10:00Z",
"dvalue": 21330.084
},
{
"atimedt": "2022-10-24T05:20:00Z",
"dvalue": 21205.3965
},
{
"atimedt": "2022-10-24T05:30:00Z",
"dvalue": 21116.1309
},
{
"atimedt": "2022-10-24T05:40:00Z",
"dvalue": 21189.4648
},
{
"atimedt": "2022-10-24T05:50:00Z",
"dvalue": 21113.0703
},
{
"atimedt": "2022-10-24T06:00:00Z",
"dvalue": 21074.2109
},
{
"atimedt": "2022-10-24T06:10:00Z",
"dvalue": 21093.0605
},
{
"atimedt": "2022-10-24T06:20:00Z",
"dvalue": 21117.6934
},
{
"atimedt": "2022-10-24T06:30:00Z",
"dvalue": 21087.8496
},
{
"atimedt": "2022-10-24T06:40:00Z",
"dvalue": 21077.8047
}
]
}
]
}
Select statement:
SELECT channels[0].name,channels.unit,channels[0].dvalues.atimedt FROM longtimedata_v4;
output: Please note the column name
INFO : OK
+---------+--------+----------------------------------------------------+
| name | unit | atimedt |
+---------+--------+----------------------------------------------------+
| U1 RMS | ["V"] | ["2022-10-24T00:00:00Z","2022-10-24T00:10:00Z","2022-10-24T00:20:00Z","2022-10-24T00:30:00Z","2022-10-24T00:40:00Z","2022-10-24T00:50:00Z","2022-10-24T01:00:00Z","2022-10-24T01:10:00Z","2022-10-24T01:20:00Z","2022-10-24T01:30:00Z","2022-10-24T01:40:00Z","2022-10-24T01:50:00Z","2022-10-24T02:00:00Z","2022-10-24T02:10:00Z","2022-10-24T02:20:00Z","2022-10-24T02:30:00Z","2022-10-24T02:40:00Z","2022-10-24T02:50:00Z","2022-10-24T03:00:00Z","2022-10-24T03:10:00Z","2022-10-24T03:20:00Z","2022-10-24T03:30:00Z","2022-10-24T03:40:00Z","2022-10-24T03:50:00Z","2022-10-24T04:00:00Z","2022-10-24T04:10:00Z","2022-10-24T04:20:00Z","2022-10-24T04:30:00Z","2022-10-24T04:40:00Z","2022-10-24T04:50:00Z","2022-10-24T05:00:00Z","2022-10-24T05:10:00Z","2022-10-24T05:20:00Z","2022-10-24T05:30:00Z","2022-10-24T05:40:00Z","2022-10-24T05:50:00Z","2022-10-24T06:00:00Z","2022-10-24T06:10:00Z","2022-10-24T06:20:00Z","2022-10-24T06:30:00Z","2022-10-24T06:40:00Z"] |
+---------+--------+----------------------------------------------------+
create external table longtimedata_v4 (
measurementPointName string,
measurementPointId string,
dataTypeId string,
dataTypeName string,
channels ARRAY < struct < name : string,
unit : string, `values` : ARRAY < struct < `time` : string,
`value` : string >>>>
)
ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'
STORED AS TEXTFILE
LOCATION '/warehouse/tablespace/external/hive/longtimedata'
and while running select statement
SELECT channels[0].name,channels.unit,channels[0].`values`.`time` FROM longtimedata_v4;
output: Please note the column name
INFO : OK
+---------+--------+----------------------------------------------------+
| name | unit | time |
+---------+--------+----------------------------------------------------+
| U1 RMS | ["V"] | ["2022-10-24T00:00:00Z","2022-10-24T00:10:00Z","2022-10-24T00:20:00Z","2022-10-24T00:30:00Z","2022-10-24T00:40:00Z","2022-10-24T00:50:00Z","2022-10-24T01:00:00Z","2022-10-24T01:10:00Z","2022-10-24T01:20:00Z","2022-10-24T01:30:00Z","2022-10-24T01:40:00Z","2022-10-24T01:50:00Z","2022-10-24T02:00:00Z","2022-10-24T02:10:00Z","2022-10-24T02:20:00Z","2022-10-24T02:30:00Z","2022-10-24T02:40:00Z","2022-10-24T02:50:00Z","2022-10-24T03:00:00Z","2022-10-24T03:10:00Z","2022-10-24T03:20:00Z","2022-10-24T03:30:00Z","2022-10-24T03:40:00Z","2022-10-24T03:50:00Z","2022-10-24T04:00:00Z","2022-10-24T04:10:00Z","2022-10-24T04:20:00Z","2022-10-24T04:30:00Z","2022-10-24T04:40:00Z","2022-10-24T04:50:00Z","2022-10-24T05:00:00Z","2022-10-24T05:10:00Z","2022-10-24T05:20:00Z","2022-10-24T05:30:00Z","2022-10-24T05:40:00Z","2022-10-24T05:50:00Z","2022-10-24T06:00:00Z","2022-10-24T06:10:00Z","2022-10-24T06:20:00Z","2022-10-24T06:30:00Z","2022-10-24T06:40:00Z"] |
+---------+--------+----------------------------------------------------+