I was trying to load this json data in hive
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
using DDL commands
ADD JAR /home/cloudera/Downloads/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;
CREATE EXTERNAL TABLE format.json_serde (
`id` string,
`type` string,
`name` string,
`ppu` float,
batters` struct < `batter`:array < struct <`bid`:string, `btype`:string >>>,
`topping`:array < struct<`tid`:int, `ttype`:string>>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';
is throwing me error
FAILED: ParseException line 7:11 cannot recognize input near ':' 'array' '<' in column type </b>
You got typos
ttype`:string should be ttype:string
battersstruct should be batters struct
topping:array should be topping array
JSON SerDe mapping is done by name.
Your structs fields names should match the actual names, e.g. id
and not bid
or tid
, otherwise you'll get NULL values for these fields.
There is already a JSON SerDe whicg is part of the Hive installation. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe
create external table json_serde
(
id string
,type string
,name string
,ppu float
,batters struct<batter:array<struct<id:string,type:string>>>
,topping array<struct<id:string,type:string>>
)
row format serde
'org.apache.hive.hcatalog.data.JsonSerDe'
stored as textfile
;
select * from json_serde
;
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| id | type | name | ppu | batters | topping |
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 0001 | donut | Cake | 0.550000011920929 | {"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil'sFood"}]} | [{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"PowderedSugar"},{"id":"5006","type":"ChocolatewithSprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}] |
+------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+