Search code examples
jsonhadoophivenestedhive-serde

Error in Nested JSON in HIve


I was trying to load this json data in hive

{
    "id": "0001",
    "type": "donut",
     "name": "Cake",
     "ppu": 0.55,
     "batters":
         {
             "batter":
                 [
                     { "id": "1001", "type": "Regular" },
                     { "id": "1002", "type": "Chocolate" },
                    { "id": "1003", "type": "Blueberry" },
                    { "id": "1004", "type": "Devil's Food" }
                ]
        },
    "topping":
        [
            { "id": "5001", "type": "None" },
            { "id": "5002", "type": "Glazed" },
            { "id": "5005", "type": "Sugar" },
            { "id": "5007", "type": "Powdered Sugar" },
            { "id": "5006", "type": "Chocolate with Sprinkles" },
            { "id": "5003", "type": "Chocolate" },
            { "id": "5004", "type": "Maple" }
        ]
}

using DDL commands

ADD JAR /home/cloudera/Downloads/json-serde-1.3.6-SNAPSHOT-jar-with-dependencies.jar;

CREATE EXTERNAL TABLE format.json_serde (
  `id` string,
  `type` string,
  `name` string,
 `ppu` float,       
  batters` struct < `batter`:array < struct <`bid`:string, `btype`:string >>>,
  `topping`:array < struct<`tid`:int, `ttype`:string>>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe';

is throwing me error

FAILED: ParseException line 7:11 cannot recognize input near ':' 'array' '<' in column type </b>

Solution

    1. You got typos
      ttype`:string should be ttype:string
      battersstruct should be batters struct
      topping:array should be topping array

    2. JSON SerDe mapping is done by name.
      Your structs fields names should match the actual names, e.g. id and not bid or tid, otherwise you'll get NULL values for these fields.

    3. There is already a JSON SerDe whicg is part of the Hive installation. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe


    create external table json_serde 
    ( 
        id      string
       ,type    string 
       ,name    string 
       ,ppu     float
       ,batters struct<batter:array<struct<id:string,type:string>>>
       ,topping array<struct<id:string,type:string>>
    ) 
    row format serde 
    'org.apache.hive.hcatalog.data.JsonSerDe' 
    stored as textfile
    ;
    

    select * from json_serde
    ;
    

    +------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    |  id  | type  | name |        ppu        |                                                                     batters                                                                      |                                                                                                                  topping                                                                                                                  |
    +------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    | 0001 | donut | Cake | 0.550000011920929 | {"batter":[{"id":"1001","type":"Regular"},{"id":"1002","type":"Chocolate"},{"id":"1003","type":"Blueberry"},{"id":"1004","type":"Devil'sFood"}]} | [{"id":"5001","type":"None"},{"id":"5002","type":"Glazed"},{"id":"5005","type":"Sugar"},{"id":"5007","type":"PowderedSugar"},{"id":"5006","type":"ChocolatewithSprinkles"},{"id":"5003","type":"Chocolate"},{"id":"5004","type":"Maple"}] |
    +------+-------+------+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+