Search code examples
jsongoogle-bigqueryjsonpath

how to read multiple levels of JSON data in Big Query using JSON_EXTRACT or JSON_EXTRACT_SCALAR


I am trying to read the below JSON structure in Bigquery using JSON_EXTRACT in Bigquery ..it says unsupported operator Path "*"

Tried all the ways I can in BigQuery and Request your help

Error: Unsupported operator in JSONPath: *

****JSON data:** JUST THE PORTION that has multiple values and which has Issues while reading . Need to read all 4 "id" values below as an e.g. and need to read all other columns as well under Combo section which produces 4 rows with different ID,Type etc.**

"Combos": [
  {
    "Id": "1111",
    "Type": 0,
    "Description": "ABCD",
    "ComboDuration": {
      "StartDate": "2009-10-26T08:00:00",
      "EndDate": "2009-10-29T08:00:00"
    }
  },
  {
    "Id": "2222",
    "Type": 1,
    "Description": "XYZ",
    "ComboDuration": {
      "StartDate": "2019-10-26T08:00:00",
      "EndDate": "2019-10-29T08:00:00"
    }
  },
  {
    "Id": "39933",
    "Type": 3,
    "Description": "General",
    "ComboDuration": {
      "StartDate": "2019-10-26T08:00:00",
      "EndDate": "2019-10-29T08:00:00"
    }
  },
  {
    "Id": "39934",
    "Type": 2,
    "Description": "ABCDXYZ",
    "ComboDuration": {
      "StartDate": "2019-10-26T08:00:00",
      "EndDate": "2019-10-29T08:00:00"
    }
  },

]

****Code:** P.S - conv_column is a string column where my JSON structure stored**

SELECT 
JSON_EXTRACT(conv_column,"$.Combos.*.Id") as combo_id
from lz.json_file

SELECT JSON_EXTRACT(conv_column,"$.Combos[*].Id") as combo_id
from lz.json_file

SELECT JSON_EXTRACT(conv_column,"$.Combos[[email protected]]") as combo_id
from lz.json_file

Solution

  • Below example BigQuery for Standard SQL

    #standardSQL
    CREATE TEMP FUNCTION jsonparse(input STRING)
    RETURNS ARRAY<STRING>
    LANGUAGE js AS """
      return JSON.parse(input).map(x=>JSON.stringify(x));
    """; 
    WITH `project.lz.json_file` AS (
      SELECT '''{
      "Combos": [  {
        "Id": "1111",
        "Type": 0,
        "Description": "ABCD",
        "ComboDuration": {
          "StartDate": "2009-10-26T08:00:00",
          "EndDate": "2009-10-29T08:00:00"
        }  },  {
        "Id": "2222",
        "Type": 1,
        "Description": "XYZ",
        "ComboDuration": {
          "StartDate": "2019-10-26T08:00:00",
          "EndDate": "2019-10-29T08:00:00"
        }  },  {
        "Id": "39933",
        "Type": 3,
        "Description": "General",
        "ComboDuration": {
          "StartDate": "2019-10-26T08:00:00",
          "EndDate": "2019-10-29T08:00:00"
        }  },  {
        "Id": "39934",
        "Type": 2,
        "Description": "ABCDXYZ",
        "ComboDuration": {
          "StartDate": "2019-10-26T08:00:00",
          "EndDate": "2019-10-29T08:00:00"
        }  }]}  ''' AS conv_column
    )
    SELECT
      JSON_EXTRACT_SCALAR(combo, '$.Id') AS Id,
      JSON_EXTRACT_SCALAR(combo, '$.Type') AS Type,
      JSON_EXTRACT_SCALAR(combo, '$.Description') AS Description,
      JSON_EXTRACT_SCALAR(combo, '$.ComboDuration.StartDate') AS StartDate,
      JSON_EXTRACT_SCALAR(combo, '$.ComboDuration.EndDate') AS EndDate
    FROM `project.lz.json_file`,
    UNNEST(jsonparse(JSON_EXTRACT(conv_column, '$.Combos'))) combo
    

    with output

    Row Id      Type    Description StartDate           EndDate  
    1   1111    0       ABCD        2009-10-26T08:00:00 2009-10-29T08:00:00  
    2   2222    1       XYZ         2019-10-26T08:00:00 2019-10-29T08:00:00  
    3   39933   3       General     2019-10-26T08:00:00 2019-10-29T08:00:00  
    4   39934   2       ABCDXYZ     2019-10-26T08:00:00 2019-10-29T08:00:00