I have a json col in a table along with an id:
id | json_col |
---|---|
abc123 | json_text |
json value is as below:
[
{
"type": 0,
"isPro": true,
"addOns": [
{
"cod": "DAIRY_PLAN",
"base": 0,
"type": 0,
"count": 1,
"price": 140,
"goodsType": null,
"typeAddOn": 2,
"goodRelatedId": null
},
{
"cod": "HW12-201809",
"base": 0,
"type": 0,
"count": 1,
"price": 99,
"goodsType": null,
"typeAddOn": 1,
"goodRelatedId": null
},
{
"cod": "HW_BAND_E",
"base": 0,
"type": 0,
"count": 1,
"price": 40,
"goodsType": null,
"typeAddOn": 3,
"goodRelatedId": null
},
{
"cod": "HW_LEVEL_3",
"base": 279,
"type": 1,
"count": 1,
"price": 100,
"goodsType": null,
"typeAddOn": 4,
"goodRelatedId": null
}
]
},
{
"type": 1,
"isPro": true,
"addOns": [
{
"cod": "HW12-FW",
"base": 0,
"type": 0,
"count": 1,
"price": 99.3333,
"goodsType": null,
"typeAddOn": 1,
"goodRelatedId": null
},
{
"cod": "HW_BAND_SHEEP_A",
"base": 0,
"type": 0,
"count": 1,
"price": 0,
"goodsType": null,
"typeAddOn": 3,
"goodRelatedId": null
},
{
"cod": "HW_LEVEL_2",
"base": 99.33,
"type": 1,
"count": 1,
"price": 50,
"goodsType": null,
"typeAddOn": 4,
"goodRelatedId": null
},
{
"cod": "SHEEP_PLAN",
"base": 0,
"type": 0,
"count": 1,
"price": 0,
"goodsType": null,
"typeAddOn": 2,
"goodRelatedId": null
}
]
},
{
"type": 2,
"isPro": true,
"addOns": [
{
"cod": "CROP_PLAN",
"base": 0,
"type": 0,
"count": 1,
"price": 0,
"goodsType": null,
"typeAddOn": 2,
"goodRelatedId": null
},
{
"cod": "CW_BAND_G",
"base": 0,
"type": 0,
"count": 1,
"price": 120,
"goodsType": null,
"typeAddOn": 3,
"goodRelatedId": null
},
{
"cod": "HW12-CROP",
"base": 0,
"type": 0,
"count": 1,
"price": 99,
"goodsType": null,
"typeAddOn": 1,
"goodRelatedId": null
},
{
"cod": "HW_LEVEL_1",
"base": 219,
"type": 1,
"count": 1,
"price": 0,
"goodsType": null,
"typeAddOn": 4,
"goodRelatedId": null
}
]
}
]
I want to extract each nested object and arrive at:
id | item1_name | item2_name | item3_name | item4_name | item1_price | item2_price | item3_price | item4_price |
---|---|---|---|---|---|---|---|---|
abc123 | json_text[0].addons[0].cod | json_text[0].addons[1].cod | json_text[0].addons[2].cod | json_text[0].addons[3].cod | json_text[0].addons[0].price | json_text[0].addons[1].price | json_text[0].addons[2].price | json_text[0].addons[3].price |
Tried a few ways to access it but without explicitly entering a subscript number im unable to get all values using *
e.g.
with
explode_array as (
select json_query(json_col,'lax $[*].addOns' ) as array_items from get_inner_json
)
select * from explode_array
but it just throws null? but when i check the docs it seems like it should work? https://trino.io/docs/current/functions/json.html#array-accessor
Any ideas on how to tackle this? Im using aws athena (trino) so ANSI SQL
Trino does not support dynamic flattening/pivoting, so if you want separate columns then you will need to create them manually. json_extract
/json_extract_scalar
will work:
select json_extract(json_col, '$[0].addOns[0].cod'),
json_extract(json_col, '$[0].addOns[1].cod'),
json_extract(json_col, '$[0].addOns[2].cod'),
json_extract(json_col, '$[0].addOns[3].cod'),
json_extract(json_col, '$[0].addOns[0].price'),
json_extract(json_col, '$[0].addOns[1].price'),
json_extract(json_col, '$[0].addOns[2].price'),
json_extract(json_col, '$[0].addOns[3].price')
from dataset;
Output:
_col0 | _col1 | _col2 | _col3 | _col4 | _col5 | _col6 | _col7 |
---|---|---|---|---|---|---|---|
"DAIRY_PLAN" | "HW12-201809" | "HW_BAND_E" | "HW_LEVEL_3" | 140 | 99 | 40 | 100 |
Otherwise you need to use arrays. Note that you have array in array, so you need to use double wildcards:
select json_query(json_col,'lax $[*].addOns[*].cod' with array wrapper) names, -- use lax $[*].addOns[*] to get whole JSON object
json_query(json_col,'lax $[*].addOns[*].price' with array wrapper) prices
from dataset;
Output:
names | prices |
---|---|
["DAIRY_PLAN","HW12-201809","HW_BAND_E","HW_LEVEL_3","HW12-FW","HW_BAND_SHEEP_A","HW_LEVEL_2","SHEEP_PLAN","CROP_PLAN","CW_BAND_G","HW12-CROP","HW_LEVEL_1"] | [140,99,40,100,99.3333,0,50,0,0,120,99,0] |