I have a json saved in Athena table like
{
"VALIDATION_TYPE": "ROW_BY_ROW",
"DATABASE": "erp",
"TABLES": {
"APPLICATION_STATUS_TYPE": {
"BATCH_VALIDATION": {
"BATCHES": [{
"0": {
"FAILED": "FALSE",
"FAILURE_MSG": ""
}
}, {
"1": {
"FAILED": "TRUE",
"FAILURE_MSG": "NULL POINTER EXCEPTION"
}
}]
}
},
"APPLICATION": {
"BATCH_VALIDATION": {
"BATCHES": [{
"0": {
"FAILED": "FALSE",
"FAILURE_MSG": ""
}
}, {
"1": {
"FAILED": "TRUE",
"FAILURE_MSG": "NULL POINTER EXCEPTION"
}
}]
}
}
}
}
I need to write a query in Athena to find, find all the FAILED=TRUE records like below
output:
VALIDATION_TYPE,DATABASE,TABLE,ID,FAILED,FAILURE,FAIURE_MSG
----------------------------------------------------
ROW_BY_ROW,erp,APPLICATION_STATUS_TYPE,1,TRUE,NULL POINTER EXCEPTION
ROW_BY_ROW,erp,APPLICATION,1,TRUE,NULL POINTER EXCEPTION
I have tried various functions like TRANSFORM,UNNEST,JSON_EXTRACT etc , but no luck yet. Please advise if there are any specific functions I can use.
Thanks in advance
One trick which can be used is to cast some json parts as map(varchar, something)
and/or array
's:
-- sample data
with dataset(json_val) as (
values (json '{
"VALIDATION_TYPE": "ROW_BY_ROW",
"DATABASE": "erp",
"TABLES": {
"APPLICATION_STATUS_TYPE": {
"BATCH_VALIDATION": {
"BATCHES": [{
"0": {
"FAILED": "FALSE",
"FAILURE_MSG": ""
}
}, {
"1": {
"FAILED": "TRUE",
"FAILURE_MSG": "NULL POINTER EXCEPTION"
}
}]
}
},
"APPLICATION": {
"BATCH_VALIDATION": {
"BATCHES": [{
"0": {
"FAILED": "FALSE",
"FAILURE_MSG": ""
}
}, {
"1": {
"FAILED": "TRUE",
"FAILURE_MSG": "NULL POINTER EXCEPTION"
}
}]
}
}
}
}')
)
-- query
select json_extract_scalar(json_val, '$.VALIDATION_TYPE') VALIDATION_TYPE,
json_extract_scalar(json_val, '$.DATABASE') DATABASE,
t1.k "TABLE",
t3.k ID,
t3.map_v['FAILED'] FAILED,
t3.map_v['FAILURE_MSG'] FAILURE_MSG
from dataset
, unnest(cast(json_extract(json_val, '$.TABLES') as map(varchar, json))) as t1(k, v)
, unnest(cast(json_extract(t1.v, '$.BATCH_VALIDATION.BATCHES') as array(map(varchar, map(varchar, json))))) as t2(m)
, unnest(t2.m) as t3(k, map_v);
Output:
VALIDATION_TYPE | DATABASE | TABLE | ID | FAILED | FAILURE_MSG |
---|---|---|---|---|---|
ROW_BY_ROW | erp | APPLICATION | 0 | FALSE | |
ROW_BY_ROW | erp | APPLICATION | 1 | TRUE | NULL POINTER EXCEPTION |
ROW_BY_ROW | erp | APPLICATION_STATUS_TYPE | 0 | FALSE | |
ROW_BY_ROW | erp | APPLICATION_STATUS_TYPE | 1 | TRUE | NULL POINTER EXCEPTION |
And then you can apply the filtering.