I need to parse the complex JSON below in Scala to get the values of "expression" and "value" for each entry under the "measures" key, i.e. I need List(COUNT, COUNT_DISTINCT, ...) and List(1, tbl1.USER_ID, ...).
I tried multiple options, but none of them worked. Any help is appreciated.
{
"uuid": "uuidddd",
"last_modified": 1559080222953,
"version": "2.6.1.0",
"name": "FULL_DAY_2_mand_date",
"is_draft": false,
"model_name": "FULL_DAY_1_may05",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "PLATFORM",
"table": "tbl1",
"column": "PLATFORM",
"derived": null
},
{
"name": "OS_VERSION",
"table": "tbl1",
"column": "OS_VERSION",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1"
},
"returntype": "bigint"
}
},
{
"name": "UU",
"function": {
"expression": "COUNT_DISTINCT",
"parameter": {
"type": "column",
"value": "tbl1.USER_ID"
},
"returntype": "hllc(12)"
}
},
{
"name": "CONT_SIZE",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "tbl1.SIZE"
},
"returntype": "bigint"
}
},
{
"name": "CONT_COUNT",
"function": {
"expression": "SUM",
"parameter": {
"type": "column",
"value": "tbl1.COUNT"
},
"returntype": "bigint"
}
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "tbl1.OS_VERSION",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "tbl1.PLATFORM",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "tbl1.DEVICE_FAMILY",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_",
"CONT_SIZE",
"CONT_COUNT"
]
}
]
},
{
"name": "F2",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"UU"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"tbl1.PLATFORM",
"tbl1.OS_VERSION"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [
"tbl1.DATE_HR"
],
"joint_dims": []
}
}
],
"signature": "ttrrs==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 0,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"volatile_range": 0,
"retention_range": 0,
"engine_type": 4,
"storage_type": 2,
"override_kylin_properties": {
"job.queuename": "root.production.P0",
"is-mandatory-only-valid": "true"
},
"cuboid_black_list": [],
"parent_forward": 3,
"mandatory_dimension_set_list": [],
"snapshot_table_desc_list": []
}
This is a snippet of the code I tried; it gives me a null list:
import org.json4s._
import org.json4s.jackson.JsonMethods._
// Implicit json4s formats (library defaults), made available to the extract call below.
implicit val formats = org.json4s.DefaultFormats
// Mirrors the "function" object of a measure entry: expression, parameter, returntype.
case class Function (
expression: String,
parameter: Parameter,
returntype: String
)
// Mirrors the "parameter" object nested inside "function".
case class Parameter (
// `type` is a reserved word in Scala, hence the backticks.
`type`: String,
value: String
)
// Mirrors one element of the measure array: its name and its function.
case class Measures (
name: String,
function: Function
)
// Top-level projection of the document: only "uuid" and the measure list are declared.
// NOTE(review): the JSON key is "measures" (plural) but this field is named `measure`,
// so no key in the document matches it; presumably this is why the list comes back
// empty (confirm against json4s extraction rules).
case class AllMeasuresData(uuid: String, measure: List[Measures])
// `tmp` is not defined in this snippet; presumably it holds the JSON text as a String.
val data = parse(tmp).extract[AllMeasuresData]
val names = data.measure.map(_.name)
println(names)
// NOTE(review): the four lines below repeat the four lines above verbatim.
case class AllMeasuresData(uuid: String, measure: List[Measures])
val data = parse(tmp).extract[AllMeasuresData]
val names = data.measure.map(_.name)
println(names)
There are a couple of typos in your ADT: in particular, the field `measure` must be named `measures` so that it matches the JSON key.
Here is what you need:
/** Mirrors the JSON "parameter" object; `type` is backticked because it is a Scala keyword. */
case class Parameter(`type`: String, value: String)

/** Mirrors the JSON "function" object: expression, its parameter, and the return type. */
case class Function(expression: String, parameter: Parameter, returntype: String)

/** Mirrors one element of the JSON "measures" array. */
case class Measures(name: String, function: Function)

/**
 * Root projection of the document. Only "uuid" and "measures" are declared;
 * the field names match the JSON keys exactly.
 */
case class AllMeasuresData(uuid: String, measures: List[Measures])
There is also an extra comma in the JSON; here is the corrected version:
{
"uuid":"uuidddd",
"last_modified":1559080222953,
"version":"2.6.1.0",
"name":"FULL_DAY_2_mand_date",
"is_draft":false,
"model_name":"FULL_DAY_1_may05",
"description":"",
"null_string":null,
"dimensions":[
{
"name":"PLATFORM",
"table":"tbl1",
"column":"PLATFORM",
"derived":null
},
{
"name":"OS_VERSION",
"table":"tbl1",
"column":"OS_VERSION",
"derived":null
} // There was an extra trailing comma here
],
"measures":[
{
"name":"_COUNT_",
"function":{
"expression":"COUNT",
"parameter":{
"type":"constant",
"value":"1"
},
"returntype":"bigint"
}
},
{
"name":"UU",
"function":{
"expression":"COUNT_DISTINCT",
"parameter":{
"type":"column",
"value":"tbl1.USER_ID"
},
"returntype":"hllc(12)"
}
},
{
"name":"CONT_SIZE",
"function":{
"expression":"SUM",
"parameter":{
"type":"column",
"value":"tbl1.SIZE"
},
"returntype":"bigint"
}
},
{
"name":"CONT_COUNT",
"function":{
"expression":"SUM",
"parameter":{
"type":"column",
"value":"tbl1.COUNT"
},
"returntype":"bigint"
}
}
],
"dictionaries":[
],
"rowkey":{
"rowkey_columns":[
{
"column":"tbl1.OS_VERSION",
"encoding":"dict",
"encoding_version":1,
"isShardBy":false
},
{
"column":"tbl1.PLATFORM",
"encoding":"dict",
"encoding_version":1,
"isShardBy":false
},
{
"column":"tbl1.DEVICE_FAMILY",
"encoding":"dict",
"encoding_version":1,
"isShardBy":false
}
]
},
"hbase_mapping":{
"column_family":[
{
"name":"F1",
"columns":[
{
"qualifier":"M",
"measure_refs":[
"_COUNT_",
"CONT_SIZE",
"CONT_COUNT"
]
}
]
},
{
"name":"F2",
"columns":[
{
"qualifier":"M",
"measure_refs":[
"UU"
]
}
]
}
]
},
"aggregation_groups":[
{
"includes":[
"tbl1.PLATFORM",
"tbl1.OS_VERSION"
],
"select_rule":{
"hierarchy_dims":[
],
"mandatory_dims":[
"tbl1.DATE_HR"
],
"joint_dims":[
]
}
}
],
"signature":"ttrrs==",
"notify_list":[
],
"status_need_notify":[
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start":0,
"partition_date_end":3153600000000,
"auto_merge_time_ranges":[
604800000,
2419200000
],
"volatile_range":0,
"retention_range":0,
"engine_type":4,
"storage_type":2,
"override_kylin_properties":{
"job.queuename":"root.production.P0",
"is-mandatory-only-valid":"true"
},
"cuboid_black_list":[
],
"parent_forward":3,
"mandatory_dimension_set_list":[
],
"snapshot_table_desc_list":[
]
}
Now you can run:
// Extract the typed view of the document; `tmp` is assumed to hold the JSON text as a String.
val data = parse(tmp).extract[AllMeasuresData]
// Project each measure, in document order, onto the fields of interest.
val names = data.measures.map(_.name)
// The two lists the question asks for: function expressions and parameter values.
val expressions = data.measures.map(_.function.expression)
val values = data.measures.map(_.function.parameter.value)
println(names)
println(expressions)
println(values)
// Displays
// List(_COUNT_, UU, CONT_SIZE, CONT_COUNT)
// List(COUNT, COUNT_DISTINCT, SUM, SUM)
// List(1, tbl1.USER_ID, tbl1.SIZE, tbl1.COUNT)