Search code examples
json scala json4s

Parse Complex JSON -- Map


I need to parse the complex JSON below in Scala to get the values of "expression" and "value" for every entry under the "measures" key, i.e. I need List(COUNT, COUNT_DISTINCT, ...) and List(1, tbl1.USER_ID, ...).

I tried multiple approaches, but none of them works. Any help is appreciated.

{
  "uuid": "uuidddd",
  "last_modified": 1559080222953,
  "version": "2.6.1.0",
  "name": "FULL_DAY_2_mand_date",
  "is_draft": false,
  "model_name": "FULL_DAY_1_may05",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "PLATFORM",
      "table": "tbl1",
      "column": "PLATFORM",
      "derived": null
    },
    {
      "name": "OS_VERSION",
      "table": "tbl1",
      "column": "OS_VERSION",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1"
        },
        "returntype": "bigint"
      }
    },
    {
      "name": "UU",
      "function": {
        "expression": "COUNT_DISTINCT",
        "parameter": {
          "type": "column",
          "value": "tbl1.USER_ID"
        },
        "returntype": "hllc(12)"
      }
    },
    {
      "name": "CONT_SIZE",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "tbl1.SIZE"
        },
        "returntype": "bigint"
      }
    },
    {
      "name": "CONT_COUNT",
      "function": {
        "expression": "SUM",
        "parameter": {
          "type": "column",
          "value": "tbl1.COUNT"
        },
        "returntype": "bigint"
      }
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "tbl1.OS_VERSION",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "tbl1.PLATFORM",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "tbl1.DEVICE_FAMILY",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_",
              "CONT_SIZE",
              "CONT_COUNT"
            ]
          }
        ]
      },
      {
        "name": "F2",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "UU"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "tbl1.PLATFORM",
        "tbl1.OS_VERSION"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [
          "tbl1.DATE_HR"
        ],
        "joint_dims": []
      }
    }
  ],
  "signature": "ttrrs==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 0,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "volatile_range": 0,
  "retention_range": 0,
  "engine_type": 4,
  "storage_type": 2,
  "override_kylin_properties": {
    "job.queuename": "root.production.P0",
    "is-mandatory-only-valid": "true"
  },
  "cuboid_black_list": [],
  "parent_forward": 3,
  "mandatory_dimension_set_list": [],
  "snapshot_table_desc_list": []
}

This is a snippet of the code I tried; it gives me an empty (null) list:

import org.json4s._
import org.json4s.jackson.JsonMethods._

// json4s looks up an implicit `Formats` when `extract` is called. The explicit type
// annotation keeps implicit resolution predictable (Scala 3 requires it on implicit vals).
implicit val formats: Formats = DefaultFormats

/** Mirrors the JSON object stored under `"parameter"`: its `"type"` and `"value"` strings. */
case class Parameter(
  `type`: String, // `type` is a reserved word in Scala, hence the backticks
  value: String
)

/** Mirrors the JSON object stored under `"function"`. */
case class Function(
  expression: String,
  parameter: Parameter,
  returntype: String
)

/** One element of the measures array: a `"name"` plus its `"function"`. */
case class Measures(name: String, function: Function)

/**
 * Partial view of the top-level document: only `uuid` and the list of measures.
 *
 * NOTE(review): the field is named `measure`, while the key in the JSON document
 * is `"measures"` (plural), so this field does not line up with that array.
 */
case class AllMeasuresData(uuid: String, measure: List[Measures])

// Parse the JSON text held in `tmp` (defined outside this snippet) and bind it to the model.
val data = parse(tmp).extract[AllMeasuresData]
// Collect the `name` of every measure that was extracted.
val names = for (m <- data.measure) yield m.name

println(names)

// NOTE(review): the declaration and statements below repeat the lines directly above
// verbatim (likely a paste artifact); in a compiled source file the same names cannot
// be defined twice in one scope.
case class AllMeasuresData(uuid: String, measure: List[Measures])

val data = parse(tmp).extract[AllMeasuresData]
val names = data.measure.map(_.name)

println(names)

Solution

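  • There are a couple of typos in your ADT; most importantly, the field must be named `measures` (plural) so that it matches the "measures" key in the JSON: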

    Here is what you need:

      /** Mirrors the JSON object stored under `"parameter"`: its `"type"` and `"value"` strings. */
      case class Parameter(
        `type`: String, // `type` is a reserved word in Scala, hence the backticks
        value: String
      )

      /** Mirrors the JSON object stored under `"function"`. */
      case class Function(
        expression: String,
        parameter: Parameter,
        returntype: String
      )

      /** One element of the `"measures"` array: a `"name"` plus its `"function"`. */
      case class Measures(name: String, function: Function)

      /**
       * Partial view of the top-level document: only `uuid` and the `"measures"` array.
       * The field name `measures` matches the JSON key of the same name.
       */
      case class AllMeasuresData(uuid: String, measures: List[Measures])
    

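    There was also an extra trailing comma in the JSON (after the last element of "dimensions"). Here is the corrected document; the `//` note in it only marks the spot and must be removed before parsing, because strict JSON does not allow comments: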

      {
       "uuid":"uuidddd",
       "last_modified":1559080222953,
       "version":"2.6.1.0",
       "name":"FULL_DAY_2_mand_date",
       "is_draft":false,
       "model_name":"FULL_DAY_1_may05",
       "description":"",
       "null_string":null,
       "dimensions":[
          {
             "name":"PLATFORM",
             "table":"tbl1",
             "column":"PLATFORM",
             "derived":null
          },
          {
             "name":"OS_VERSION",
             "table":"tbl1",
             "column":"OS_VERSION",
             "derived":null
          } // There was an extra trailing comma here
       ],
       "measures":[
          {
             "name":"_COUNT_",
             "function":{
                "expression":"COUNT",
                "parameter":{
                   "type":"constant",
                   "value":"1"
                },
                "returntype":"bigint"
             }
          },
          {
             "name":"UU",
             "function":{
                "expression":"COUNT_DISTINCT",
                "parameter":{
                   "type":"column",
                   "value":"tbl1.USER_ID"
                },
                "returntype":"hllc(12)"
             }
          },
          {
             "name":"CONT_SIZE",
             "function":{
                "expression":"SUM",
                "parameter":{
                   "type":"column",
                   "value":"tbl1.SIZE"
                },
                "returntype":"bigint"
             }
          },
          {
             "name":"CONT_COUNT",
             "function":{
                "expression":"SUM",
                "parameter":{
                   "type":"column",
                   "value":"tbl1.COUNT"
                },
                "returntype":"bigint"
             }
          }
       ],
       "dictionaries":[
    
       ],
       "rowkey":{
          "rowkey_columns":[
             {
                "column":"tbl1.OS_VERSION",
                "encoding":"dict",
                "encoding_version":1,
                "isShardBy":false
             },
             {
                "column":"tbl1.PLATFORM",
                "encoding":"dict",
                "encoding_version":1,
                "isShardBy":false
             },
             {
                "column":"tbl1.DEVICE_FAMILY",
                "encoding":"dict",
                "encoding_version":1,
                "isShardBy":false
             }
          ]
       },
       "hbase_mapping":{
          "column_family":[
             {
                "name":"F1",
                "columns":[
                   {
                      "qualifier":"M",
                      "measure_refs":[
                         "_COUNT_",
                         "CONT_SIZE",
                         "CONT_COUNT"
                      ]
                   }
                ]
             },
             {
                "name":"F2",
                "columns":[
                   {
                      "qualifier":"M",
                      "measure_refs":[
                         "UU"
                      ]
                   }
                ]
             }
          ]
       },
       "aggregation_groups":[
          {
             "includes":[
                "tbl1.PLATFORM",
                "tbl1.OS_VERSION"
             ],
             "select_rule":{
                "hierarchy_dims":[
    
                ],
                "mandatory_dims":[
                   "tbl1.DATE_HR"
                ],
                "joint_dims":[
    
                ]
             }
          }
       ],
       "signature":"ttrrs==",
       "notify_list":[
    
       ],
       "status_need_notify":[
          "ERROR",
          "DISCARDED",
          "SUCCEED"
       ],
       "partition_date_start":0,
       "partition_date_end":3153600000000,
       "auto_merge_time_ranges":[
          604800000,
          2419200000
       ],
       "volatile_range":0,
       "retention_range":0,
       "engine_type":4,
       "storage_type":2,
       "override_kylin_properties":{
          "job.queuename":"root.production.P0",
          "is-mandatory-only-valid":"true"
       },
       "cuboid_black_list":[
    
       ],
       "parent_forward":3,
       "mandatory_dimension_set_list":[
    
       ],
       "snapshot_table_desc_list":[
    
       ]
    }
    
    

    Now you can run:

      // Assumes `tmp` holds the JSON text shown above and that an implicit json4s
      // `Formats` is in scope, as required by `extract`.
      val data = parse(tmp).extract[AllMeasuresData]
      val names = data.measures.map(_.name)
      // The two lists the question asks for: each measure's function expression and
      // the value of that function's parameter.
      val expressions = data.measures.map(_.function.expression)
      val values = data.measures.map(_.function.parameter.value)

      println(names)
      // Displays
      // List(_COUNT_, UU, CONT_SIZE, CONT_COUNT)
      println(expressions)
      // Displays
      // List(COUNT, COUNT_DISTINCT, SUM, SUM)
      println(values)
      // Displays
      // List(1, tbl1.USER_ID, tbl1.SIZE, tbl1.COUNT)