Search code examples
javajsonjolt

Jolt transformation to grouping ID by inside the nested json


I have below tried to build the json by using the jolt spec by remove the id in the nested json array please help me to provide the expected out json below Transactions object grouping ID needs to be corrected by removing the id As shown below in expected output json

**Input json **

[
  {
    "ALERT_IDENTIFIER": "123",
    "ACCOUNT_ID": "Acc345",
    "CREATION_TIMESTAMP": 1615902235602,
    "TRANSACTION_ID": "Tr123",
    "REASON_SRC_UNIQUE_ID": "TEST123",
    "REASON_SCORE": 67,
    "REASON_TIME_PERIOD": "Daily",
    "REASON_DESCRIPTION": "CB05b: In the previous day there was a high amount",
    "DATASOURCE_ACC_ID": "NOCTEST1234"
  },
  {
    "ALERT_IDENTIFIER": "123",
    "ACCOUNT_ID": "Acc3478",
    "CREATION_TIMESTAMP": 1615902235602,
    "TRANSACTION_ID": "Tr12356",
    "REASON_SRC_UNIQUE_ID": "TEST123456",
    "REASON_SCORE": 87,
    "REASON_TIME_PERIOD": "Daily",
    "REASON_DESCRIPTION": "CB05b: In the previous day there was a high amount",
    "DATASOURCE_ACC_ID": "NOCTEST1234"
  },
  {
    "ALERT_IDENTIFIER": "123",
    "ACCOUNT_ID": "Acc3456",
    "CREATION_TIMESTAMP": 1615902235602,
    "TRANSACTION_ID": "Tr123678",
    "REASON_SRC_UNIQUE_ID": "TEST123456",
    "REASON_SCORE": 87,
    "REASON_TIME_PERIOD": "Daily",
    "REASON_DESCRIPTION": "CB05b: In the previous day there was a high amount",
    "DATASOURCE_ACC_ID": "NOCTEST1234"
  }
]

**Jolt spec tried **

[
  {
    "operation": "shift",
    "spec": {
      "*": {
        "ALERT_IDENTIFIER": "@1,ALERT_IDENTIFIER.id",
        "CREATION_TIMESTAMP": "@1,ALERT_IDENTIFIER.CreationTime",
        "DATASOURCE_ACC_ID": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.SRC_AccountID",
        "REASON_SRC_UNIQUE_ID": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.SRC_ID",
        "REASON_SCORE": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.DetectionScore",
        "REASON_TIME_PERIOD": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.ReasonTimePeriod",
        "ACCOUNT_ID": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.Transactions.@1,TRANSACTION_ID.AccountID",
        "TRANSACTION_ID": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.Transactions.@1,TRANSACTION_ID.TransactionID",
        "REASON_DESCRIPTION": "@1,ALERT_IDENTIFIER.WFI_Reasons.@1,REASON_SRC_UNIQUE_ID.ReasonDescription"
      }
    }
  },
  { //get rid of repetitions, eg. convert arrays to attributes 
    "operation": "cardinality",
    "spec": {
      "*": {
        "*": "ONE",
        "WFI_Reasons": {
          "*": {
            "*": "ONE"
          }
        }
      }
    }
  }, {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "[#2].WF_Workitem.&",
        "WFI_Reasons": {
          "*": "[#3].WF_Workitem.WF_Instance.&1[]"
        }
      }
    }
  }]

Expected output

[ {
  "WF_Workitem" : {
    "id" : "123",
    "CreationTime" : 1615902235602,
    "WF_Instance" : {
      "WFI_Reasons" : [ {
        "SRC_AccountID" : "NOCTEST1234",
        "SRC_ID" : "TEST123",
        "DetectionScore" : 67,
        "ReasonTimePeriod" : "Daily",
        "Transactions" : [{
           
            "AccountID" : "Acc345",
            "TransactionID" : "Tr123"
          
        }],
        "ReasonDescription" : "CB05b: In the previous day there was a high amount"
      }, {
        "SRC_AccountID" : "NOCTEST1234",
        "SRC_ID" : "TEST123456",
        "DetectionScore" : 87,
        "ReasonTimePeriod" : "Daily",
        "Transactions" : [
          {
            "AccountID" : "Acc3478",
            "TransactionID" : "Tr12356"
          },
           {
            "AccountID" : "Acc3456",
            "TransactionID" : "Tr123678"
          }
        ],
        "ReasonDescription" : "CB05b: In the previous day there was a high amount"
      } ]
    }
  }
} ]

actual output

[ {
  "WF_Workitem" : {
    "id" : "123",
    "CreationTime" : 1615902235602,
    "WF_Instance" : {
      "WFI_Reasons" : [ {
        "SRC_AccountID" : "NOCTEST1234",
        "SRC_ID" : "TEST123",
        "DetectionScore" : 67,
        "ReasonTimePeriod" : "Daily",
        "Transactions" : {
          "Tr123" : {
            "AccountID" : "Acc345",
            "TransactionID" : "Tr123"
          }
        },
        "ReasonDescription" : "CB05b: In the previous day there was a high amount"
      }, {
        "SRC_AccountID" : "NOCTEST1234",
        "SRC_ID" : "TEST123456",
        "DetectionScore" : 87,
        "ReasonTimePeriod" : "Daily",
        "Transactions" : {
          "Tr12356" : {
            "AccountID" : "Acc3478",
            "TransactionID" : "Tr12356"
          },
          "Tr123678" : {
            "AccountID" : "Acc3456",
            "TransactionID" : "Tr123678"
          }
        },
        "ReasonDescription" : "CB05b: In the previous day there was a high amount"
      } ]
    }
  }
} ]

Solution

  • I think you are on track in the first two transformation spec, however in the 3rd transformation you seem to want to re bucket each group level into an array vs having group header field and I dont think this is possible in one transformation. Each group level has to be done using its own shift transformation starting from the highest group level then going downward since the next group level will depend on how the parent group level is structured if that makes any sense.

    With that after the cardinality transformation you would have the following 3 shift transformation for each group level (workitem -> wfi_reasons -> transactions):

    [...
     {
        "operation": "shift",
        "spec": {
          "*": "[].WF_Workitem"
        }
      }
      ,
      {
        "operation": "shift",
        "spec": {
          "*": {
            "WF_Workitem": {
              "*": "[&2].&1.&",
              "WFI_Reasons": {
                "*": "[&3].&2.WF_Instance.&1[]"
              }
            }
          }
        }
      }
      ,
      {
        "operation": "shift",
        "spec": {
          "*": {
            "WF_Workitem": {
              "*": "[&2].&1.&",
              "WF_Instance": {
                "WFI_Reasons": {
                  "*": {
                    "*": "[&5].&4.&3.&2[&1].&",
                    "Transactions": {
                      "*": "[&6].&5.&4.&3[&2].&1[]"
                    }
                  }
                }
              }
            }
          }
        }
      }]
    

    Also I would recommend in these scenarios looking into jslt which is another transformation language for json (if you are using Nifi, there is processor for that as well). Given that jslt has group-by function, the transformation is going to be easier and more readable with much less lines of codes as in the following:

    import "http://jslt.schibsted.com/2018/experimental" as exp
    
    let id_group=  exp:group-by(., {"id":.ALERT_IDENTIFIER,"CreationTime":.CREATION_TIMESTAMP},.)
    
    let id_nested_group= [ for($id_group) 
                              {
                                 "WF_Workitem":{
                                    "WF_Instance" :{
                                         let x= exp:group-by(.values,
                                                            {"SRC_AccountID":.DATASOURCE_ACC_ID,
                                                             "SRC_ID":.REASON_SRC_UNIQUE_ID,
                                                              "DetectionScore":.REASON_SCORE,
                                                              "ReasonTimePeriod":.REASON_TIME_PERIOD,
                                                              "ReasonDescription":.REASON_DESCRIPTION},
                                                            {"AccountID":.ACCOUNT_ID,
                                                              "TransactionID":.TRANSACTION_ID
                                                            })
                                         "WFI_Reasons": [for($x) {"Transactions":.values}+.key]
                                                  }
                                             }+.key
                             }
                       ]
    
    $id_nested_group   
    

    Hope you find this helpful.