Search code examples
arrays, json, jolt

Remove duplicates from a nested JSON array using a Jolt spec


Please provide a Jolt spec that produces the desired output JSON. The Jolt spec provided below needs to be updated so that it removes the duplicates inside the subitems array.

The input JSON:

[
  {
    "itemid": "1",
    "itemName": "coco",
    "itemType": "brg",
    "subitemId": "444",
    "subitemName": "INDICATIVE",
    "Status": "N"
  },
  {
    "itemid": "1",
    "itemName": "coco",
    "itemType": "brg",
    "subitemId": "333",
    "subitemName": "BRGS",
    "Status": "P"
  },
  {
    "itemid": "1",
    "itemName": "coco",
    "itemType": "brg",
    "subitemId": "444",
    "subitemName": "INDICATIVE",
    "Status": "N"
  },
  {
    "itemid": "1",
    "itemName": "coco",
    "itemType": "brg",
    "subitemId": "333",
    "subitemName": "BRGS",
    "Status": "P"
  },
  {
    "itemid": "2",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "456",
    "subitemName": "NMPS",
    "Status": "P"
  },
  {
    "itemid": "2",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "4568",
    "subitemName": "NMPSI",
    "Status": "P"
  },
  {
    "itemid": "2",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "456",
    "subitemName": "NMPS",
    "Status": "P"
  },
  {
    "itemid": "2",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "4568",
    "subitemName": "NMPSI",
    "Status": "P"
  },
  {
    "itemid": "3",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "789",
    "subitemName": "NMPS",
    "Status": "P"
  },
  {
    "itemid": "3",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "789",
    "subitemName": "NMPS",
    "Status": "P"
  },
  {
    "itemid": "3",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "789",
    "subitemName": "NMPS",
    "Status": "P"
  },
  {
    "itemid": "3",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "873",
    "subitemName": "NMSI",
    "Status": "N"
  },
  {
    "itemid": "3",
    "itemName": "limk",
    "itemType": "cmds",
    "subitemId": "873",
    "subitemName": "NMSI",
    "Status": "N"
  }
]

Expected output JSON:

[
  {
    "work_item": {
      "itemid": "1",
      "itemName": "coco",
      "itemType": "brg",
      "wf_instance": {
        "subitems": [
          {
            "currentsubitemId": "444",
            "currentsubitemName": "INDICATIVE",
            "CurrentStatus": "NotPresent"
          },
          {
            "currentsubitemId": "333",
            "currentsubitemName": "BRGS",
            "CurrentStatus": "Present"
          }
        ]
      }
    }
  },
  {
    "work_item": {
      "itemid": "2",
      "itemName": "limk",
      "itemType": "cmds",
      "wf_instance": {
        "subitems": [
          {
            "currentsubitemId": "456",
            "currentsubitemName": "NMPS",
            "CurrentStatus": "Present"
          },
          {
            "currentsubitemId": "4568",
            "currentsubitemName": "NMPSI",
            "CurrentStatus": "Present"
          }
        ]
      }
    }
  },
  {
    "work_item": {
      "itemid": "3",
      "itemName": "limk",
      "itemType": "cmds",
      "wf_instance": {
        "subitems": [
          {
            "currentsubitemId": "789",
            "currentsubitemName": "NMPS",
            "CurrentStatus": "Present"
          },
          {
            "currentsubitemId": "873",
            "currentsubitemName": "NMSI",
            "CurrentStatus": "NotPresent"
          }
        ]
      }
    }
  }
]

I tried the Jolt spec below, but it does not remove the duplicate entries inside the subitems array. The Status values also need to be converted:

P to Present, and

N to NotPresent

[
  {
    "operation": "modify-default-beta",
    "spec": {
      "*": {
        "comp_id": "=concat(@(1,itemid),'_', @(1,itemName), '_', @(1,itemType))"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "@(1,comp_id).&[]"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "[#2].&",
        "comp_id": null
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": {
        "itemid": "=firstElement(@(1,itemid))",
        "itemName": "=firstElement(@(1,itemName))",
        "itemType": "=firstElement(@(1,itemType))"
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": {
        "itemid": "[&1].work_item.itemid",
        "itemName": "[&1].work_item.itemName",
        "itemType": "[&1].work_item.itemType",
        "subitemId": {
          "*": "[&2].work_item.wf_instance.subitems[&].currentsubitemId"
        },
        "subitemName": {
          "*": "[&2].work_item.wf_instance.subitems[&].currentsubitemName"
        },
        "Status": {
          "*": "[&2].work_item.wf_instance.subitems[&].CurrentStatus"
        }
      }
    }
  }
]

Solution

  • You could group by itemid and subitemId by prefixing the right-hand sides of the key-value pairs with their values, such as:

    [
      { // grouping occurs here, while the Status literals are changed conditionally
        "operation": "shift",
        "spec": {
          "*": {
            "item*": "@1,itemid.&",
            "sub*": "@1,itemid.subitems.@1,subitemId.current&",
            "Status": {
              "P": {
                "#Present": "@3,itemid.subitems.@3,subitemId.Current&2"
              },
              "N": {
                "#NotPresent": "@3,itemid.subitems.@3,subitemId.Current&2"
              }
            }
          }
        }
      },
      { // get rid of the repetitions, i.e. convert the arrays to single-valued attributes
        "operation": "cardinality",
        "spec": {
          "*": {
            "*": "ONE",
            "subitems": {
              "*": {
                "*": "ONE"
              }
            }
          }
        }
      },
      {
        "operation": "shift",
        "spec": {
          "*": {
            "*": "[#2].work_item.&",
            "subitems": {
              "*": "[#3].work_item.wf_instance.&1[]"
            }
          }
        }
      }
    ]