Search code examples
jsonapache-nifijolt

Convert Flat json to Nested Json with multiple arrays and keep null values in output using Jolt transform


I'm trying to write a spec to do the below transformation using jolt transformation. I need to convert the flat JSON to nested JSON by keeping null values. I attached the input, expected output and jolt transform. I need to keep the null values in the output but it doesn't show in output after jolt transform. I didn't get exact output with my jolt transform.

I am having some trouble with converting the flat JSON to nested JSON. I have looked at examples and didn't get any closer as to what is mentioned above. I need to transform a JSON structure by using a JOLT spec. I use https://jolt-demo.appspot.com to test the following below.

Input:

[
  {
    "container_id": "ABC",
    "shipperN": null,
    "PNumber": null,
    "trackingNumber": null,
    "priority": null,
    "HType": "IN_Load",
    "loadNumber": "123345",
    "billOfLading": "12345",
    "referenceNumbers": "LID",
    "addressLine1": "ABC Street",
    "addressLine2": "null",
    "city": "Chicago",
    "country": "US",
    "latitude": "null",
    "longitude": "null",
    "earliestAppointmentTime": "XXXXX09:25",
    "latestAppointmentTime": "XXXXX09:25",
    "postalCode": "XXXXX3",
    "sequence": "1",
    "state": "XY",
    "stopReferenceId": "0001",
    "stopType": "PU",
    "truckNumber": null,
    "trailerNumber": null,
    "driverPhone": null,
    "railEquipmentInitials": null,
    "railEquipmentNumber": null,
    "containerNumber": "XXXXXXXX"
  },
  {
    "container_id": "ABC",
    "shipperN": null,
    "PNumber": null,
    "trackingNumber": null,
    "priority": null,
    "HType": "IN_Load",
    "loadNumber": "123345",
    "billOfLading": "12345",
    "referenceNumbers": "LID",
    "addressLine1": "null",
    "addressLine2": "null",
    "city": "null",
    "country": "null",
    "latitude": null,
    "longitude": null,
    "earliestAppointmentTime": "XXXXX09:25",
    "latestAppointmentTime": "XXXXX09:25",
    "name": "null",
    "postalCode": "null",
    "sequence": "2",
    "state": "null",
    "stopReferenceId": "XXXXD",
    "stopType": "PL",
    "truckNumber": null,
    "trailerNumber": null,
    "driverPhone": null,
    "railEquipmentInitials": null,
    "railEquipmentNumber": null,
    "containerNumber": "XXXXXXXX"
  }
]

Desired Output:

{
  "load": {
    "container_id": "ABC",
    "shipperN": null,
    "PNumber": null,
    "trackingNumber": null,
    "priority": null,
    "HType": [ "IN_Load" ],
    "loadNumber": "123345",
    "billOfLading": "12345",
    "referenceNumbers": [ "LID" ],
    "stops": [
      {
        "addressLine1": "ABC Street",
        "addressLine2": "null",
        "city": "Chicago",
        "country": "US",
        "earliestAppointmentTime": "XXXXX09:25",
        "latestAppointmentTime": "XXXXX09:25",
        "postalCode": "XXXXX3",
        "sequence": "1",
        "state": "XY",
        "stopReferenceId": "0001",
        "stopType": "PU"
      },
      {
        "earliestAppointmentTime": "2021-03-09T15:25:00.203Z",
        "latestAppointmentTime": "2021-03-09T15:25:00.203Z",
        "sequence": "2",
        "stopReferenceId": "dummy",
        "stopType": "PL",
        "externalAddressId": "dummy"
      }
    ]
  },
  "containerInfo": {
    "containerNumber": "XXXXXXXX"
  },
  "trackingInfo": {
    "truckNumber": null,
    "trailerNumber": null,
    "driverPhone": null,
    "railEquipmentInitials": null,
    "railEquipmentNumber": null
  }
}

Jolt Spec that I'm using :

[
  {
    "operation": "shift",
    "spec": {
      "*": {
        "*": "@(1,container_id).load.stops[&1].&",
        "container_id": "@(1,container_id).load.&", // "else" case
        "shipperN": "@(1,container_id).load.&",
        "PNumber": "@(1,container_id).load.&",
        "trackingNumber": "@(1,container_id).load.&",
        "priority": "@(1,container_id).load.&",
        "HType": "@(1,container_id).load.&",
        "loadNumber": "@(1,container_id).load.&",
        "billOfLading": "@(1,container_id).load.&",
        "referenceNumbers": "@(1,container_id).load.&",
        "containerNumber": "@(1,container_id).containerInfo.&",
        "truckNumber": "@(1,container_id).trackingInfo.&",
        "trailerNumber": "@(1,container_id).trackingInfo.&",
        "driverPhone": "@(1,container_id).trackingInfo.&",
        "railEquipmentInitials": "@(1,container_id).trackingInfo.&",
        "railEquipmentNumber": "@(1,container_id).trackingInfo.&"
      }
    }
  },
  {
    "operation": "modify-overwrite-beta",
    "spec": {
      "*": "=recursivelySquashNulls"
    }
  },
  {
    "operation": "cardinality",
    "spec": {
      "*": {
        "*": {
          "container_id": "ONE",
          "shipperN": "ONE",
          "PNumber": "ONE",
          "trackingNumber": "ONE",
          "priority": "ONE",
          "HType": "ONE",
          "referenceNumbers": "ONE",
          "loadNumber": "ONE",
          "billOfLading": "ONE",
          "containerInfo": {
            "*": "ONE"
          },
          "trackingInfo": {
            "*": "ONE"
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": ""
    }
  }
]

Solution

  • You're so close;

    1. The spec containing recursivelySquashNulls should be removed

    2. The identifier .&[] should be used proper to the attributes HType and referenceNumbers

    3. The cardinality spec preferably be shortened

    So use the following as a whole spec

    [
      {
        "operation": "shift",
        "spec": {
          "*": {
            "*": "@(1,container_id).load.stops[&1].&",
            "container_id": "@(1,container_id).load.&", // "else" case
            "shipperN": "@(1,container_id).load.&",
            "PNumber": "@(1,container_id).load.&",
            "trackingNumber": "@(1,container_id).load.&",
            "priority": "@(1,container_id).load.&",
            "HType": "@(1,container_id).load.&",
            "loadNumber": "@(1,container_id).load.&",
            "billOfLading": "@(1,container_id).load.&",
            "referenceNumbers": "@(1,container_id).load.&",
            "containerNumber": "@(1,container_id).containerInfo.&",
            "truckNumber": "@(1,container_id).trackingInfo.&",
            "trailerNumber": "@(1,container_id).trackingInfo.&",
            "driverPhone": "@(1,container_id).trackingInfo.&",
            "railEquipmentInitials": "@(1,container_id).trackingInfo.&",
            "railEquipmentNumber": "@(1,container_id).trackingInfo.&"
          }
        }
      },
      {
        "operation": "cardinality",
        "spec": {
          "*": {
            "*": {
              "*": "ONE",
              "stops": "MANY"
            }
          }
        }
      },
      {
        "operation": "shift",
        "spec": {
          "*": {
            "*": "&",
            "load": {
              "HType|referenceNumbers": "&1.&[]", 
              "*": "&1.&" // &1 stands for the key "load", and & replicates the leaf values  
            }
          }
        }
      }
    ]