Search code examples
jsonapache-nifijolt

Flattening a nested JSON using jolt transform involving NULL values


I am facing a problem, transforming a very complex nested JSON using jolt transformation. Input and output detail is given below.

Input

{
  "metadata": {
    "timestamp": "2023-04-19T19:22:11+0000",
    "dataProvider": "P44_Ocean"
  },
  "data": {
    "stops": [
      {
        "stopNumber": "1"
      },
      {
        "stopNumber": "2"
      }
    ],
    "latestEvents": [
      {
        "stopNumber": null,
        "stopId": "XXXXXX-XXX-31a4-XXXX-389659XXXX",
        "routeSegment": null,
        "statusCode": "XXXX_OUT_FULL",
        "eventDateTime": null,
        "arrivalCode": null,
        "arrivalDateTime": null,
        "departureDateTime": null,
        "groupStatus": null,
        "arrivalEstimate": {
          "plannedDateTime": "2023-04-18T04:30:00+0000",
          "lastCalculatedDateTime": null,
          "estimatedArrivalWindow": {
            "startDateTime": null,
            "endDateTime": null
          }
        },
        "additionalAppointmentWindowStatuses": null
      },
      {
        "stopNumber": null,
        "stopId": "XXXXXX-XXX-31a4-XXXX-389659XXXX",
        "routeSegment": "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ",
        "statusCode": "YYYYY_FROM_STOP",
        "eventDateTime": "2023-04-19T14:05:00+0000",
        "arrivalCode": null,
        "arrivalDateTime": null,
        "departureDateTime": null,
        "groupStatus": null,
        "arrivalEstimate": {
          "plannedDateTime": null,
          "lastCalculatedDateTime": null,
          "estimatedArrivalWindow": {
            "startDateTime": null,
            "endDateTime": null
          }
        },
        "additionalAppointmentWindowStatuses": null
      },
      {
        "stopNumber": null,
        "stopId": "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY",
        "routeSegment": "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ",
        "statusCode": "UUUUU_AT_STOP",
        "eventDateTime": null,
        "arrivalCode": null,
        "arrivalDateTime": null,
        "departureDateTime": null,
        "groupStatus": null,
        "arrivalEstimate": {
          "plannedDateTime": "2023-05-01T07:00:00+0000",
          "lastCalculatedDateTime": "2023-04-19T19:22:11+0000",
          "estimatedArrivalWindow": {
            "startDateTime": "2023-05-01T07:00:00+0000",
            "endDateTime": null
          }
        },
        "additionalAppointmentWindowStatuses": null
      },
      {
        "stopNumber": null,
        "stopId": "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY",
        "routeSegment": null,
        "statusCode": "XXXXXXVERY",
        "eventDateTime": null,
        "arrivalCode": null,
        "arrivalDateTime": null,
        "departureDateTime": null,
        "groupStatus": null,
        "arrivalEstimate": {
          "plannedDateTime": "2023-05-02T06:00:00+0000",
          "lastCalculatedDateTime": null,
          "estimatedArrivalWindow": {
            "startDateTime": null,
            "endDateTime": null
          }
        },
        "additionalAppointmentWindowStatuses": null
      }
    ]
  }
}


Output Expected

[ {
  "StopNumber" : "1",
  "LatestEventStopNumber" : null,
  "LatestEventStopId" : [ "XXXXXX-XXX-31a4-XXXX-389659XXXX", "XXXXXX-XXX-31a4-XXXX-389659XXXX", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY" ],
  "LatestEventRouteSegment" : [ "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", null ],
  "LatestEventStatusCode" : [ "XXXX_OUT_FULL", "YYYYY_FROM_STOP", "UUUUU_AT_STOP", "XXXXXXVERY" ],
  "LatestEventDateTime" : [ null,"2023-04-19T14:05:00+0000", null, null ],
  "LatestEventArrivalCode" : null,
  "LatestEventArrivalDateTime" : null,
  "LatestEventDepartureDateTime" : null,
  "LatestEventEstimatedArrivalPlannedDateTime" : [ "2023-04-18T04:30:00+0000", null, "2023-05-01T07:00:00+0000", "2023-05-02T06:00:00+0000" ],
  "LatestEventEstimatedArrivalLastCalculatedDateTime" : [ null,null,"2023-04-19T19:22:11+0000", null ],
  "LatestEventEstimatedArrivalWindowStartDateTime" : [ null,null,"2023-05-01T07:00:00+0000", null ],
  "LatestEventEstimatedArrivalWindowEndDateTime" : null,
  "LatestEventAdditionalAppointmentWindowStatuses" : null
}, {
  "StopNumber" : "2",
  "LatestEventStopNumber" : null,
  "LatestEventStopId" : [ "XXXXXX-XXX-31a4-XXXX-389659XXXX", "XXXXXX-XXX-31a4-XXXX-389659XXXX", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY" ],
  "LatestEventRouteSegment" : [ "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", null ],
  "LatestEventStatusCode" : [ "XXXX_OUT_FULL", "YYYYY_FROM_STOP", "UUUUU_AT_STOP", "XXXXXXVERY" ],
  "LatestEventDateTime" : [ null,"2023-04-19T14:05:00+0000", null, null ],
  "LatestEventArrivalCode" : null,
  "LatestEventArrivalDateTime" : null,
  "LatestEventDepartureDateTime" : null,
  "LatestEventEstimatedArrivalPlannedDateTime" : [ "2023-04-18T04:30:00+0000", null, "2023-05-01T07:00:00+0000", "2023-05-02T06:00:00+0000" ],
  "LatestEventEstimatedArrivalLastCalculatedDateTime" : [ null,null,"2023-04-19T19:22:11+0000", null ],
  "LatestEventEstimatedArrivalWindowStartDateTime" : [ null,null,"2023-05-01T07:00:00+0000", null ],
  "LatestEventEstimatedArrivalWindowEndDateTime" : null,
  "LatestEventAdditionalAppointmentWindowStatuses" : null
} ]

But with the current jolt spec, we are not getting the proper EventDateTime, we are getting only 3 values, whereas we should get 4 values including the first NULl value

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "stops": {
          "*": {
            "@(stopNumber)": "[&1].StopNumber",
            "@(2,latestEvents)": {
              "*": {
                "@(stopNumber)": "[&3].LatestEventStopNumber",
                "@(stopId)": "[&3].LatestEventStopId",
                "@(routeSegment)": "[&3].LatestEventRouteSegment",
                "@(statusCode)": "[&3].LatestEventStatusCode",
                "@(eventDateTime)": "[&2].LatestEventDateTime",
                "@(arrivalCode)": "[&3].LatestEventArrivalCode",
                "@(arrivalDateTime)": "[&3].LatestEventArrivalDateTime",
                "@(departureDateTime)": "[&3].LatestEventDepartureDateTime",
                "@(arrivalEstimate.plannedDateTime)": "[&3].LatestEventEstimatedArrivalPlannedDateTime",
                "@(arrivalEstimate.lastCalculatedDateTime)": "[&3].LatestEventEstimatedArrivalLastCalculatedDateTime",
                "@(arrivalEstimate.estimatedArrivalWindow.startDateTime)": "[&3].LatestEventEstimatedArrivalWindowStartDateTime",
                "@(arrivalEstimate.estimatedArrivalWindow.endDateTime)": "[&3].LatestEventEstimatedArrivalWindowEndDateTime",
                "@(additionalAppointmentWindowStatuses)": "[&3].LatestEventAdditionalAppointmentWindowStatuses"
              }
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": ""
    }
  }
  ]

We are getting this

[ {
  "StopNumber" : "1",
  "LatestEventStopNumber" : null,
  "LatestEventStopId" : [ "XXXXXX-XXX-31a4-XXXX-389659XXXX", "XXXXXX-XXX-31a4-XXXX-389659XXXX", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY" ],
  "LatestEventRouteSegment" : [ "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", null ],
  "LatestEventStatusCode" : [ "XXXX_OUT_FULL", "YYYYY_FROM_STOP", "UUUUU_AT_STOP", "XXXXXXVERY" ],
  "LatestEventDateTime" : [ "2023-04-19T14:05:00+0000", null, null ],
  "LatestEventArrivalCode" : null,
  "LatestEventArrivalDateTime" : null,
  "LatestEventDepartureDateTime" : null,
  "LatestEventEstimatedArrivalPlannedDateTime" : [ "2023-04-18T04:30:00+0000", null, "2023-05-01T07:00:00+0000", "2023-05-02T06:00:00+0000" ],
  "LatestEventEstimatedArrivalLastCalculatedDateTime" : [ "2023-04-19T19:22:11+0000", null ],
  "LatestEventEstimatedArrivalWindowStartDateTime" : [ "2023-05-01T07:00:00+0000", null ],
  "LatestEventEstimatedArrivalWindowEndDateTime" : null,
  "LatestEventAdditionalAppointmentWindowStatuses" : null
}, {
  "StopNumber" : "2",
  "LatestEventStopNumber" : null,
  "LatestEventStopId" : [ "XXXXXX-XXX-31a4-XXXX-389659XXXX", "XXXXXX-XXX-31a4-XXXX-389659XXXX", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY", "ZZZZZZ1-391d-3c67-9c18-3e04YYYYYY" ],
  "LatestEventRouteSegment" : [ "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", "XXXXb4-5d1a-3e3a-ac08-ea5ZZZZZ", null ],
  "LatestEventStatusCode" : [ "XXXX_OUT_FULL", "YYYYY_FROM_STOP", "UUUUU_AT_STOP", "XXXXXXVERY" ],
  "LatestEventDateTime" : [ "2023-04-19T14:05:00+0000", null, null ],
  "LatestEventArrivalCode" : null,
  "LatestEventArrivalDateTime" : null,
  "LatestEventDepartureDateTime" : null,
  "LatestEventEstimatedArrivalPlannedDateTime" : [ "2023-04-18T04:30:00+0000", null, "2023-05-01T07:00:00+0000", "2023-05-02T06:00:00+0000" ],
  "LatestEventEstimatedArrivalLastCalculatedDateTime" : [ "2023-04-19T19:22:11+0000", null ],
  "LatestEventEstimatedArrivalWindowStartDateTime" : [ "2023-05-01T07:00:00+0000", null ],
  "LatestEventEstimatedArrivalWindowEndDateTime" : null,
  "LatestEventAdditionalAppointmentWindowStatuses" : null
} ]

Pls help Can anyone who is a jolt expert, help me get the desired output. I think i m stuck in the last step


Solution

  • There seems to be a bug. Even LatestEventRouteSegment array has the same issue as much as I observe.

    You might alternatively use partitioning bystops[].stopNumber(using @(5,&4[&3].stopNumber)) as looping through the lastEvents array such as

    [
      {
        "operation": "shift",
        "spec": {
          "data": {
            "stops": {
              "*": {
                "@2,latestEvents": {
                  "*": {
                    "stopNumber": "@(5,&4[&3].stopNumber).LatestEventStopNumber[&1]",
                    "stopId": "@(5,&4[&3].stopNumber).LatestEventStopId[&1]",
                    "routeSegment": "@(5,&4[&3].stopNumber).LatestEventRouteSegment[&1]",
                    "statusCode": "@(5,&4[&3].stopNumber).LatestEventStatusCode[&1]",
                    "eventDateTime": "@(5,&4[&3].stopNumber).LatestEventDateTime[&1]",
                    ...
                  }
                }
              }
            }
          }
        }
      },
      {
        "operation": "shift",
        "spec": {
          "*": {
            "$": "[#2].StopNumber",
            "*": "[#2].&"
          }
        }
      }
    ]
    

    the following option is shorter and cleaner

    [
      {
        "operation": "shift",
        "spec": {
          "data": {
            "stops": {
              "*": {
                "@2,stops[&].stopNumber": "[&1].StopNumber",
                "@2,latestEvents": {
                  "*": {
                    "stopNumber": "[&3].LatestEventStopNumber[&1]",
                    "stopId": "[&3].LatestEventStopId[&1]",
                    "routeSegment": "[&3].LatestEventRouteSegment[&1]",
                    "statusCode": "[&3].LatestEventStatusCode[&1]",
                    "eventDateTime": "[&3].LatestEventDateTime[&1]",
                    .... 
                  }
                }
              }
            }
          }
        }
      }
    ]