Search code examples
Tags: json, apache-nifi, jolt

How to write a Jolt spec for JSON array aggregation in Apache NiFi


We want to group the data by deviceid and get the JSON record count for each device, using a Jolt spec in Apache NiFi.

Input :

[
  {
    "altitude": 19.1,
    "analog1": 0.016,
    "analog2": 0.004,
    "batchprocessgroupid": 0,
    "batterylevel": 1.598,
    "bleid": "",
    "command": 3,
    "devicedatetime": 1679325571000,
    "deviceid": "863071015139949",
    "digital1": 0,
    "digital2": 0,
    "gpsdirection": 160.1,
    "gpsfix": 1,
    "hdop": 1,
    "ibutton": "47000019DBC1D001",
    "ignitionstatus": 1,
    "ioeventid": 7,
    "iostring": "5 -> 1, 78 -> 2003, 29 -> 28764, 28 -> 1, 70 -> 0000000000000000, 137 -> 0, 65 -> 164295509, 173 -> 1, 134 -> 0, 2 -> 0, 32 -> 38, 34 -> 47000019DBC1D001, 22 -> 16, 27 -> 14, 71 -> 0000000000000000, 49 -> 0, 130 -> 2, 135 -> 0, 3 -> 0, 150 -> 42402, 139 -> 60, 23 -> 4, 30 -> 1598, 136 -> 0, 79 -> 2003, 131 -> 0",
    "isprimary": "1",
    "latitude": 24.99225,
    "listenerdatetime": 1679311155585,
    "locationbit": 1,
    "locationen": "Dubai, Mena Jabal Ali",
    "longitude": 55.0920783,
    "mainpower": 28.764,
    "odometer": 164295.509,
    "panic": 0,
    "providertenantuids": "",
    "reason": 0,
    "recordstatus": 1,
    "satellites": 11,
    "speed": 0,
    "temperature1": 0,
    "temperature2": 0,
    "tenantgroupuid": "4",
    "tenantuid": "2",
    "uniqueid": 19532
  },
  {
    "altitude": 16.3,
    "analog1": 0.016,
    "analog2": 0.005,
    "batchprocessgroupid": 0,
    "batterylevel": 2.645,
    "bleid": "",
    "command": 3,
    "devicedatetime": 1679325626000,
    "deviceid": "863071015139949",
    "digital1": 0,
    "digital2": 0,
    "gpsdirection": 307.9,
    "gpsfix": 1,
    "hdop": 1,
    "ibutton": "47000019DBC1D001",
    "ignitionstatus": 1,
    "ioeventid": 9,
    "iostring": "5 -> 1, 78 -> 2003, 29 -> 28793, 28 -> 1, 70 -> 0000000000000000, 137 -> 0, 65 -> 164295532, 173 -> 1, 134 -> 0, 2 -> 0, 32 -> 38, 34 -> 47000019DBC1D001, 22 -> 16, 27 -> 20, 71 -> 0000000000000000, 49 -> 255, 130 -> 5, 135 -> 0, 3 -> 0, 150 -> 42402, 139 -> 55, 23 -> 5, 30 -> 2645, 136 -> 0, 79 -> 2003, 131 -> 0",
    "isprimary": "1",
    "latitude": 24.9923233,
    "listenerdatetime": 1679311210701,
    "locationbit": 1,
    "locationen": "Dubai, Mena Jabal Ali",
    "longitude": 55.0921766,
    "mainpower": 28.793,
    "odometer": 164295.532,
    "panic": 0,
    "providertenantuids": "",
    "reason": 0,
    "recordstatus": 1,
    "satellites": 11,
    "speed": 5,
    "temperature1": 0,
    "temperature2": 0,
    "tenantgroupuid": "4",
    "tenantuid": "2",
    "uniqueid": 31628
  }
]

Expected output should look like:

[
  {
    "deviceid": "863071015139949",
    "count": 2,
    "devicedata": [
      {
        "altitude": 19.1,
        "analog1": 0.016,
        "analog2": 0.004,
        "batchprocessgroupid": 0,
        "batterylevel": 1.598,
        "bleid": "",
        "command": 3,
        "devicedatetime": 1679325571000,
        "deviceid": "863071015139949",
        "digital1": 0,
        "digital2": 0,
        "gpsdirection": 160.1,
        "gpsfix": 1,
        "hdop": 1,
        "ibutton": "47000019DBC1D001",
        "ignitionstatus": 1,
        "ioeventid": 7,
        "iostring": "5 -> 1, 78 -> 2003, 29 -> 28764, 28 -> 1, 70 -> 0000000000000000, 137 -> 0, 65 -> 164295509, 173 -> 1, 134 -> 0, 2 -> 0, 32 -> 38, 34 -> 47000019DBC1D001, 22 -> 16, 27 -> 14, 71 -> 0000000000000000, 49 -> 0, 130 -> 2, 135 -> 0, 3 -> 0, 150 -> 42402, 139 -> 60, 23 -> 4, 30 -> 1598, 136 -> 0, 79 -> 2003, 131 -> 0",
        "isprimary": "1",
        "latitude": 24.99225,
        "listenerdatetime": 1679311155585,
        "locationbit": 1,
        "locationen": "Dubai, Mena Jabal Ali",
        "longitude": 55.0920783,
        "mainpower": 28.764,
        "odometer": 164295.509,
        "panic": 0,
        "providertenantuids": "",
        "reason": 0,
        "recordstatus": 1,
        "satellites": 11,
        "speed": 0,
        "temperature1": 0,
        "temperature2": 0,
        "tenantgroupuid": "4",
        "tenantuid": "2",
        "uniqueid": 19532
      },
      {
        "altitude": 16.3,
        "analog1": 0.016,
        "analog2": 0.005,
        "batchprocessgroupid": 0,
        "batterylevel": 2.645,
        "bleid": "",
        "command": 3,
        "devicedatetime": 1679325626000,
        "deviceid": "863071015139949",
        "digital1": 0,
        "digital2": 0,
        "gpsdirection": 307.9,
        "gpsfix": 1,
        "hdop": 1,
        "ibutton": "47000019DBC1D001",
        "ignitionstatus": 1,
        "ioeventid": 9,
        "iostring": "5 -> 1, 78 -> 2003, 29 -> 28793, 28 -> 1, 70 -> 0000000000000000, 137 -> 0, 65 -> 164295532, 173 -> 1, 134 -> 0, 2 -> 0, 32 -> 38, 34 -> 47000019DBC1D001, 22 -> 16, 27 -> 20, 71 -> 0000000000000000, 49 -> 255, 130 -> 5, 135 -> 0, 3 -> 0, 150 -> 42402, 139 -> 55, 23 -> 5, 30 -> 2645, 136 -> 0, 79 -> 2003, 131 -> 0",
        "isprimary": "1",
        "latitude": 24.9923233,
        "listenerdatetime": 1679311210701,
        "locationbit": 1,
        "locationen": "Dubai, Mena Jabal Ali",
        "longitude": 55.0921766,
        "mainpower": 28.793,
        "odometer": 164295.532,
        "panic": 0,
        "providertenantuids": "",
        "reason": 0,
        "recordstatus": 1,
        "satellites": 11,
        "speed": 5,
        "temperature1": 0,
        "temperature2": 0,
        "tenantgroupuid": "4",
        "tenantuid": "2",
        "uniqueid": 31628
      }
    ]
  }
]

Solution

  • In principle, you can obtain the count value by applying the size function once the array ("devicedata") has been derived, as follows:

    [
      { // Step 1: group the records by their deviceid value.
        // The "[]" suffix forces "devicedata" to be an array even when a
        // device has only one record, so that size() always counts records.
        "operation": "shift",
        "spec": {
          "*": {
            "@": "@(1,deviceid).devicedata[]"
          }
        }
      },
      { // Step 2: add a "count" attribute to every group.
        "operation": "modify-overwrite-beta",
        "spec": {
          "*": {
            "count": "=size(@(1,devicedata))"
          }
        }
      },
      { // Step 3: turn the map keyed by deviceid into an array holding one
        // object per device. "$" yields the group key (the deviceid) and
        // "[#2]" gives each group its own index in the output array.
        // Key order inside each object is not significant in JSON.
        "operation": "shift",
        "spec": {
          "*": {
            "$": "[#2].deviceid",
            "count": "[#2].&",
            "devicedata": "[#2].&"
          }
        }
      }
    ]
    

    This spec groups per deviceid, so it returns an array with one object per device. It therefore also covers input that contains several distinct deviceid values, as well as devices that have only a single record.