Search code examples
json apache-nifi jolt

Jolt transform spec


I'm building an ETL pipeline in NiFi. Mongo sends JSON messages to Kafka with a structure like this:

{
  "regionPriceEvent": {
    "42": {
      "type": "ACTIVATION",
      "date": "2022-07-02T18:24:50.719Z"
    },
    "55": {
      "type": "ACTIVATION",
      "date": "2022-07-02T18:24:50.719Z"
    }
  },
  "visibilityInRegions": [
    {
      "regionId": "42",
      "visibility": "true"
    },
    {
      "regionId": "66",
      "visibility": "true"
    }
  ]
}

I need to transform it into a structure like the one below, but I don't know how to do that. It looks like a full join in SQL:

{
  "regionPriceEvent": [
    {
      "regionId": "42",
      "type": "ACTIVATION",
      "date": "2022-07-02T18:24:50.719Z",
      "visibility": "true"
    },
    {
      "regionId": "55",
      "type": "ACTIVATION",
      "date": "2022-07-02T18:24:50.719Z",
      "visibility": ""
    },
    {
      "regionId": "66",
      "type": "",
      "date": "",
      "visibility": "true"
    }
  ]
}

Is this possible, or am I just wasting my time? Here is my spec:

[
  {
    "operation": "shift",
    "spec": {
      "visibilityInRegions": {
        "*": {
          "@": "visibilityInRegions.@regionId"
        }
      },
      "*": "&"
    }
  },
  {
    "operation": "remove",
    "spec": {
      "visibilityInRegions": {
        "*": {
          "regionId": ""
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "regionPriceEvent": {
        "*": "&"
      },
      "visibilityInRegions": {
        "*": "&"
      }
    }
  }
]

Here is the result:

{
  "42": [
    {
      "type": "ACTIVATION",
      "date": "2022-07-02T18:24:50.719Z"
    },
    {
      "visibility": "true"
    }
  ],
  "55": {
    "type": "ACTIVATION",
    "date": "2022-07-02T18:24:50.719Z"
  },
  "66": {
    "visibility": "true"
  }
}

Solution

  • Yes, it's possible, for example with the following spec:

    [
      {
        // Step 1: copy every top-level key matching "reg*" (here "regionPriceEvent")
        // through unchanged, then merge each attribute of the remaining top-level
        // entries (the "visibilityInRegions" elements) into "regionPriceEvent",
        // under the key given by that element's own "regionId" value.
        "operation": "shift",
        "spec": {
          "reg*": {
            "@": "&"
          },
          "*": {
            "*": {
              "*": "regionPriceEvent.@(1,regionId).&"
            }
          }
        }
      },
      {
        // Step 2: make sure every sub-object carries the full set of attributes.
        "operation": "default",
        "spec": {
          "*": {
            "*": {
              // Any of these keys that a sub-object lacks is added with an
              // empty-string value.
              "type": "",
              "date": "",
              "visibility": ""
            }
          }
        }
      },
      {
        // Step 3: fill in the missing "regionId" attributes. "$" writes each
        // sub-object's own key (the region id) into its "regionId", while "@"
        // keeps the existing content under the same top-level key.
        "operation": "shift",
        "spec": {
          "*": {
            "*": {
              "$": "&2.&1.regionId"
            },
            "@": "&"
          }
        }
      },
      {
        // Step 4: get rid of the labels (region-id keys) of the sub-objects,
        // keeping only the objects themselves.
        "operation": "shift",
        "spec": {
          "*": {
            "*": ""
          }
        }
      },
      {
        // Step 5: keep only a single one of the repeated "regionId" values
        // (sub-objects that already had a "regionId" received a second one in step 3).
        "operation": "cardinality",
        "spec": {
          "*": {
            "*": "ONE"
          }
        }
      }
    ]
    

    The demo on the site http://jolt-demo.appspot.com/ is:

    enter image description here