Search code examples
jsonjolt

Compare arrays and join values together


Source JSON:

{
  "query": {
    "ids": [
      44911681
    ],
    "dimensions": [
      "ym:s:date",
      "ym:s:cross_device_last_significantUTMSource",
      "ym:s:cross_device_last_significantUTMMedium",
      "ym:s:cross_device_last_significantUTMCampaign"
    ],
    "metrics": [
      "ym:s:goal3044911681reaches",
      "ym:s:goal3044911681visits",
      "ym:s:goal5130447458reaches",
      "ym:s:goal5130447458visits"
    ],
    "date1": "2024-10-01",
    "date2": "2024-10-05"
  },
  "data": [
    {
      "dimensions": [
        {
          "name": "2024-10-03"
        },
        {
          "name": "Yandex_network"
        },
        {
          "name": "cpc"
        },
        {
          "name": "Spar_october_2024"
        }
      ],
      "metrics": [
        5,
        4,
        12,
        7
      ]
    },
    {
      "dimensions": [
        {
          "name": "2024-10-04"
        },
        {
          "name": "Yandex_network"
        },
        {
          "name": "cpc"
        },
        {
          "name": "Spar_october_2024"
        }
      ],
      "metrics": [
        5,
        5,
        3,
        9
      ]
    }
  ],
  "total_rows": 2
}

From souce JSON I need:

  • id from query.ids array and call new property counter_id
  • compare and join values from array query.dimensions and values from array data.dimensions, but give them new names. Values from query.dimensions should be property names for values from data.dimensions (we need this to keep right order). After this I want to change these names: ym:s:date should be date_of_visit, ym:s:lastSignUTMSource should be utm_source, ym:s:lastSignUTMMedium to utm_medium, ym:s:lastSignUTMCampaign to utm_campaign
  • compare and join values from query.metrics with values from array data.metrics. But each element from array query.metrics, for example ym:s:goal304491168visits (or goal3044911681reaches) should be written as goal_id: 304491168. And elements from array data.metrics should be written as: 1) as conversions (where metric name ends with visits) 2) as goal_reaches (where metric name ends as reaches)

So, based on this JSON and this description I expect:

[
  {
    "counter_id": 44911681,
    "date_of_visit": "2024-10-03",
    "utm_source": "Yandex_network",
    "utm_medium": "cpc",
    "utm_campaign": "Spar_october_2024",
    "goal_id": 3044911681,
    "conversions": 5,
    "goal_reaches": 4
  },
  {
    "counter_id": 44911681,
    "date_of_visit": "2024-10-03",
    "utm_source": "Yandex_network",
    "utm_medium": "cpc",
    "utm_campaign": "Spar_october_2024",
    "goal_id": 5130447458,
    "conversions": 12,
    "goal_reaches": 7
  },
  {
    "counter_id": 44911681,
    "date_of_visit": "2024-10-04",
    "utm_source": "Yandex_network",
    "utm_medium": "cpc",
    "utm_campaign": "Spar_october_2024",
    "goal_id": 3044911681,
    "conversions": 5,
    "goal_reaches": 5
  },
  {
    "counter_id": 44911681,
    "date_of_visit": "2024-10-04",
    "utm_source": "Yandex_network",
    "utm_medium": "cpc",
    "utm_campaign": "Spar_october_2024",
    "goal_id": 5130447458,
    "conversions": 3,
    "goal_reaches": 9
  }
]

I was only able to build this JOLT config, that matches dimensions array. I need to extract id from goal and match metrics array.

[
  {
    "operation": "shift",
    "spec": {
      "data": {
        "*": {
          "metrics": {
            "*": {
              "@4,query.ids[0]": "&3_&.counter_id",
              "@2,dimensions[0].name": "&3_&.date_of_visit",
              "@2,dimensions[1].name": "&3_&.utm_source",
              "@2,dimensions[2].name": "&3_&.utm_medium",
              "@2,dimensions[3].name": "&3_&.utm_campaign"
            }
          }
        }
      }
    }
  },
  {
    "operation": "shift",
    "spec": {
      "*": ""
    }
  }
]

How to add these metrics and id from goal...visits?


Solution

  • You can use the following transformation spec :

    [
      { //match dimensions vs. metrics
        "operation": "shift",
        "spec": {
          "@query.ids": {
            "*": "counter_id"
          },
          "data": {
            "*": {
              "*ens*s": { //abbreviate the object name to "dim" by using &(2,1) below
                "*": {
                  "*": "&3.&(2,1).@(5,query.&2[&1])"
                }
              },
              "metrics": {
                "*": "&2.@(4,query.&1[&])"
              }
            }
          }
        }
      },
      { //key prunings, renamings and groupings occur here
        "operation": "shift",
        "spec": {
          "counter_id": { "": "" }, //meanwhile get rid of this one
          "*": {
            "dim": {
              "@2,counter_id": "&2.&1.counter_id",
              "ym:s:*": "&2.&1.&(0,1)_of_visit",
              "ym:s:cross_device_last_significantUTM*": "&2.&1.utm_&(0,1)"
            },
            "ym:s:goal*r*": "&1.&(0,1).goal_r&(0,2)", //group by goal_id values
            "ym:s:goal*v*": "&1.&(0,1).v&(0,2)"
          }
        }
      },
      {
        "operation": "shift",
        "spec": {
          "*": {
            "dim": { "": "" },
            "*": {
              "@1,dim": { "*": "&3_&2.&" },
              "$": "&2_&1.goal_id",
              "*": "&2_&1.&",
              "visits": "&2_&1.conversions" //another renaming
            }
          }
        }
      },
      { //get an array of objects with no key
        "operation": "shift",
        "spec": {
          "*": "[]"
        }
      }
    ]
    ´´´