Search code examples
groovy

Join values from array as keys to values from another array


From this JSON below:

{
   "query":{
      "ids":[
         95445875
      ],
      "group": "day",
      "dimensions":[
         "ym:s:date",
         "ym:s:lastSignUTMSource",
         "ym:s:lastSignUTMMedium",
         "ym:s:lastSignUTMCampaign",
         "ym:s:lastSignUTMContent",
         "ym:s:lastSignUTMTerm"
      ],
      "metrics":[
         "ym:s:goal315094773visits",
         "ym:s:goal318656867visits"
      ]
   },
   "data":[
      {
         "dimensions":[
            {
               "name":"2024-01-12"
            },
            {
               "name":"5ka"
            },
            {
               "name":"SITE"
            },
            {
               "name":"5ka_HR_nov23"
            },
            {
               "name":"button"
            },
            {
               "name":"click"
            }
         ],
         "metrics":[
            64.0,
            2.0
         ]
      },
      {
         "dimensions":[
            {
               "name":"2024-01-13"
            },
            {
               "name":"5ka"
            },
            {
               "name":"SITE"
            },
            {
               "name":"5ka_HR_nov23"
            },
            {
               "name":"button"
            },
            {
               "name":"click"
            }
         ],
         "metrics":[
            13.0,
            0.0
         ]
      },
      {
         "dimensions":[
            {
               "name":"2024-01-13"
            },
            {
               "name":"Yandex"
            },
            {
               "name":"SEM"
            },
            {
               "name":"5ka_HR_oct-dec'23"
            },
            {
               "name":"TGB"
            },
            {
               "name":"shop"
            }
         ],
         "metrics":[
            5.0,
            4.0
         ]
      },
      {
         "dimensions":[
            {
               "name":"2024-01-12"
            },
            {
               "name":"Yandex"
            },
            {
               "name":"SEM"
            },
            {
               "name":"5ka_HR_oct-dec'23"
            },
            {
               "name":"TGB"
            },
            {
               "name":"shop"
            }
         ],
         "metrics":[
            3.0,
            0.0
         ]
      }
   ],
   "total_rows":4
}

I expect to get this result:

[
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-12",
      "utm_source":"5ka",
      "utm_medium":"SITE",
      "utm_campaign":"5ka_HR_nov23",
      "utm_content":"button",
      "utm_term":"click",
      "goal_id":"315094773",
      "conversions":64.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-12",
      "utm_source":"5ka",
      "utm_medium":"SITE",
      "utm_campaign":"5ka_HR_nov23",
      "utm_content":"button",
      "utm_term":"click",
      "goal_id":"318656867",
      "conversions":2.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-13",
      "utm_source":"5ka",
      "utm_medium":"SITE",
      "utm_campaign":"5ka_HR_nov23",
      "utm_content":"button",
      "utm_term":"click",
      "goal_id":"315094773",
      "conversions":13.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-13",
      "utm_source":"5ka",
      "utm_medium":"SITE",
      "utm_campaign":"5ka_HR_nov23",
      "utm_content":"button",
      "utm_term":"click",
      "goal_id":"318656867",
      "conversions":0.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-12",
      "utm_source":"Yandex",
      "utm_medium":"SEM",
      "utm_campaign":"5ka_HR_oct-dec'23",
      "utm_content":"TGB",
      "utm_term":"shop",
      "goal_id":"315094773",
      "conversions":5.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-12",
      "utm_source":"Yandex",
      "utm_medium":"SEM",
      "utm_campaign":"5ka_HR_oct-dec'23",
      "utm_content":"TGB",
      "utm_term":"shop",
      "goal_id":"318656867",
      "conversions":4.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-13",
      "utm_source":"Yandex",
      "utm_medium":"SEM",
      "utm_campaign":"5ka_HR_oct-dec'23",
      "utm_content":"TGB",
      "utm_term":"shop",
      "goal_id":"315094773",
      "conversions":3.0
   },
   {
      "counter_id":95445875,
      "date_of_visit":"2024-01-13",
      "utm_source":"Yandex",
      "utm_medium":"SEM",
      "utm_campaign":"5ka_HR_oct-dec'23",
      "utm_content":"TGB",
      "utm_term":"shop",
      "goal_id":"318656867",
      "conversions":0.0
   }
]

Explanation

From souce JSON I need:

  • id from query.ids array and call new property counter_id
  • compare and join values from array query.dimensions and values from array data.dimensions, but give them new names. Values from query.dimensions should be property names for values from data.dimensions (we need this to keep right order). After this I want to change these names: ym:s:date should be date_of_visit, ym:s:lastSignUTMSource should be utm_source, ym:s:lastSignUTMMedium to utm_medium, ym:s:lastSignUTMCampaign to utm_campaign, ym:s:lastSignUTMContent to utm_content, ym:s:lastSignUTMTerm to utm_term.
  • compare and join values from query.metrics with values from array data.metrics. But each element from array query.metrics. f.e. ym:s:goal315094773visits should be written as goal_id: 315094773 (we need value between goal....visits). And value from data.metrics should be written as conversions.

I tried to write a Groovy script:

def inputJson = """
"""

def parseMetric(metric) {
    def matcher = metric =~ /ym:s:goal(\d+)visits/
    return ["goal_id": matcher[0][1].toLong(), "conversions": null]
}

def transform(json) {
    def query = json.query
    def data = json.data

    def result = []

    data.each { entry ->
        def dimensionsMap = [:]
        def metricsMap = [:]

        query.dimensions.eachWithIndex { dim, index ->
            def dimName = dim =~ /ym:s:(\S+)/
            dimensionsMap[dimName[0][1]] = entry.dimensions[index].name
        }

        query.metrics.eachWithIndex { metric, index ->
            def parsedMetric = parseMetric(metric)
            parsedMetric.conversions = entry.metrics[index]
            metricsMap.putAll(parsedMetric)
        }

        def entryResult = [
                "counter_id": query.ids[0],
                "date_of_visit": dimensionsMap['date'],
                "utm_source": dimensionsMap['lastSignUTMSource'],
                "utm_medium": dimensionsMap['lastSignUTMMedium'],
                "utm_campaign": dimensionsMap['lastSignUTMCampaign'],
                "utm_content": dimensionsMap['lastSignUTMContent'],
                "utm_term": dimensionsMap['lastSignUTMTerm'],
        ]

        entryResult.putAll(metricsMap)
        result.add(entryResult)
    }

    return result
}

def jsonSlurper = new groovy.json.JsonSlurper()
def input = jsonSlurper.parseText(inputJson)

def output = transform(input)
def outputJson = groovy.json.JsonOutput.toJson(output)
println outputJson

But I got result not I expected:

result

Seems like it grab only flast values query.metrics. So I only got result for ym:s:goal315094773visits. Second value inside arrays ignored. How to fix it?


Solution

  • You could write your transform method like this:

    def transform(json) {
        def query = json.query
        def dims = query.dimensions
    
        json.data.collectMany { entry ->
            def dimensionsMap = dims.indices.collectEntries { idx ->
                [(dims[idx] =~ /ym:s:(\S+)/)[0][1], entry.dimensions[idx].name]
            }
    
            def metrics = query.metrics.indexed().collect { idx, metric ->
                [goal_id: (metric =~ /ym:s:goal(\d+)visits/)[0][1].toLong(),
                 conversions: entry.metrics[idx]]
            }
    
            metrics.collect {[
                counter_id: query.ids[0],
                date_of_visit: dimensionsMap['date'],
                utm_source: dimensionsMap['lastSignUTMSource'],
                utm_medium: dimensionsMap['lastSignUTMMedium'],
                utm_campaign: dimensionsMap['lastSignUTMCampaign'],
                utm_content: dimensionsMap['lastSignUTMContent'],
                utm_term: dimensionsMap['lastSignUTMTerm'],
                *:it
            ]}
        }
    }
    

    Plus delete your parseMetric method.