Search code examples
sqlsnowflake-cloud-data-platformaggregatevariant

Variant Column Aggregation


In source we have a variant column (events) and we need to sum up metrics (calls, impressions, leads, visits, etc.) in that column.

Tricky part here is that variant column can have new metrics added anytime and we dont want to update our code (stored procedure) every time.

Is there a way we can dynamically add metrics within variant column(events) to target variant column(total_events) without any code change? Source: enter image description here

Required Target: enter image description here

SQL for setting up source data

CREATE OR REPLACE TABLE user_activity (
    ACTIVITY DATE,
    ID       NUMBER  , 
    NAME     VARCHAR , 
    EVENTS   VARIANT
);

INSERT INTO user_activity (activity, id, name, events) 
SELECT '2023-09-05'::date, 101, 'Mickey Mouse', PARSE_JSON('{   "imps": 1,   "visits": 1,   "calls": 0,   "leads": 0 }');

INSERT INTO user_activity (activity, id, name, events) 
SELECT '2023-09-05'::date, 102, 'Mickey Mouse', PARSE_JSON('{   "imps": 1,   "visits": 1,   "calls": 0,   "leads": 0 }');

INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-05'::date, 103, 'Mickey Mouse', PARSE_JSON('{   "imps": 1,   "visits": 0,   "calls": 1,   "leads": 0 }');

INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-05'::date, 104, 'Mickey Mouse', PARSE_JSON('{   "imps": 1,   "visits": 0,   "calls": 0,   "leads": 1 }');

INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-06'::date, 105, 'Mickey Mouse', PARSE_JSON('{   "imps": 1,   "visits": 0,   "calls": 0,   "leads": 1,   "service": 1 }');

INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-06'::date, 106, 'Mickey Mouse', PARSE_JSON('{   "imps": 1,   "visits": 1,   "calls": 0 }');

SELECT * FROM user_activity ORDER BY 1,2;

Here is work in progress (from another post, apologies for duplicated entries):

select 
    activity     as activity
  , name         as name
  , OBJECT_CONSTRUCT( 
                  'calls',   sum(a.calls::NUMBER(2,1)),
                  'imps',    sum(a.imps::NUMBER(2,1)),
                  'leads',   sum(a.leads::NUMBER(2,1)),
                  'visits',  sum(a.visits::NUMBER(2,1)),
                  'service', sum(a.service::NUMBER(2,1))
                  ) AS total_events
from (
    SELECT
        activity       AS activity
      , name           AS name
      , events         AS events 
      , events:calls   AS calls
      , events:imps    AS imps
      , events:leads   AS leads
      , events:visits  AS visits
      , events:service AS service
    FROM user_activity,
    LATERAL FLATTEN(events, OUTER=> TRUE) f
    GROUP BY 1,2,3
) a
group by 1,2
order by 1,2,3;

I want SQL something like this (dummy sql) where I don't have to list down metrics embedded in variant column.

SELECT activity, SUM(events) AS total_events
FROM user_activity
GROUP BY activity;

Solution

  • This probably gives you what you want:

    with flat_data as (
    SELECT
            activity       AS activity
          , name           AS name
          ,f.key
          ,f.value
        FROM user_activity,
        LATERAL FLATTEN(events, OUTER=> TRUE) f
    )
    , sum_data as (
    SELECT
        activity
        ,name
        ,key
        ,sum(value)::INTEGER as value
    FROM
        flat_data
    GROUP BY
        activity, name, key
    )
    SELECT
        activity
        ,name
        ,OBJECT_AGG(key, value) TOTAL_EVENTS
    from sum_data
    group by activity, name
    ;