In source we have a variant column (events) and we need to sum up metrics (calls, impressions, leads, visits, etc.) in that column.
Tricky part here is that variant column can have new metrics added anytime and we dont want to update our code (stored procedure) every time.
Is there a way we can dynamically add metrics within variant column(events) to target variant column(total_events) without any code change?
Source:
SQL for setting up source data
CREATE OR REPLACE TABLE user_activity (
ACTIVITY DATE,
ID NUMBER ,
NAME VARCHAR ,
EVENTS VARIANT
);
INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-05'::date, 101, 'Mickey Mouse', PARSE_JSON('{ "imps": 1, "visits": 1, "calls": 0, "leads": 0 }');
INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-05'::date, 102, 'Mickey Mouse', PARSE_JSON('{ "imps": 1, "visits": 1, "calls": 0, "leads": 0 }');
INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-05'::date, 103, 'Mickey Mouse', PARSE_JSON('{ "imps": 1, "visits": 0, "calls": 1, "leads": 0 }');
INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-05'::date, 104, 'Mickey Mouse', PARSE_JSON('{ "imps": 1, "visits": 0, "calls": 0, "leads": 1 }');
INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-06'::date, 105, 'Mickey Mouse', PARSE_JSON('{ "imps": 1, "visits": 0, "calls": 0, "leads": 1, "service": 1 }');
INSERT INTO user_activity (activity, id, name, events)
SELECT '2023-09-06'::date, 106, 'Mickey Mouse', PARSE_JSON('{ "imps": 1, "visits": 1, "calls": 0 }');
SELECT * FROM user_activity ORDER BY 1,2;
Here is work in progress (from another post, apologies for duplicated entries):
select
activity as activity
, name as name
, OBJECT_CONSTRUCT(
'calls', sum(a.calls::NUMBER(2,1)),
'imps', sum(a.imps::NUMBER(2,1)),
'leads', sum(a.leads::NUMBER(2,1)),
'visits', sum(a.visits::NUMBER(2,1)),
'service', sum(a.service::NUMBER(2,1))
) AS total_events
from (
SELECT
activity AS activity
, name AS name
, events AS events
, events:calls AS calls
, events:imps AS imps
, events:leads AS leads
, events:visits AS visits
, events:service AS service
FROM user_activity,
LATERAL FLATTEN(events, OUTER=> TRUE) f
GROUP BY 1,2,3
) a
group by 1,2
order by 1,2,3;
I want SQL something like this (dummy sql) where I don't have to list down metrics embedded in variant column.
SELECT activity, SUM(events) AS total_events
FROM user_activity
GROUP BY activity;
This probably gives you what you want:
with flat_data as (
SELECT
activity AS activity
, name AS name
,f.key
,f.value
FROM user_activity,
LATERAL FLATTEN(events, OUTER=> TRUE) f
)
, sum_data as (
SELECT
activity
,name
,key
,sum(value)::INTEGER as value
FROM
flat_data
GROUP BY
activity, name, key
)
SELECT
activity
,name
,OBJECT_AGG(key, value) TOTAL_EVENTS
from sum_data
group by activity, name
;