I have a table column with nested arrays in a Snowflake database. I want to convert the nested array into columns in the manner shown below in Snowflake SQL.
Table Name: SENSOR_DATA
The RX column is of data type VARIANT. The nested arrays will not always be 3 as shown below. There are cases where there are 20,000 nested arrays, and other cases where there are none.
| ID | RX |
|----|-----------------------------|
| 1 |[[0, 15], [1, 50], [2, 34.2]]|
| 2 |[[0, 20], [1, 25]] |
| 3 |[[0, 100], [1, 42], [2, 76]] |
I want to achieve something like this from the table above:
| ID |Col0 | Col1| Col2|
|----|-----|-----|-----|
| 1 | 15 | 50 | 34.2|
| 2 | 20 | 25 | NULL|
| 3 | 100 | 42 | 76 |
Lets recreate that table:
create table sensor_data as
select column1 id, parse_json(column2) rx
from values (1, '[[0, 15], [1, 50], [2, 34.2]]')
,(2, '[[0, 20], [1, 25]]')
,(3, '[[0, 100], [1, 42], [2, 76]]');
Then get the distinct column keys that you have:
select distinct r.value[0] from sensor_data, table(flatten(input=>rx)) r;
R.VALUE[0] |
---|
0 |
1 |
2 |
Given we want this to be dynamic, the next step is just to check with static SQL it gives us the correct answer:
select id
,max(iff(r.value[0] = 0, r.value[1], null)) as col_0
,max(iff(r.value[0] = 1, r.value[1], null)) as col_1
,max(iff(r.value[0] = 2, r.value[1], null)) as col_2
from sensor_data, table(flatten(input=>rx)) r
group by 1
order by 1;
ID | COL_0 | COL_1 | COL_2 |
---|---|---|---|
1 | 15 | 50 | 34.2 |
2 | 20 | 25 | null |
3 | 100 | 42 | 76 |
write now to do this dynamically:
declare
sql string;
c1 cursor for select distinct r.value[0] as key from sensor_data, table(flatten(input=>rx)) r;
begin
sql := 'select id ';
for record in c1 do
sql := sql || ',max(iff(r.value[0] = '|| record.key::text ||', r.value[1], null)) as col_' || record.key::text;
end for;
sql := sql || ' from sensor_data, table(flatten(input=>rx)) r group by 1 order by 1';
return sql;
end;
which gives us:
select id ,max(iff(r.value[0] = 0, r.value1, null)) as col_0,max(iff(r.value[0] = 1, r.value1, null)) as col_1,max(iff(r.value[0] = 2, r.value1, null)) as col_2 from sensor_data, table(flatten(input=>rx)) r group by 1 order by 1
which when run gives us the expected results.
so now we want to run that:
declare
sql string;
res resultset;
c1 cursor for select distinct r.value[0] as key from sensor_data, table(flatten(input=>rx)) r;
begin
sql := 'select id ';
for record in c1 do
sql := sql || ',max(iff(r.value[0] = '|| record.key::text ||', r.value[1], null)) as col_' || record.key::text;
end for;
sql := sql || ' from sensor_data, table(flatten(input=>rx)) r group by 1 order by 1';
res := (execute immediate :sql);
return table (res);
end;
gives:
ID | COL_0 | COL_1 | COL_2 |
---|---|---|---|
1 | 15 | 50 | 34.2 |
2 | 20 | 25 | null |
3 | 100 | 42 | 76 |
based of code from these sections of the manual:
create or replace table sensor_data as
select column1 id, parse_json(column2) rx
from values (1, '[[0, 15], [1, 50], [2, 34.2]]')
,(2, '[[0, 20], [1, 25]]')
,(3, '[[0, 100], [1, 42], [2, 76]]')
,(4, '[[0,20],[30,50], [45, 100]]');
declare
sql string;
res resultset;
c1 cursor for select distinct r.value[0] as key from sensor_data, table(flatten(input=>rx)) r order by key;
begin
sql := 'select id ';
for record in c1 do
sql := sql || ',max(iff(r.value[0] = '|| record.key::text ||', r.value[1], null)) as col_' || record.key::text;
end for;
sql := sql || ' from sensor_data, table(flatten(input=>rx)) r group by 1 order by 1';
res := (execute immediate :sql);
return table (res);
end;
gives:
ID | COL_0 | COL_1 | COL_2 | COL_30 | COL_45 |
---|---|---|---|---|---|
1 | 15 | 50 | 34.2 | null | null |
2 | 20 | 25 | null | null | null |
3 | 100 | 42 | 76 | null | null |
4 | 20 | null | null | 50 | 100 |
so if we go back in to original data
create or replace table sensor_data as
select column1 id, parse_json(column2) rx
from values (1, '[[0, 15], [1, 50], [2, 34.2]]')
,(2, '[[0, 20], [1, 25]]')
,(3, '[[0, 100], [1, 42], [2, 76]]');
and alter the code to not have the MAX and the GROUP BY
select id
,iff(r.value[0] = 0, r.value[1], null) as col_0
,iff(r.value[0] = 1, r.value[1], null) as col_1
,iff(r.value[0] = 2, r.value[1], null) as col_2
from sensor_data, table(flatten(input=>rx)) r
order by 1;
we see:
ID | COL_0 | COL_1 | COL_2 |
---|---|---|---|
1 | 15 | null | null |
1 | null | 50 | null |
1 | null | null | 34.2 |
2 | 20 | null | null |
2 | null | 25 | null |
3 | 100 | null | null |
3 | null | 42 | null |
3 | null | null | 76 |
so we can see those values getting unrolled, now the GROUP BY id, is going to roll those three id's up, thus for col0 we have 15, null, null
MAX takes the higest value, and 15 is higher that null so that is what is keep. This is the process this is working on.