sql snowflake-cloud-data-platform flatten

Convert Nested Array into Columns in Snowflake

I have a table column with nested arrays in a Snowflake database. I want to convert the nested array into columns in the manner shown below in Snowflake SQL.

Table Name: SENSOR_DATA

The RX column is of data type VARIANT. The nested arrays will not always be 3 as shown below. There are cases where there are 20,000 nested arrays, and other cases where there are none.

| ID |             RX              |
|----|-----------------------------|
| 1  |[[0, 15], [1, 50], [2, 34.2]]|
| 2  |[[0, 20], [1, 25]]           |
| 3  |[[0, 100], [1, 42], [2, 76]] |

I want to achieve something like this from the table above:

| ID |Col0 | Col1| Col2|
|----|-----|-----|-----|
| 1  |  15 |  50 | 34.2|
| 2  |  20 |  25 | NULL|
| 3  | 100 |  42 | 76  |

Solution

Lets recreate that table:

create table sensor_data as 
  select column1 id, parse_json(column2) rx
  from values (1, '[[0, 15], [1, 50], [2, 34.2]]')
             ,(2, '[[0, 20], [1, 25]]')
             ,(3, '[[0, 100], [1, 42], [2, 76]]');

Then get the distinct column keys that you have:

select distinct r.value[0] from sensor_data, table(flatten(input=>rx)) r;

R.VALUE[0]
0
1
2

Given we want this to be dynamic, the next step is just to check with static SQL it gives us the correct answer:

 select id
    ,max(iff(r.value[0] = 0, r.value[1], null)) as col_0
    ,max(iff(r.value[0] = 1, r.value[1], null)) as col_1
    ,max(iff(r.value[0] = 2, r.value[1], null)) as col_2
 from sensor_data, table(flatten(input=>rx)) r
 group by 1
 order by 1;

ID	COL_0	COL_1	COL_2
1	15	50	34.2
2	20	25	null
3	100	42	76

write now to do this dynamically:

declare
  sql string;
  c1 cursor for select distinct r.value[0] as key from sensor_data, table(flatten(input=>rx)) r;
begin
  sql := 'select id ';
  for record in c1 do
    sql := sql || ',max(iff(r.value[0] = '|| record.key::text ||', r.value[1], null)) as col_' || record.key::text;
  end for;
  sql := sql || ' from sensor_data, table(flatten(input=>rx)) r  group by 1  order by 1';
  return sql;
end;

which gives us:

select id ,max(iff(r.value[0] = 0, r.value1, null)) as col_0,max(iff(r.value[0] = 1, r.value1, null)) as col_1,max(iff(r.value[0] = 2, r.value1, null)) as col_2 from sensor_data, table(flatten(input=>rx)) r group by 1 order by 1

which when run gives us the expected results.

so now we want to run that:

declare
  sql string;
  res resultset;
  c1 cursor for select distinct r.value[0] as key from sensor_data, table(flatten(input=>rx)) r;
begin
  sql := 'select id ';
  for record in c1 do
    sql := sql || ',max(iff(r.value[0] = '|| record.key::text ||', r.value[1], null)) as col_' || record.key::text;
  end for;
  sql := sql || ' from sensor_data, table(flatten(input=>rx)) r  group by 1  order by 1';
  
  res := (execute immediate :sql);
  return table (res);
end;

gives:

ID	COL_0	COL_1	COL_2
1	15	50	34.2
2	20	25	null
3	100	42	76

based of code from these sections of the manual:

Working with loops

Working with Resultsets

With extra "tricky data" and the mentioned ORDER BY:

create or replace table sensor_data as 
  select column1 id, parse_json(column2) rx
  from values (1, '[[0, 15], [1, 50], [2, 34.2]]')
             ,(2, '[[0, 20], [1, 25]]')
             ,(3, '[[0, 100], [1, 42], [2, 76]]')
             ,(4, '[[0,20],[30,50], [45, 100]]');

declare
  sql string;
  res resultset;
  c1 cursor for select distinct r.value[0] as key from sensor_data, table(flatten(input=>rx)) r order by key;
begin
  sql := 'select id ';
  for record in c1 do
    sql := sql || ',max(iff(r.value[0] = '|| record.key::text ||', r.value[1], null)) as col_' || record.key::text;
  end for;
  sql := sql || ' from sensor_data, table(flatten(input=>rx)) r  group by 1  order by 1';
  
  res := (execute immediate :sql);
  return table (res);
end;

gives:

ID	COL_0	COL_1	COL_2	COL_30	COL_45
1	15	50	34.2	null	null
2	20	25	null	null	null
3	100	42	76	null	null
4	20	null	null	50	100

So yes, but that's the MAX doing:

so if we go back in to original data

create or replace table sensor_data as 
  select column1 id, parse_json(column2) rx
  from values (1, '[[0, 15], [1, 50], [2, 34.2]]')
             ,(2, '[[0, 20], [1, 25]]')
             ,(3, '[[0, 100], [1, 42], [2, 76]]');

and alter the code to not have the MAX and the GROUP BY

 select id
    ,iff(r.value[0] = 0, r.value[1], null) as col_0
    ,iff(r.value[0] = 1, r.value[1], null) as col_1
    ,iff(r.value[0] = 2, r.value[1], null) as col_2
 from sensor_data, table(flatten(input=>rx)) r
 order by 1;

we see:

ID	COL_0	COL_1	COL_2
1	15	null	null
1	null	50	null
1	null	null	34.2
2	20	null	null
2	null	25	null
3	100	null	null
3	null	42	null
3	null	null	76

so we can see those values getting unrolled, now the GROUP BY id, is going to roll those three id's up, thus for col0 we have 15, null, null MAX takes the higest value, and 15 is higher that null so that is what is keep. This is the process this is working on.