How to use base SQL to parse a very, very long string into many columns by a delimiter? I’ve been through a few posts (here for example, and here or here) on parsing into separate columns, rows, et cetera. They all however seem to manually identify and create each column. (Here, for example, the example includes three columns, the maximum number needed). I need up to 90 columns. I’m looking for a way to consolidate, or functionalize, the parsing into multiple columns so I don’t need to repeat the equivalent of ‘,nullif(split_part(my_string,'|',N),'') string_N’ from below 90 times.
with test_data (id, my_string) as
(
select 1, 'a|b|c|d' union all
select 2, 'abba|zabba|beta' union all
select 5, 'x|y' union all
select 4, 'z1|z2|z3'
)
select
id
,nullif(split_part(my_string,'|',1),'') string_1
,nullif(split_part(my_string,'|',2),'') string_2
,nullif(split_part(my_string,'|',3),'') string_3
,nullif(split_part(my_string,'|',4),'') string_4
from test_data ;
I tried building the solution manually, as above. Doing it for very, very long strings would create problems for me. The posted SQL solutions aren't getting there and I can't use python.
Taking pivot based technique a little further and combining it with some scripting, we can get around the problem of having to use "'column_name'"
instead of just column_name
.
--dummy data
create or replace temporary table t (id, my_string) as
select 1, 'a|b|c|d' union all
select 2, 'abba|zabba|beta' union all
select 5, 'x|y' union all
select 4, 'z1|z2|z3';
--start with a clone
create or replace temporary table t_split clone t;
--add additional varchar columns (string_1 through string_N) dynamically
declare
rs resultset;
num_cols int;
cols_def varchar;
begin
num_cols:= (select max(regexp_count(my_string,'[|]')) from t_split) + 1;
cols_def:= (select regexp_replace(array_to_string(array_generate_range(1,:num_cols+1),','),'([0-9]+)','string_\\1 varchar'));
rs:= (execute immediate 'alter table t_split add column '||:cols_def);
return table(rs);
end;
--insert to table
insert overwrite into t_split
with cte as
(select a.*, b.index as cols, b.value
from t a, lateral split_to_table(my_string,'|') as b)
select *
from cte
pivot (max(value) for cols in (any order by cols))
order by id;
--take a look
select * from t_split;