I have
contract, clause 1, Subsection 1.1, contract, clause 1, Subsection 1.2,
paragraph (a), contract, clause 1, Subsection 1.2, paragraph (b), contract,
clause 2
and I want to get
contract, clause 1, Subsection 1.1, Subsection 1.2, paragraph (a), paragraph
(b), clause 2
I've found that regexp can do this but I can not find which string to use to do it
Please help..
Based on this link to split a comma separated value into rows, I splitted the string into rows, kept the position of the first occurence, made a distinct a reaggregated the values
with test_string as (
select 1 as id,
'contract, clause 1, Subsection 1.1, contract, clause 1, Subsection 1.2, paragraph (a), contract, clause 1, Subsection 1.2, paragraph (b), contract, clause 2' val
from dual)
select id, listagg(word,', ') WITHIN GROUP (order by position) FROM (
select distinct id, first_value(position) over ( partition by word order by position ) position, word from (
select
distinct t.id,
levels.column_value as position,
trim(regexp_substr(t.val, '[^,]+', 1, levels.column_value)) as word
from
test_string t,
table(cast(multiset(select level from dual connect by level <= length (regexp_replace(t.val, '[^,]+')) + 1) as sys.OdciNumberList)) levels
)
) GROUP BY id
And if you are not interested in keeping the order
with test_string as (
select 1 as id,
'contract, clause 1, Subsection 1.1, contract, clause 1, Subsection 1.2, paragraph (a), contract, clause 1, Subsection 1.2, paragraph (b), contract, clause 2' val
from dual)
select id, listagg(word,', ') WITHIN GROUP (order by 1) FROM (
select
distinct t.id,
trim(regexp_substr(t.val, '[^,]+', 1, levels.column_value)) as word
from
test_string t,
table(cast(multiset(select level from dual connect by level <= length (regexp_replace(t.val, '[^,]+')) + 1) as sys.OdciNumberList)) levels
) GROUP BY id