Search code examples
oraclesplitquotesbracketsregexp-substr

oracle regular expression split but workaround brackets


I have a string like : "Att1:Val1,[Txt1,Txt2:Txt3]:Val2"
Using Oracle Sql, I would like to achieve a split into rows & columns as below :

lvl substr2 substr3
1 Att1 Val1
2 [Txt1,Txt2:Txt3] Val2

I have tried below code, but can't figure out how not to split by comma and colon the values between the brackets

with WTBL as
(
    select 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2,' as WCLN
    from dual
)
select lvl, substr1, substr2, substr3, WCLN
from WTBL
cross join xmltable('if (contains($PRM,",")) 
        then
            let $list := ora:tokenize($PRM, ","),
                $cnt := count($list)
          for $val at $r in $list 
          where $r < $cnt
          return $val
        else $PRM'
  passing WCLN as PRM
  columns substr1 varchar2(4000) path '.'
    ,substr2 varchar2(4000) path 'if (contains( . , ":")) then
            let $list := ora:tokenize( . ,":"),
                $cnt := count($list)
          for $val at $r in $list
          where $r = $cnt - 1
          return $val
        else . '
    ,substr3 varchar2(4000) path 'if (contains( . , ":")) then
            let $list := ora:tokenize( . ,":"),
                $cnt := count($list)
          for $val at $r in $list
          where $r = $cnt
          return $val
        else . '
     ,lvl FOR ORDINALITY
) xm

Your help is much appreciated!
Vlad


Solution

  • You can use the regular expression (\[.*?\]|.*?):(.*?)(,|$) and a recursive sub-query:

    WITH matches (value, lvl, substr1, substr2, epos) AS (
      SELECT value,
             1,
             REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 1),
             REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, NULL, 2),
             REGEXP_INSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', 1, 1, 1)
      FROM   table_name
    UNION ALL
      SELECT value,
             lvl + 1,
             REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', epos, 1, NULL, 1),
             REGEXP_SUBSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', epos, 1, NULL, 2),
             REGEXP_INSTR(value, '(\[.*?\]|.*?):(.*?)(,|$)', epos, 1, 1)
      FROM   matches
      WHERE  epos > 0
    )
    SELECT lvl,
           substr1,
           substr2
    FROM   matches
    WHERE  epos > 0;
    

    Or, simple (faster) string functions:

    WITH matches (value, lvl, spos, mpos, epos) AS (
      SELECT value,
             1,
             1,
             CASE
             WHEN SUBSTR(value, 1, 1) = '['
             THEN INSTR(value, ']:', 1) + 1
             ELSE INSTR(value, ':', 1)
             END,
             INSTR(
               value,
               ',',
               CASE
               WHEN SUBSTR(value, 1, 1) = '['
               THEN INSTR(value, ']:', 1) + 1
               ELSE INSTR(value, ':', 1)
               END
             )
      FROM   table_name
    UNION ALL
      SELECT value,
             lvl + 1,
             epos + 1,
             CASE
             WHEN SUBSTR(value, epos + 1, 1) = '['
             THEN INSTR(value, ']:', epos + 1) + 1
             ELSE INSTR(value, ':', epos + 1)
             END,
             INSTR(
               value,
               ',',
               CASE
               WHEN SUBSTR(value, epos + 1, 1) = '['
               THEN INSTR(value, ']:', epos + 1) + 1
               ELSE INSTR(value, ':', epos + 1)
               END
             )
      FROM   matches
      WHERE  epos > 0
    )
    SELECT lvl,
           SUBSTR(value, spos, mpos - spos) AS substr1,
           CASE epos
           WHEN 0
           THEN SUBSTR(value, mpos + 1)
           ELSE SUBSTR(value, mpos + 1, epos - mpos - 1)
           END AS substr2
    FROM   matches;
    

    Which, for the sample data:

    CREATE TABLE table_name (value) AS
    SELECT 'Att1:Val1,[Txt1,Txt2:Txt3]:Val2' FROM DUAL;
    

    Both output:

    LVL SUBSTR1 SUBSTR2
    1 Att1 Val1
    2 [Txt1,Txt2:Txt3] Val2

    db<>fiddle here