Search code examples
regexoracle-databaseregexp-substr

Oracle REGEXP_SUBSTR will not match the dot character


I'm trying to extract information from strings like:

FOO-BAR-AUDIT-DATABASE.NUPKG
FOO.BAR.DATABASE-2.0.0.NUPKG

to info like:

'FOO.BAR.DATABASE' '2.0.0'
        |             |
   module_name     version

Currently I'm not able to parse correctly when the module_name part contains . chars. See table below.

The example below show how I extract the information. The first group of the regexp is the one that do not work correctly '(.*?), the remaining groups handle the cases of varying version information.

select case module_name when expected then 'pass' else 'fail' end as test, y.* from(
select lower(regexp_substr(t.pck, g.regex, 1, 1, '', 1)) as module_name, 
       t.expected,
       to_number(regexp_substr(t.pck, g.regex, 1, 1, '', 3)) as major,
       to_number(regexp_substr(t.pck, g.regex, 1, 1, '', 5)) as minor,
       to_number(regexp_substr(t.pck, g.regex, 1, 1, '', 7)) as patch,
       (t.pck) as package_name
from   (select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
        union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual 
        union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
        union select 'funk-database-1.nupkg', 'funk-database' from dual
        union select 'funk-database-1.2.nupkg', 'funk-database' from dual
        union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual) t
cross  join (select '(.*?)(-(\d+)(\.(\d+))?(\.(\d+))?)?(\..*)' as regex from dual) g
)y;

The query above yields the following (Oracle 19c):

test module_name expected major minor patch package_name
pass foo-bar-audit-database foo-bar-audit-database FOO-BAR-AUDIT-DATABASE.NUPKG
fail foo foo.bar.database FOO.BAR.DATABASE-2.0.0.NUPKG
pass funky_log_database funky_log_database 1 0 0 FUNKY_LOG_DATABASE-1.0.0.NUPKG
pass baz-database baz-database 1 0 1 baz-database-1.0.1.nupkg
pass funk-database funk-database 1 2 funk-database-1.2.nupkg
pass funk-database funk-database 1 funk-database-1.nupkg

I've tried use ([[:alnum:]._-]*?) as the first group, but it yield the same result. Switching to a greedy match matches too much.

Any good suggestions out there?


Solution

  • You can match from the end to get the version then extract the sub-string before the version to get the module name:

    select case module_name when expected then 'pass' else 'fail' end as test,
           y.*
    from   (
      select lower(
               substr(
                 t.pck,
                 1,
                 REGEXP_INSTR(t.pck, g.regex) - 1
               )
             ) as module_name, 
             t.expected,
             to_number(regexp_substr(t.pck, g.regex, 1, 1, '', 2)) as major,
             to_number(regexp_substr(t.pck, g.regex, 1, 1, '', 3)) as minor,
             to_number(regexp_substr(t.pck, g.regex, 1, 1, '', 4)) as patch,
             t.pck as package_name
      from   (
        select 'FUNKY_LOG_DATABASE-1.0.0.NUPKG' as pck, 'funky_log_database' as expected from dual
        union select 'FOO.BAR.DATABASE-2.0.0.NUPKG', 'foo.bar.database' from dual 
        union select 'FOO-BAR-AUDIT-DATABASE.NUPKG', 'foo-bar-audit-database' from dual
        union select 'funk-database-1.nupkg', 'funk-database' from dual
        union select 'funk-database-1.2.nupkg', 'funk-database' from dual
        union select 'baz-database-1.0.1.nupkg', 'baz-database' from dual
      ) t
      cross  join (
        select '(-(\d+)\.?(\d+)?\.?(\d+)?)?\.[^.]+$' as regex from dual
      ) g
    )y;
    

    Outputs:

    TEST MODULE_NAME EXPECTED MAJOR MINOR PATCH PACKAGE_NAME
    pass foo-bar-audit-database foo-bar-audit-database FOO-BAR-AUDIT-DATABASE.NUPKG
    pass foo.bar.database foo.bar.database 2 0 0 FOO.BAR.DATABASE-2.0.0.NUPKG
    pass funky_log_database funky_log_database 1 0 0 FUNKY_LOG_DATABASE-1.0.0.NUPKG
    pass baz-database baz-database 1 0 1 baz-database-1.0.1.nupkg
    pass funk-database funk-database 1 2 funk-database-1.2.nupkg
    pass funk-database funk-database 1 funk-database-1.nupkg

    db<>fiddle here