Search code examples
sqlregexsnowflake-cloud-data-platform

regular expression with regexp_like is not working in snowflake


  SELECT 
       'HAB' AS NAME,
       CASE 
           WHEN REGEXP_LIKE(LIST_TXT, '(^|,)201(,|$)') THEN 'YES' 
       END AS MC_NM
  FROM TABLE1
)
SELECT * FROM HAB;

my reg expression works as expected outside of Snowflake, but it gives me null for MC_NM even though there is LIST_TXT with 201 in it.


Solution

  • REGEXP_LIKE behaves like LIKE with respect to tokens before/after the match clause. So that is the source of your problem.

    select column1 as list_txt,
        COLUMN2 AS EXPECTED,
        '(^|,)201(,|$)' as r1,
        REGEXP_LIKE(LIST_TXT, r1) as og,
        REGEXP_COUNT(LIST_TXT, r1) as rc,
        REGEXP_LIKE(LIST_TXT, '.*(^|,)201(,|$).*') as fixed
    from values
        ('201', 'match by self'),
        ('xx,201,yy', 'match 2 commas'), 
        ('xx,201', 'match last value'), 
        ('201,yy', 'match first value'),
        ('xx201,yy', 'bad no comma before 201'),
        ('xx,201yy', 'bad no comma after 201');
    
    LIST_TXT EXPECTED R1 OG RC FIXED
    201 match by self (^|,)201(,|$) TRUE 1 TRUE
    xx,201,yy match 2 commas (^|,)201(,|$) FALSE 1 TRUE
    xx,201 match last value (^|,)201(,|$) FALSE 1 TRUE
    201,yy match first value (^|,)201(,|$) FALSE 1 TRUE
    xx201,yy bad no comma before 201 (^|,)201(,|$) FALSE 0 FALSE
    xx,201yy bad no comma after 201 (^|,)201(,|$) FALSE 0 FALSE

    Then there is the result to text part, you are wanting a 'yes' or null, so it could be done with a CASE, or a IFF, or NVL2

    select column1 as list_txt,
        COLUMN2 AS EXPECTED,
        '(^|,)201(,|$)' as r1,
        REGEXP_LIKE(LIST_TXT, '.*'||r1||'.*') as fixed,
        CASE fixed WHEN TRUE THEN 'yes' end res_1,
        IFF(fixed, 'yes', null) as res_2, 
    from values
        ('201', 'match by self'),
        ('xx,201,yy', 'match 2 commas'), 
        ('xx,201', 'match last value'), 
        ('201,yy', 'match first value'),
        ('xx201,yy', 'bad no comma before 201'),
        ('xx,201yy', 'bad no comma after 201');
    
    LIST_TXT EXPECTED R1 FIXED RES_1 RES_2
    201 match by self (^ ,)201(,|$) TRUE yes
    xx,201,yy match 2 commas (^|,)201(,|$) TRUE yes yes
    xx,201 match last value (^|,)201(,|$) TRUE yes yes
    201,yy match first value (^|,)201(,|$) TRUE yes yes
    xx201,yy bad no comma before 201 (^|,)201(,|$) FALSE null null
    xx,201yy bad no comma after 201 (^|,)201(,|$) FALSE null null