Search code examples
sqlpostgresqlstring-comparisoncase-insensitive

PostgreSQL: Case insensitive string comparison


Is there a simple ignore-case-comparison for PostgreSQL?

I want to replace:

SELECT id, user_name 
    FROM users 
        WHERE lower(email) IN (lower('adamB@a.com'), lower('eveA@b.com'));

With something like:

SELECT id, user_name 
    FROM users 
        WHERE email IGNORE_CASE_IN ('adamB@a.com', 'eveA@b.com');

The like and ilike operators work on single values (e.g. like 'adamB@a.com'), but not on sets.


Solution

  • First, what not to do: don't use ILIKE...

    create table y
    (
    id serial not null,
    email text not null unique
    );
    
    insert into y(email) 
    values('iSteve.jobs@apple.com') ,('linus.Torvalds@linUX.com');
    insert into y(email) 
    select n from generate_series(1,1000) as i(n);
    
    -- no need to create an index on email, 
    -- UNIQUE constraint on email already makes an index.
    -- thanks a_horse_with_no_name
    -- create index ix_y on y(email);
    
    explain select * from y 
    where email ilike 
        ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']);
    

    Execution Plan:

    memdb=# explain select * from y where email ilike ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']);
                                           QUERY PLAN                                       
    ----------------------------------------------------------------------------------------
     Seq Scan on y  (cost=0.00..17.52 rows=1 width=7)
       Filter: (email ~~* ANY ('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::text[]))
    (2 rows)
    

    It's either you create an indexed lower expression...

    create function lower(t text[]) returns text[]
    as
    $$
    select lower($1::text)::text[]
    $$ language sql;
    
    create unique index ix_y_2 on y(lower(email));
    
    explain select * from y 
    where lower(email) = 
        ANY(lower(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']));
    

    ...which properly uses index:

    memdb=# explain select * from y where lower(email) = ANY(lower(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']));
                                                               QUERY PLAN                                                           
    --------------------------------------------------------------------------------------------------------------------------------
     Bitmap Heap Scan on y  (cost=22.60..27.98 rows=10 width=7)
       Recheck Cond: (lower(email) = ANY ((lower(('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::text[])::text))::text[]))
       ->  Bitmap Index Scan on ix_y_2  (cost=0.00..22.60 rows=10 width=0)
             Index Cond: (lower(email) = ANY ((lower(('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::text[])::text))::text[]))
    (4 rows)
    

    Or you use citext data type...

    create table x
    (
    id serial not null,
    email citext not null unique
    );
    
    insert into x(email) 
    values('iSteve.jobs@apple.com'),('linus.Torvalds@linUX.com');
    insert into x(email) 
    select n from generate_series(1,1000) as i(n);
    
    -- no need to create an index on email, 
    -- UNIQUE constraint on email already makes an index.
    -- thanks a_horse_with_no_name
    -- create index ix_x on x(email);
    
    explain select * from x 
    where email = 
    ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']::citext[]);
    

    ...which properly uses index even you don't create an index on expression (e.g. create index zzz on yyy(lower(field))):

    memdb=# explain select * from x where email = ANY(ARRAY['ISteve.Jobs@Apple.com','Linus.Torvalds@Linux.com']::citext[]);
                                                QUERY PLAN                                            
    --------------------------------------------------------------------------------------------------
    Bitmap Heap Scan on x  (cost=8.57..13.91 rows=2 width=36)
      Recheck Cond: (email = ANY ('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::citext[]))
      ->  Bitmap Index Scan on x_email_key  (cost=0.00..8.57 rows=2 width=0)
            Index Cond: (email = ANY ('{ISteve.Jobs@Apple.com,Linus.Torvalds@Linux.com}'::citext[]))
    

    If citext field type is not yet installed, run this:

    CREATE EXTENSION IF NOT EXISTS citext WITH SCHEMA public;