Search code examples
sqlpostgresqlormplpgsqldynamic-sql

Robust approach for building SQL queries programmatically


I have to resort to raw SQL where the ORM is falling short (using Django 1.7). The problem is that most of the queries end up being 80-90% similar. I cannot figure out a robust & secure way to build queries without violating re-usability.

Is string concatenation the only way out, i.e. build parameter-less query strings using if-else conditions, then safely include the parameters using prepared statements (to avoid SQL injection). I want to follow a simple approach for templating SQL for my project instead of re-inventing a mini ORM.

For example, consider this query:

SELECT id, name, team, rank_score
FROM
  ( SELECT id, name, team
    ROW_NUMBER() OVER (PARTITION BY team
                       ORDER BY count_score DESC) AS rank_score
    FROM 
      (SELECT id, name, team
       COUNT(score) AS count_score
       FROM people
       INNER JOIN scores on (scores.people_id = people.id)
       GROUP BY id, name, team
      ) AS count_table
  ) AS rank_table
WHERE rank_score < 3

How can I:

a) add optional WHERE constraint on people or
b) change INNER JOIN to LEFT OUTER or
c) change COUNT to SUM or
d) completely skip the OVER / PARTITION clause?


Solution

  • Better query

    Fix the syntax, simplify and clarify:

    SELECT *
    FROM  (
       SELECT p.person_id, p.name, p.team, sum(s.score)::int AS score
            , rank() OVER (PARTITION BY p.team ORDER BY sum(s.score) DESC)::int AS rnk
        FROM  person p
        JOIN  score  s USING (person_id)
        GROUP BY 1
       ) sub
    WHERE  rnk < 3;
    

    Building on my updated table layout. See fiddle below.

    You do not need the additional subquery. Window functions are executed after aggregate functions, so you can nest it like demonstrated.

    While talking about "rank", you probably want to use rank(), not row_number().

    Assuming people.people_id is the PK, you can simplify the GROUP BY clause.

    Be sure to table-qualify all column names that might be ambiguous.

    PL/pgSQL function

    I would write a PL/pgSQL function that takes parameters for your variable parts. Implementing a - c of your points. d is unclear, leaving that for you to add.

    CREATE TABLE person (
      person_id serial PRIMARY KEY
    , name text NOT NULL
    , team text
    );
    
    CREATE TABLE score (
      score_id serial PRIMARY KEY
    , person_id int NOT NULL REFERENCES person
    , score int NOT NULL
    );
    
    -- dummy values
    WITH ins AS (
       INSERT INTO person(name, team)
       SELECT 'Jon Doe ' || p, t
       FROM   generate_series(1,20) p                   -- 20 guys x
            , unnest ('{team1,team2,team3}'::text[]) t  -- 3 teams
       RETURNING person_id
       )
    INSERT INTO score(person_id, score)
    SELECT i.person_id, (random() * 100)::int
    FROM   ins i, generate_series(1,5) g;               -- 5 scores each
    

    Function:

    CREATE OR REPLACE FUNCTION f_demo(_agg        text DEFAULT 'sum'
                                    , _left_join  bool DEFAULT false
                                    , _where_name text DEFAULT null)
      RETURNS TABLE(person_id int, name text, team text, score numeric, rnk bigint)
      LANGUAGE plpgsql AS
    $func$
    DECLARE
       _agg_op  CONSTANT text[] := '{count, sum, avg}';  -- allowed agg functions
       _sql     text;
    BEGIN
       -- assert --
       IF _agg ILIKE ANY (_agg_op) THEN
          -- all good
       ELSE
          RAISE EXCEPTION '_agg must be one of %', _agg_op;
       END IF;
    
       -- query --
       _sql := format('
       SELECT *
       FROM  (
          SELECT p.person_id, p.name, p.team, %1$s(s.score)::numeric AS score
               , rank() OVER (PARTITION BY p.team ORDER BY %1$s(s.score) DESC) AS rnk
           FROM  person p
           %2$s  score  s USING (person_id)
           %3$s
           GROUP BY 1
          ) sub
       WHERE  rnk < 3
       ORDER  BY team, rnk'
        , _agg                                                                 -- %1$s
        , CASE WHEN _left_join THEN 'LEFT JOIN' ELSE 'JOIN' END                -- %2$s
        , CASE WHEN _where_name <> '' THEN 'WHERE p.name LIKE $1' ELSE '' END  -- %3$s
       );
    
       -- debug   -- inspect query first
       -- RAISE NOTICE '%', _sql;
    
       -- execute -- unquote when tested ok
       RETURN QUERY EXECUTE _sql
       USING  _where_name;   -- $1
    END
    $func$;
    

    Call:

    SELECT * FROM f_demo();
    SELECT * FROM f_demo('sum', TRUE, '%2');    
    SELECT * FROM f_demo('avg', FALSE);
    SELECT * FROM f_demo(_where_name := '%1_'); -- named param
    

    fiddle
    Old sqlfiddle