Search code examples
postgresqlrecursive-cte

Postgres: query average landed cost from prior records


I have a query from my postgres tables that looks like this:

date sku incoming_unit_qty incoming_unit_cost landed_unit_qty
2023-08-01 sku-1 1,000 $3.00 0
2023-08-01 sku-2 2,000 $2.00 0
2023-08-01 sku-3 3,000 $1.00 0
2023-09-01 sku-1 1,000 $3.50 500
2023-09-01 sku-2 2,000 $2.50 1,000
2023-09-01 sku-3 3,000 $1.50 1,500
2023-10-01 sku-1 1,000 $4.00 750
2023-10-01 sku-2 2,000 $3.00 1,500
2023-10-01 sku-3 3,000 $2.00 2,250
2023-11-01 sku-1 1,000 $3.50 250
2023-11-01 sku-2 2,000 $2.50 500
2023-11-01 sku-3 3,000 $1.50 750

I am trying to generate a view that would calculate for each sku and each date, the weighted average cost. The result should look as follows:

date sku incoming_unit_qty incoming_unit_cost landed_unit_qty landed_unit_cost average_cost
2023-08-01 sku-1 1,000 $3.00 0 $0.00 $3.00
2023-08-01 sku-2 2,000 $2.00 0 $0.00 $2.00
2023-08-01 sku-3 3,000 $1.00 0 $0.00 $1.00
2023-09-01 sku-1 1,000 $3.50 500 $3.00 $3.33
2023-09-01 sku-2 2,000 $2.50 1,000 $2.00 $2.33
2023-09-01 sku-3 3,000 $1.50 1,500 $1.00 $1.33
2023-10-01 sku-1 1,000 $4.00 750 $3.33 $3.71
2023-10-01 sku-2 2,000 $3.00 1,500 $2.33 $2.71
2023-10-01 sku-3 3,000 $2.00 2,250 $1.33 $1.71
2023-11-01 sku-1 1,000 $3.50 250 $3.71 $3.54
2023-11-01 sku-2 2,000 $2.50 500 $2.71 $2.54
2023-11-01 sku-3 3,000 $1.50 750 $1.71 $1.54

How can I achieve this?

I have tried the below recursive CTE query, which results in the following error message recursive reference to query "lc" must not appear within a subquery LINE 15: (select average_cost from lc where date < lc.date and sku = lc.sku order by date desc limit 1) as landed_unit_cost. While I understand the error message, I cannot find a solution that doesn't involve a recursive subquery.

with recursive lc as (
    (select distinct on (sku)
      *,
      0 as landed_unit_cost,
      incoming_unit_cost as average_cost
    from lc_history
    order by sku, date)
  union
    select
      l.*,
      (l.incoming_unit_qty*l.incoming_unit_cost + l.landed_unit_qty*l.landed_unit_cost) / (l.incoming_unit_qty + l.landed_unit_qty) as average_cost
    from (
      select
        *,
        (select average_cost from lc where date < lc.date and sku = lc.sku order by date desc limit 1) as landed_unit_cost
      from lc_history
    ) l
)
select * from lc order by date, sku;

Thanks for your help!


Solution

  • I ended up writing a for loop in PL/pgSQL. It's not as elegant as an SQL query, but it works.

    create type landed_cost as (
      date date,
      sku text,
      incoming_unit_qty integer,
      incoming_unit_cost real,
      landed_unit_qty integer,
      landed_unit_cost real,
      average_cost real
    );
    
    create function get_landed_costs()
      returns table (rec landed_cost)
     language plpgsql
     security definer
    as $function$
    declare
      row landed_cost;
      landed_unit_cost real;
    begin
      create temporary table landed_costs of landed_cost;
      
      for row in select * from lc_history order by date
      loop
        landed_unit_cost := (select average_cost from landed_costs where date < row.date and sku = row.sku order by date desc limit 1);
    
        insert into landed_costs values (
          row.date, row.sku,
          row.incoming_unit_qty,
          row.incoming_unit_cost,
          row.landed_unit_qty,
          landed_unit_cost,
          (row.incoming_unit_qty*row.incoming_unit_cost + row.landed_unit_qty*coalesce(landed_unit_cost,0)) / (row.incoming_unit_qty + row.landed_unit_qty)
        );
      end loop;
    
      return query select * from landed_costs;
    end;
    $function$
    ;
    
    select * from get_landed_costs();