I have a table that contains products and sales data. Unfortunately, not every month a given product was sold, and I would like unsold products in a given month to also be shown. In Oracle, the query is built this way: https://www.oracle.com/ocom/groups/public/@otn/documents/webcontent/270646.htm.
I would like to do it similarly in Redshift, but I can't use the partitioning described here. Can I easily replace it somehow?
data sample:
WITH dates AS (
SELECT to_date('2023-01-01', 'YYYY-MM-DD') AS d FROM dual
UNION ALL
SELECT to_date('2023-02-01', 'YYYY-MM-DD') AS d FROM dual
UNION ALL
SELECT to_date('2023-03-01', 'YYYY-MM-DD') AS d FROM dual
UNION ALL
SELECT to_date('2023-04-01', 'YYYY-MM-DD') AS d FROM dual
UNION ALL
SELECT to_date('2023-05-01', 'YYYY-MM-DD') AS d FROM dual
)
, product_sales as(
--Yanuary
SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-01-01', 'YYYY-MM-DD') AS sale_date, 3 AS sale_amount FROM dual
UNION ALL
SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-01-01', 'YYYY-MM-DD') AS sale_date, 8 AS sale_amount FROM dual
UNION ALL
SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-01-01', 'YYYY-MM-DD') AS sale_date, 9 AS sale_amount FROM dual
UNION ALL
--February
SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 5 AS sale_amount FROM dual
UNION ALL
SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 2 AS sale_amount FROM dual
UNION ALL
SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
UNION ALL
--March
-- SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 5 AS sale_amount FROM dual
-- UNION ALL
SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-03-01', 'YYYY-MM-DD') AS sale_date, 7 AS sale_amount FROM dual
-- UNION ALL
-- SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-02-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
UNION ALL
--April
SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-04-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
UNION ALL
SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-04-01', 'YYYY-MM-DD') AS sale_date, 2 AS sale_amount FROM dual
-- UNION ALL
-- SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-04-01', 'YYYY-MM-DD') AS sale_date, 1 AS sale_amount FROM dual
UNION ALL
--May
SELECT 1 AS prod_id, 'test1' AS prod_name, to_date('2023-05-01', 'YYYY-MM-DD') AS sale_date, 6 AS sale_amount FROM dual
UNION ALL
SELECT 2 AS prod_id, 'test2' AS prod_name, to_date('2023-05-01', 'YYYY-MM-DD') AS sale_date, 4 AS sale_amount FROM dual
UNION ALL
SELECT 3 AS prod_id, 'test3' AS prod_name, to_date('2023-05-01', 'YYYY-MM-DD') AS sale_date, 2 AS sale_amount FROM dual
)
SELECT ps.prod_id, ps.prod_name, nvl(ps.sale_amount, 0) AS sale_amount, da.d AS sale_date
FROM product_sales ps
PARTITION BY(ps.prod_id, ps.prod_name)
RIGHT OUTER JOIN dates da ON ps.sale_date = da.d
ORDER BY da.d, ps.prod_id
See example
select p.prod_id,p.prod_name,dates.d, coalesce(month_sale_amount,0) month_sale_amount
from dates
cross join (select distinct prod_id,prod_name from product_sales) p
left join(
select prod_id,date_trunc('month',sale_date) sale_date
,sum(sale_amount) as month_sale_amount
from product_sales
group by prod_id,date_trunc('month',sale_date)
) total_sales on sale_date=dates.d and total_sales.prod_id=p.prod_id
order by dates.d,p.prod_id
prod_id | prod_name | d | month_sale_amount |
---|---|---|---|
1 | test1 | 2023-01-01 | 3 |
2 | test2 | 2023-01-01 | 8 |
3 | test3 | 2023-01-01 | 9 |
1 | test1 | 2023-02-01 | 5 |
2 | test2 | 2023-02-01 | 2 |
3 | test3 | 2023-02-01 | 1 |
1 | test1 | 2023-03-01 | 0 |
2 | test2 | 2023-03-01 | 7 |
3 | test3 | 2023-03-01 | 0 |
1 | test1 | 2023-04-01 | 1 |
2 | test2 | 2023-04-01 | 2 |
3 | test3 | 2023-04-01 | 0 |
1 | test1 | 2023-05-01 | 6 |
2 | test2 | 2023-05-01 | 4 |
3 | test3 | 2023-05-01 | 2 |