I need to identify valid subscription for users for different order placed.
CREATE OR REPLACE TEMP TABLE customer_orders(
customer_id VARCHAR
, product_id VARCHAR
, subscription_start_date DATE
, subscription_end_date DATE
);
INSERT INTO customer_orders
VALUES
('customer_id_001', 'product_id_001', '2024-01-01', '2024-01-31'),
('customer_id_001', 'product_id_001', '2024-02-01', '2024-02-29'),
('customer_id_001', 'product_id_001', '2024-03-01', '2024-03-31'),
('customer_id_001', 'product_id_001', '2024-04-01', '2024-04-15'),
('customer_id_001', 'product_id_001', '2024-04-01', '2024-04-30');
SELECT * FROM customer_orders ORDER BY 1,2,3,4,5;
In this data, we have an invalid order order_id_004 as there is a subsequent order order_id_005, which covers the subscription of whole month. I need to flag this record for audit purpose.
CUSTOMER_ID PRODUCT_ID SUBSCRIPTION_START_DATE SUBSCRIPTION_END_DATE COMMENTS
customer_id_001 product_id_001 2024-01-01 2024-01-31 VALID ORDER
customer_id_001 product_id_001 2024-02-01 2024-02-29 VALID ORDER
customer_id_001 product_id_001 2024-03-01 2024-03-31 VALID ORDER
customer_id_001 product_id_001 2024-04-01 2024-04-15 INVALID ORDER
customer_id_001 product_id_001 2024-04-01 2024-04-30 VALID ORDER
Here is work in progress, producing invalid results:
SELECT
c1.customer_id
, c1.product_id
, c1.subscription_start_date
, c1.subscription_end_date
, CASE WHEN c1.subscription_start_date BETWEEN c2.subscription_start_date AND c2.subscription_end_date
OR c1.subscription_end_date BETWEEN c2.subscription_start_date AND c2.subscription_end_date
OR c2.subscription_start_date BETWEEN c1.subscription_start_date AND c1.subscription_end_date
THEN 'Duplicate Subscription'
ELSE 'Valid Subscription'
END AS comment
FROM customer_orders c1
INNER JOIN customer_orders c2
ON c1.customer_id = c2.customer_id
AND c1.product_id = c2.product_id
GROUP BY ALL
ORDER BY 1,2,3,4,5;
Try something like this:
WITH src AS (
SELECT *, ROW_NUMBER() OVER(ORDER BY customer_id, product_id, subscription_start_date, subscription_end_date) AS RN
FROM customer_orders
)
SELECT
c1.customer_id
, c1.product_id
, c1.subscription_start_date
, c1.subscription_end_date
, IFF(c2.customer_id IS NOT NULL, 'Duplicate Subscription', 'Valid Subscription') AS comment
, IFF(DATEDIFF(DAY, c2.subscription_start_date, c2.subscription_end_date) >= DATEDIFF(DAY, c1.subscription_start_date, c1.subscription_end_date), 'INVALID ORDER', 'VALID ORDER') AS Status
FROM src c1
LEFT JOIN src c2
ON c1.customer_id = c2.customer_id
AND c1.product_id = c2.product_id
AND c1.RN <> c2.RN
AND c1.subscription_start_date <= c2.subscription_end_date
AND c2.subscription_start_date <= c1.subscription_end_date
ORDER BY 1,2,3,4;