Search code examples
sqloracle-databasedata-warehouse

How to generate point in time snapshot table from a transaction fact table?


I have a transaction table that records the change in status (A,B,C,D) of a customer by closing the end date of the previous record and also opening a new record with the current system time and the end date for the new record will be set to a high open date.

FactID Cust_ID Status EffectiveDate EndDate
1 1 A 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM
2 1 B 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM
3 1 C 24/05/2021 2:47:28 PM 24/05/2021 4:15:45 PM
4 1 A 24/05/2021 4:15:45 PM 24/05/2021 8:05:09 PM
5 1 D 24/05/2021 8:05:09 PM 31/12/9000

I'm trying to build a snapshot at a point in time table (end of the day reporting) based on the above transaction table.

ReportDate Cust_ID EODStatus A_SDate A_EDate B_SDate B_EDate C_SDate C_EDate D_SDate D_EDate
20/05/2021 11:59:59 PM 1 A 20/05/2021 8:52:29 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000
21/05/2021 11:59:59 PM 1 B 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000
22/05/2021 11:59:59 PM 1 B 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000
23/05/2021 11:59:59 PM 1 B 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000
24/05/2021 11:59:59 PM 1 D 20/05/2021 8:52:29 PM 24/05/2021 8:05:09 PM 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 24/05/2021 2:47:28 PM 24/05/2021 4:15:45 PM 24/05/2021 8:05:09 PM 31/12/9000
25/05/2021 11:59:59 PM 1 D 20/05/2021 8:52:29 PM 24/05/2021 8:05:09 PM 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 24/05/2021 2:47:28 PM 24/05/2021 4:15:45 PM 24/05/2021 8:05:09 PM 31/12/9000

I'm now stuck when I try to expand the transaction table before building the snapshot. Any pointers would be greatly appreciated.

WITH
    date_ranges
    AS
        (SELECT ROWNUM, TO_DATE ('21-05-2021', 'dd-mm-yyyy') + ROWNUM - 1.00001 reportdate
           FROM all_objects
          WHERE ROWNUM <= 6),
    transactions (factid, cust_id, status, effectivedate, enddate)
    AS
        (SELECT 1, 1, 'A', TO_DATE ('20/05/2021 8:52:29 PM', 'DD/MM/YYYY HH12:MI:SS AM'), TO_DATE ('21/05/2021 3:08:22 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
         UNION ALL
         SELECT 2, 1, 'B', TO_DATE ('21/05/2021 3:08:22 PM', 'DD/MM/YYYY HH12:MI:SS AM'), TO_DATE ('24/05/2021 2:47:28 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
         UNION ALL
         SELECT 3, 1, 'C', TO_DATE ('24/05/2021 2:47:28 PM', 'DD/MM/YYYY HH12:MI:SS AM'), TO_DATE ('24/05/2021 4:15:45 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
         UNION ALL
         SELECT 4, 1, 'A', TO_DATE ('24/05/2021 4:15:45 PM', 'DD/MM/YYYY HH12:MI:SS AM'), TO_DATE ('24/05/2021 8:05:09 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
         UNION ALL
         SELECT 5, 1, 'D', TO_DATE ('24/05/2021 8:05:09 PM', 'DD/MM/YYYY HH12:MI:SS AM'), TO_DATE ('31/12/9000', 'DD/MM/YYYY') FROM DUAL),
    dataset
    AS
        (SELECT DISTINCT reportdate,
                         cust_id,
                         status     AS eodstatus,
                         effectivedate,
                         enddate
           FROM transactions CROSS JOIN date_ranges)
  SELECT reportdate,
         cust_id,
         eodstatus,
         effectivedate,
         enddate,
         CASE
             WHEN eodstatus = 'A' THEN MIN (effectivedate)
             ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY')
         END             AS a_sdate,
         CASE WHEN eodstatus = 'A' THEN MAX (enddate) ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY') 
         END             AS a_edate,
         CASE
             WHEN eodstatus = 'B' THEN MIN (effectivedate)
             ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY')
         END             AS b_sdate,
         CASE WHEN eodstatus = 'B' THEN MAX (enddate) ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY') 
         END             AS b_edate,
         CASE
             WHEN eodstatus = 'C' THEN MIN (effectivedate)
             ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY')
         END             AS c_sdate,
         CASE WHEN eodstatus = 'C' THEN MAX (enddate) ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY') 
         END             AS c_edate,
         CASE
             WHEN eodstatus = 'D' THEN MIN (effectivedate)
             ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY')
         END             AS d_sdate,
         CASE WHEN eodstatus = 'D' THEN MAX (enddate) ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY') 
          END             AS d_edate
    FROM dataset t
   WHERE reportdate BETWEEN effectivedate AND enddate
GROUP BY reportdate, cust_id, eodstatus, effectivedate, enddate
ORDER BY reportdate, cust_id, eodstatus;
REPORTDATE CUST_ID EODSTATUS EFFECTIVEDATE ENDDATE A_SDATE A_EDATE B_SDATE B_EDATE C_SDATE C_EDATE D_SDATE D_EDATE
20/05/2021 11:59:59 PM 1 "A" 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000
21/05/2021 11:59:59 PM 1 "B" 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 31/12/9000 31/12/9000 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000
22/05/2021 11:59:59 PM 1 "B" 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 31/12/9000 31/12/9000 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000
23/05/2021 11:59:59 PM 1 "B" 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 31/12/9000 31/12/9000 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000
24/05/2021 11:59:59 PM 1 "D" 24/05/2021 8:05:09 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 24/05/2021 8:05:09 PM 31/12/9000
25/05/2021 11:59:59 PM 1 "D" 24/05/2021 8:05:09 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 24/05/2021 8:05:09 PM 31/12/9000

SQLFiddle here

PS: I had a look at another thread in SO which had almost the same title but wasn't much helpful.

Update 1:

I am now able to get a daily status for all reporting dates, yet the start & end date calculations and carrying forward the values to the subsequent rows is still not happening (as I am yet to figure it out).

  • Start date - MIN (effectivedate for a given status)
  • End date - MAX(Enddate for a given status)

Update 2: The start date and end date calculated must not be greater than the reporting date. Refer to the SQL output which showcases the current issue


Solution

  • First of all my sincere thanks to all the people who tried to help me. I somehow managed to nail this near-impossible task with some convoluted logic (but it works). I tried providing inline comments to explain the derivations. A special mention to @Wernfried Domscheit who has written the PIVOT logic and deleted the answer and it helped me to a great extent.

    WITH
        date_ranges
    -- Generate dates 
        AS
            (SELECT ROWNUM, TO_DATE ('21-05-2021', 'dd-mm-yyyy') + ROWNUM - 1.00001 reportdate
               FROM all_objects
              WHERE ROWNUM <= 6),
    -- Mock up source records
        transactions (factid, cust_id,status,effectivedate,enddate)
        AS
            (SELECT 1,1,'A',
                    TO_DATE ('20/05/2021 8:52:29 PM', 'DD/MM/YYYY HH12:MI:SS AM'),
                    TO_DATE ('21/05/2021 3:08:22 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
             UNION ALL
             SELECT 2,1,'B',
                    TO_DATE ('21/05/2021 3:08:22 PM', 'DD/MM/YYYY HH12:MI:SS AM'),
                    TO_DATE ('24/05/2021 2:47:28 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
             UNION ALL
             SELECT 3,1,'C',
                    TO_DATE ('24/05/2021 2:47:28 PM', 'DD/MM/YYYY HH12:MI:SS AM'),
                    TO_DATE ('24/05/2021 4:15:45 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
             UNION ALL
             SELECT 4,1,'A',
                    TO_DATE ('24/05/2021 4:15:45 PM', 'DD/MM/YYYY HH12:MI:SS AM'),
                    TO_DATE ('24/05/2021 8:05:09 PM', 'DD/MM/YYYY HH12:MI:SS AM') FROM DUAL
             UNION ALL
             SELECT 5,1,'D',
                    TO_DATE ('24/05/2021 8:05:09 PM', 'DD/MM/YYYY HH12:MI:SS AM'),
                    TO_DATE ('31/12/9000', 'DD/MM/YYYY') FROM DUAL),
        dataset
    -- Apply cross join to get report date into transactions
    -- Could've been much better; time crunched
        AS
            (SELECT DISTINCT reportdate,cust_id,status     AS eodstatus,effectivedate,enddate
               FROM transactions CROSS JOIN date_ranges),
        dataset1
    -- Ignore start and end dates if they are older than the reporting date
        AS
            (  SELECT reportdate,
                      cust_id,
                      eodstatus,
                      CASE
                          WHEN reportdate > effectivedate THEN effectivedate
                          ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY')
                      END    AS effectivedate,
                      CASE
                          WHEN reportdate > enddate THEN enddate
                          ELSE TO_DATE ('31/12/9000', 'DD/MM/YYYY')
                      END    AS enddate
                 FROM dataset
                WHERE reportdate > effectivedate),
        dataset2
    -- Grab the min of start and max of end for all reporting days
        AS
            (  SELECT reportdate,
                      cust_id,
                      eodstatus,
                      eodstatus               AS status,
                      MIN (effectivedate)     effectivedate,
                      MAX (enddate)           enddate
                 FROM dataset1
             GROUP BY reportdate, cust_id, eodstatus),
        dataset_new
    -- Apply PIVOT to capture the start and end date per known statues and replacing NULLs with high open end dates
        AS
            (  SELECT reportdate,
                      cust_id,
                      eodstatus,
                      COALESCE ('A','B','C','D')                           AS status,
                      NVL (a_sdate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    a_sdate,
                      NVL (a_edate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    a_edate,
                      NVL (b_sdate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    b_sdate,
                      NVL (b_edate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    b_edate,
                      NVL (c_sdate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    c_sdate,
                      NVL (c_edate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    c_edate,
                      NVL (d_sdate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    d_sdate,
                      NVL (d_edate, TO_DATE ('31/12/9000', 'DD/MM/YYYY'))    d_edate
                 FROM dataset2
                      PIVOT (MIN (effectivedate) AS "SDATE", MAX (enddate) AS "EDATE"
                            FOR status
                            IN ('A' AS "A", 'B' AS "B", 'C' AS "C", 'D' AS "D"))
             ORDER BY reportdate),
        date_manipulations
    -- Merging multiple entries into one record a day
        AS
            (  SELECT reportdate,
                      cust_id,
                      MIN (a_sdate)     a_sdate,
                      MIN (a_edate)     a_edate,
                      MIN (b_sdate)     b_sdate,
                      MIN (b_edate)     b_edate,
                      MIN (c_sdate)     c_sdate,
                      MIN (c_edate)     c_edate,
                      MIN (d_sdate)     d_sdate,
                      MIN (d_edate)     d_edate
                 FROM dataset_new
             GROUP BY reportdate, cust_id
             ORDER BY 1)
    -- JOIN with transaction to report the original status 
    SELECT a.*, b.status
      FROM date_manipulations a JOIN transactions b ON reportdate BETWEEN effectivedate AND enddate;
    
    REPORTDATE CUST_ID A_SDATE A_EDATE B_SDATE B_EDATE C_SDATE C_EDATE D_SDATE D_EDATE STATUS
    20/05/2021 11:59:59 PM 1 20/05/2021 8:52:29 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 "A"
    21/05/2021 11:59:59 PM 1 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 "B"
    22/05/2021 11:59:59 PM 1 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 "B"
    23/05/2021 11:59:59 PM 1 20/05/2021 8:52:29 PM 21/05/2021 3:08:22 PM 21/05/2021 3:08:22 PM 31/12/9000 31/12/9000 31/12/9000 31/12/9000 31/12/9000 "B"
    24/05/2021 11:59:59 PM 1 20/05/2021 8:52:29 PM 24/05/2021 8:05:09 PM 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 24/05/2021 2:47:28 PM 24/05/2021 4:15:45 PM 24/05/2021 8:05:09 PM 31/12/9000 "D"
    25/05/2021 11:59:59 PM 1 20/05/2021 8:52:29 PM 24/05/2021 8:05:09 PM 21/05/2021 3:08:22 PM 24/05/2021 2:47:28 PM 24/05/2021 2:47:28 PM 24/05/2021 4:15:45 PM 24/05/2021 8:05:09 PM 31/12/9000 "D"