Search code examples
sqloracledate-range

combine two tables with date fields into a single table without overlapping dates


we receive data from a legacy application for a product and the data for tables is below

Table 1  (KEY1, ID,Valid_From, Valid_To, IS_LATEST_FLAG)
                
'1',    'abc',  '3/2/2020 12:00:00 AM', '3/3/2020 6:01:12 PM',   'N'
'2',    'abc',  '3/3/2020  6:01:13 PM', '3/3/2020 6:01:14 PM',   'N'
'3',    'abc',  '3/3/2020  6:01:15 PM', '3/3/2020 6:01:15 PM',   'N'
'4',    'abc',  '3/3/2020  6:01:16 PM', '3/3/2020 6:01:51 PM',   'N'
'5',    'abc',  '3/3/2020  6:01:52 PM', '3/4/2020 4:28:59 PM',   'N'
'6',    'abc',  '3/4/2020  4:29:00 PM', '3/4/2020 4:31:40 PM',   'N'
'7',    'abc',  '3/4/2020  4:31:41 PM', '12/31/4712',            'Y'
'8',    'bbc',  '3/19/2020 2:47:08 PM', '3/19/2020 2:50:36 PM',  'N'
'9',    'bbc',  '3/19/2020 2:50:37 PM', '3/19/2020 2:56:23 PM',  'N'
'10',   'bbc',  '3/19/2020 2:56:24 PM', '12/31/4712',            'Y'

Table 2  (KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG)
'1',    'abc',  '3/3/2020 12:00:00 AM', '3/3/2020 6:01:13 PM', 'N'
'2',    'abc',  '3/3/2020 6:01:14  PM'  '3/3/2020 6:01:14 PM', 'N'
'3',    'abc',  '3/3/2020 6:01:15  PM'  '3/4/2020 4:28:59 PM', 'N'
'4',    'abc',  '3/4/2020 4:29:00  PM'  '12/31/4712',          'Y'
'5',    'bbc',  '3/4/2020 4:31:41  PM'  '3/19/2020 2:47:07 PM','N'
'6',    'bbc',  '3/19/2020 2:47:08 PM'  '3/19/2020 2:50:36 PM','N'
'7',    'bbc',  '3/19/2020 2:50:37 PM'  '3/20/2020 2:56:23 PM','N'
'8',    'bbc',  '3/20/2020 2:56:24 PM'  '12/31/4712',          'Y'

We need to create a superset of both the tables with all distinct valid_from date from both the tables and record_end_dt will automatically be 1 second less than the next record so that there wont be overlapping dates or gaps. The challenging part is to get the appropriate keys for that time frame.
Output Table should look like below. I tried union but its not working as expected. Look figure out how to achieve below.

Table 2  (KEY1,KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG)

'1','NULL', 'abc', '3/2/2020 12:00:00 AM',  '3/2/2020 11:59:59 PM',   'N'
'1','1',    'abc', '3/3/2020 12:00:00 AM'   '3/2/2020 6:01:12 PM',    'N' 
'2','1',    'abc', '3/3/2020 6:01:13  PM'   '3/3/2020 6:01:13 PM',    'N'
'2','2',    'abc', '3/3/2020 6:01:14  PM'   '3/3/2020 6:01:14 PM',    'N'
'3','3',    'abc', '3/3/2020 6:01:15  PM'   '3/3/2020 6:01:15 PM',    'N'
'4','3',    'abc', '3/3/2020 6:01:16  PM'   '3/3/2020 6:01:51 PM',    'N'
'5','3',    'abc', '3/3/2020  6:01:52 PM',  '3/4/2020 4:28:59 PM',    'N'
'6','4',    'abc', '3/4/2020  4:29:00 PM',  '3/4/2020 4:31:40 PM',    'N'
'7','4',    'abc', '3/4/2020  4:31:41 PM',  '12/31/4712',             'Y'
'NULL','5', 'bbc', '3/4/2020 4:31:41  PM'   '3/19/2020 2:47:07 PM',   'N'
'8','6',    'bbc',  '3/19/2020 2:47:08 PM'  '3/19/2020 2:50:36 PM',   'N'
'9','7',    'bbc',  '3/19/2020 2:50:37 PM'  '3/20/2020 2:56:23 PM',   'N'
'10','8',   'bbc',  '3/20/2020 2:56:24 PM'  '12/31/4712',             'Y'

Solution

  • Looks like this is what you need:

    with tabs as (
       -- combining both tables: key2 is null for rows from table1 and key1 is null for rows from table2
       select key1,null as key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_1
       union all
       select null,key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_2
    )
    ,time_points as (
       -- using UNPIVOT to get all time points from all intervals from both tables
       select distinct
          id,
          --pt_type,IS_LATEST_FLAG,
          case when pt_type='VALID_FROM' then pt else pt + interval '1' second end as pit
       from (select distinct id,Valid_From, Valid_To,IS_LATEST_FLAG from tabs)
       unpivot (
         pt for pt_type in (Valid_From, Valid_To)
       )
    )
    ,intervals as (
       -- combining time points into intervals:
       select 
         id,row_number()over(partition by id order by pit) rnk
        ,pit as Valid_From
        ,lead(pit)over(partition by id order by pit)-interval '1'second as Valid_To
       from time_points
    )
    -- main query:
    -- simply joining table1 and table2 to our intervals:
    select
       i.id,
       i.rnk,
       i.valid_from,
       i.valid_to,
       t1.key1,
       t2.key2,
       least(nvl(t1.is_latest_flag,'N'),nvl(t2.is_latest_flag,'N')) as is_latest_flag
    from intervals i
         left join table_1 t1
              on  i.id = t1.id
              and i.valid_from between t1.valid_from and t1.valid_to
         left join table_2 t2
              on  i.id = t2.id
              and i.valid_from between t2.valid_from and t2.valid_to
    where i.valid_to is not null
    order by i.id,i.valid_from,i.valid_to;
    

    Full example with sample data:

    with Table_1 (KEY1, ID,Valid_From, Valid_To, IS_LATEST_FLAG) as (
       select '1',    'abc',  to_date('3/2/2020 12:00:00 AM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:12 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '2',    'abc',  to_date('3/3/2020  6:01:13 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:14 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '3',    'abc',  to_date('3/3/2020  6:01:15 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:15 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '4',    'abc',  to_date('3/3/2020  6:01:16 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:51 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '5',    'abc',  to_date('3/3/2020  6:01:52 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:28:59 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '6',    'abc',  to_date('3/4/2020  4:29:00 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:31:40 PM', 'mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '7',    'abc',  to_date('3/4/2020  4:31:41 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),  'Y' from dual union all
       select '8',    'bbc',  to_date('3/19/2020 2:47:08 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:50:36 PM','mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '9',    'bbc',  to_date('3/19/2020 2:50:37 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:56:23 PM','mm/dd/yyyy hh:mi:ss AM'),  'N' from dual union all
       select '10',   'bbc',  to_date('3/19/2020 2:56:24 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),  'Y' from dual 
    )
    ,Table_2  (KEY2, ID,Valid_From, Valid_To, IS_LATEST_FLAG) as (
       select '1',    'abc',  to_date('3/3/2020 12:00:00 AM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:13 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
       select '2',    'abc',  to_date('3/3/2020 6:01:14  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/3/2020 6:01:14 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
       select '3',    'abc',  to_date('3/3/2020 6:01:15  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/4/2020 4:28:59 PM', 'mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
       select '4',    'abc',  to_date('3/4/2020 4:29:00  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),'Y' from dual union all
       select '5',    'bbc',  to_date('3/4/2020 4:31:41  PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:47:07 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
       select '6',    'bbc',  to_date('3/19/2020 2:47:08 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/19/2020 2:50:36 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
       select '7',    'bbc',  to_date('3/19/2020 2:50:37 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('3/20/2020 2:56:23 PM','mm/dd/yyyy hh:mi:ss AM'),'N' from dual union all
       select '8',    'bbc',  to_date('3/20/2020 2:56:24 PM','mm/dd/yyyy hh:mi:ss AM'), to_date('12/31/4712',          'mm/dd/yyyy hh:mi:ss AM'),'Y' from dual 
    )
    ,tabs as (
       -- combining both tables: key2 is null for rows from table1 and key1 is null for rows from table2
       select key1,null as key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_1
       union all
       select null,key2,ID,Valid_From, Valid_To, IS_LATEST_FLAG from table_2
    )
    ,time_points as (
       -- using UNPIVOT to get all time points from all intervals from both tables
       select distinct
      id,
      --pt_type,IS_LATEST_FLAG,
      case when pt_type='VALID_FROM' then pt else pt + interval '1' second end as pit
       from (select distinct id,Valid_From, Valid_To,IS_LATEST_FLAG from tabs)
       unpivot (
     pt for pt_type in (Valid_From, Valid_To)
       )
    )
    ,intervals as (
       -- combining time points into intervals:
       select 
     id,row_number()over(partition by id order by pit) rnk
    ,pit as Valid_From
    ,lead(pit)over(partition by id order by pit)-interval '1'second as Valid_To
       from time_points
    )
    -- main query:
    -- simply joining table1 and table2 to our intervals:
    select
       i.id,
       i.rnk,
       i.valid_from,
       i.valid_to,
       t1.key1,
       t2.key2,
       least(nvl(t1.is_latest_flag,'N'),nvl(t2.is_latest_flag,'N')) as is_latest_flag
    from intervals i
     left join table_1 t1
          on  i.id = t1.id
          and i.valid_from between t1.valid_from and t1.valid_to
     left join table_2 t2
          on  i.id = t2.id
          and i.valid_from between t2.valid_from and t2.valid_to
    where i.valid_to is not null
    order by i.id,i.valid_from,i.valid_to;

    Results:

    ID         RNK VALID_FROM          VALID_TO            KEY1  KEY2  IS_LATEST_FL
    --- ---------- ------------------- ------------------- ----- ----- ------------
    abc          1 2020-03-02 00:00:00 2020-03-02 23:59:59 1           N
    abc          2 2020-03-03 00:00:00 2020-03-03 18:01:12 1     1     N
    abc          3 2020-03-03 18:01:13 2020-03-03 18:01:13 2     1     N
    abc          4 2020-03-03 18:01:14 2020-03-03 18:01:14 2     2     N
    abc          5 2020-03-03 18:01:15 2020-03-03 18:01:15 3     3     N
    abc          6 2020-03-03 18:01:16 2020-03-03 18:01:51 4     3     N
    abc          7 2020-03-03 18:01:52 2020-03-04 16:28:59 5     3     N
    abc          8 2020-03-04 16:29:00 2020-03-04 16:31:40 6     4     N
    abc          9 2020-03-04 16:31:41 4712-12-31 00:00:00 7     4     Y
    bbc          1 2020-03-04 16:31:41 2020-03-19 14:47:07       5     N
    bbc          2 2020-03-19 14:47:08 2020-03-19 14:50:36 8     6     N
    bbc          3 2020-03-19 14:50:37 2020-03-19 14:56:23 9     7     N
    bbc          4 2020-03-19 14:56:24 2020-03-20 14:56:23 10    7     N
    bbc          5 2020-03-20 14:56:24 4712-12-31 00:00:00 10    8     Y
    
    14 rows selected.