Search code examples
sqlsql-serverjoinleft-joinrow-number

Join query where row_number +1


My table contains rows, where [time_start] > [time_end] I want to rewrite the query so that the values [time_start] and [time_end] displayed correctly as shown below.

14, 2022-03-16, 2022-03-16 23:30:33.000,    2022-03-17 00:36:52.000
14, 2022-03-17, 2022-03-17 23:31:00.000,    2022-03-18 00:59:38.000
14, 2022-03-18, NULL                        ,NULL
14, 2022-03-19, 2022-03-19 23:30:51.000,    2022-03-19 23:38:05.000

I tried to connect via ROW_NUMBER, but I need to take into account the +1 row condition above. How can this be resolved? Thanks

CREATE TABLE [dbo].[#tmp_time](
    [num_wf] [varchar](50) NULL,
    [dt] [date] NULL,
    [time_start] [datetime] NULL,
    [time_end] [datetime] NULL
) ON [PRIMARY]

INSERT INTO [dbo].[#tmp_time] ([num_wf],[dt],[time_start],[time_end])
VALUES (14, '2022-03-16',   '20220316 23:30:33',    '20220316 01:23:40')
INSERT INTO [dbo].[#tmp_time] ([num_wf],[dt],[time_start],[time_end])
VALUES (14, '2022-03-17',   '20220317 23:31:00',    '20220317 00:36:52')
INSERT INTO [dbo].[#tmp_time] ([num_wf],[dt],[time_start],[time_end])
VALUES (14, '2022-03-18',   NULL,               '20220318 00:59:38')
INSERT INTO [dbo].[#tmp_time] ([num_wf],[dt],[time_start],[time_end])
VALUES (14, '2022-03-19',   '20220319 23:30:51',    '20220319 23:38:05')

select t1.[num_wf], t1.[dt], t1.[time_start], t2.[time_end]
from (
    select [num_wf],[dt], [time_start]
        ,ROW_NUMBER() OVER (PARTITION BY [num_wf] ORDER BY [dt], [time_start]) as rn_start
    from [dbo].[#tmp_time]
)t1
LEFT JOIN (
    select [num_wf],[time_end]
        ,ROW_NUMBER() OVER (PARTITION BY [num_wf] ORDER BY [dt], [time_end]) as rn_end
    from [dbo].[#tmp_time]
)t2 ON t1.[num_wf]=t2.[num_wf] AND t1.[rn_start]=t2.[rn_end] --AND t1.[time_start] < t2.[time_end]

Solution

  • Below will gives you the expected result.

    It uses LEAD() window function to obtain the next row time_end value

    select *,
           new_time_end = CASE WHEN [time_start] IS NULL
                               THEN NULL
                               WHEN [time_end] < [time_start]
                               THEN LEAD([time_end]) OVER (PARTITION BY num_wf 
                                                               ORDER BY dt)
                               ELSE [time_end]
                               END
    from   #tmp_time