Search code examples
sqlsql-servert-sqlgaps-and-islandssql-server-2019

Grouping result in continuous periods (dates)


I have the following SQL (including test data)

IF  EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[TEST]') AND type in (N'U'))
    DROP TABLE [dbo].[TEST]

CREATE TABLE [dbo].[TEST](
    [CPR] [varchar](50) NULL,
    [Period_start] [date] NULL,
    [Period_End] [date] NULL,
    [Funktion] [varchar](50) NULL,
    [Gruppe] [varchar](50) NULL
) ON [PRIMARY]
GO
INSERT [dbo].[TEST] ([CPR], [Period_start], [Period_End], [Funktion], [Gruppe]) VALUES 
    (N'111111-1111', CAST(N'2022-09-13' AS Date), CAST(N'2022-09-13' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-12-21' AS Date), CAST(N'2022-12-21' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-12-31' AS Date), CAST(N'2022-12-31' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-12-31' AS Date), CAST(N'2022-12-31' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-07-11' AS Date), CAST(N'2022-07-11' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-07-11' AS Date), CAST(N'2022-07-11' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-07-13' AS Date), CAST(N'2022-07-13' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-07-14' AS Date), CAST(N'2022-07-14' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-07-22' AS Date), CAST(N'2022-07-22' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-08-17' AS Date), CAST(N'2022-08-17' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-11-07' AS Date), CAST(N'2022-11-07' AS Date), N'53859', N'002'),
    (N'111111-1111', CAST(N'2022-11-24' AS Date), CAST(N'2022-11-24' AS Date), N'53859', N'002')

GO 

;with CTE_BASE as
(
    SELECT 
        CPR,
        DATEFROMPARTS(YEAR(Period_start), MONTH(Period_start), 1) Period_start,
        EOMONTH(Period_End) Period_End,
        EOMONTH(LAG(Period_End) OVER (PARTITION BY o.CPR, Funktion, o.Gruppe ORDER BY Period_start, Period_End)) Previous_EOM_Period_End
    FROM 
        TEST AS O
)
SELECT
   * ,  
     CASE Previous_EOM_Period_End
        WHEN  NULL THEN 0
        WHEN Period_End THEN 1
        WHEN DATEADD(Day, -1, Period_start) then 1 
        ELSE 0
     END AS TEST    
FROM 
    CTE_BASE

and here is the result

CPR Ydelse_startdato    Ydelse_slutdato Previous_EOM_Ydelse_slutdato    TEST
CPR Period_start    Period_End  Previous_EOM_Period_End TEST
111111-1111 2022-07-01  2022-07-31  NULL    0
111111-1111 2022-07-01  2022-07-31  2022-07-31  1
111111-1111 2022-07-01  2022-07-31  2022-07-31  1
111111-1111 2022-07-01  2022-07-31  2022-07-31  1
111111-1111 2022-07-01  2022-07-31  2022-07-31  1
111111-1111 2022-08-01  2022-08-31  2022-07-31  1
111111-1111 2022-09-01  2022-09-30  2022-08-31  1
111111-1111 2022-11-01  2022-11-30  2022-09-30  0
111111-1111 2022-11-01  2022-11-30  2022-11-30  1
111111-1111 2022-12-01  2022-12-31  2022-11-30  1
111111-1111 2022-12-01  2022-12-31  2022-12-31  1
111111-1111 2022-12-01  2022-12-31  2022-12-31  1

I'm trying to locate continuous groups. A line belong to the same group if Period_End has the same value at in the previous line or Period_start is the day after Period_End

In this test we have two groups: Line 1-7 and line 8-12

I have tried to describe it in the TEST column


Solution

  • You need the following conditional running count

    ;with CTE_BASE as
    (
        SELECT 
            CPR,
            Funktion,
            Gruppe,
            DATEFROMPARTS(YEAR(Period_start), MONTH(Period_start), 1) Period_start,
            EOMONTH(Period_End) Period_End,
            EOMONTH(LAG(Period_End) OVER (PARTITION BY o.CPR, Funktion, o.Gruppe ORDER BY Period_start, Period_End)) Previous_EOM_Period_End
        FROM 
            TEST AS O
    )
    SELECT
      * ,  
      COUNT(CASE WHEN Previous_EOM_Period_End IS NULL OR Previous_EOM_Period_End < DATEADD(Day, -1, Period_start) then 1 END)
        OVER (PARTITION BY CPR, Funktion, Gruppe ORDER BY Period_start, Period_End ROWS UNBOUNDED PRECEDING)   
    FROM 
        CTE_BASE;
    

    db<>fiddle