Search code examples
t-sqlsql-server-2016

Group Rows by Date


I have been asked to group transactions that have occurred within two minutes of one another into separate "date groups". I'm having a tough time wrapping my head around how to split the data by minutes.

I wrote a test table below along with expected "date groups"

CREATE TABLE #tmpData
(ID int,actionDt datetime)

INSERT INTO #tmpData
(ID,actionDt)
VALUES
(1, '7/22/2021 9:51'),
(1, '7/22/2021 9:52'),
(1, '7/22/2021 9:55'),
(1, '7/22/2021 9:56'),
(1, '7/22/2021 9:57'),
(1, '7/22/2021 9:58'),
(1, '7/22/2021 10:00'),
(1, '7/22/2021 10:10'),
(2, '7/22/2021 8:38'),
(2, '7/22/2021 8:39'),
(2, '7/22/2021 8:40'),
(2, '7/22/2021 12:05')

the expected "date groups" should be

id DtGroup
1 7/22/2021 9:51
1 7/22/2021 9:55
1 7/22/2021 9:58
1 7/22/2021 10:10
2 7/22/2021 8:38
2 7/22/2021 12:05

I wrote the below which gets close but the timestamps 9:58 and 10:00 for ID 1 should be their own date group.

;with resultSet AS
(
    SELECT a.id, a.actiondt, a.diff  FROM
    (
        SELECT 
            id, 
            actiondt, 
            diff = datediff(mi,lag(actiondt) OVER (PARTITION BY id ORDER BY actiondt),actiondt)
        FROM #tmpdata
    ) AS a 
    WHERE diff IS NULL OR diff > 2
)


SELECT 
    t.id, 
    t.actiondt AS currDt, 
    resultset.actiondt AS DtGrp
from #tmpdata t
left JOIN resultset
on t.id = resultset.id 
    and t.actiondt between dateadd(mi,-2,resultset.actiondt) and dateadd(mi,2,resultset.actiondt)

Solution

  • Is the logic 2 minutes or greater between actionDt? If so should the expected results be the following? Otherwise, I don't understand your expected results to try to help.

    (1, '7/22/2021 9:51'),
    (1, '7/22/2021 9:55'),
    (1, '7/22/2021 9:57'),
    (1, '7/22/2021 10:00'),
    (1, '7/22/2021 10:10'),
    
    (2, '7/22/2021 8:38'),
    (2, '7/22/2021 8:40'),
    (2, '7/22/2021 12:05')
    

    This will give you the above results.

    declare @Count int, 
    @LoopCount int,
    @PrevMinutes int,
    @CurrentMinutes int,
    @NextMinutes int,
    @PrevFlag int,
    @Flag int
    
    ;with dID as (
        select distinct id,
        actiondt
        from #tmpData
    )
    
    select 
    identity(int,1,1) RowNo, 
    d.id as Grp,
    d.actiondt, 
    lag(DATEPART(mi, d.actiondt), 1,0) over (order by d.actiondt) as PrevMinutes,
    DATEPART(mi, d.actiondt) as CurrentMinutes,
    lead(DATEPART(mi, d.actiondt), 1,0) over (order by d.actiondt) as NextMinutes,
    0 as  flag
    into #Temp
    from #tmpData d
    full outer join dID on d.id = dID.id
                        and d.actiondt = dID.actiondt
    order by d.id, actiondt 
    
    select @Count = @@RowCount 
    set @LoopCount = 1 
    
    while @LoopCount <= @Count 
    begin 
        set @PrevMinutes = (select PrevMinutes from #Temp where RowNo = @LoopCount)
        set @CurrentMinutes = (select CurrentMinutes from #Temp where RowNo = @LoopCount)
        set @NextMinutes = (select NextMinutes from #Temp where RowNo = @LoopCount)
        set @Flag = (select flag from #Temp where RowNo = @LoopCount)
        set @PrevFlag = (select flag from #Temp where RowNo = @LoopCount - 1)
        
        if (@LoopCount = 1)
        begin
            update #Temp
            set flag = 1 
            where RowNo = 1
        end
        else if (abs(@PrevMinutes - @CurrentMinutes) >= 2)
        begin
            update #Temp
            set flag = 1 
            where RowNo = @LoopCount
        end
        else if (@PrevFlag = 0 and @Flag = 0)
        begin
            update #Temp
            set flag = 1 
            where RowNo = @LoopCount
        end
    
        set @LoopCount=@LoopCount + 1 
    end 
    
    select Grp as id,
    actiondt
    from #Temp
    where flag = 1