Search code examples
sql-servergroupinggaps-and-islands

Grouping in SQL Server


I have data similar to below. I need to group the status column based on the occurrence of data.

Id  Status  Value
1     K      1
2     K      3
3     K      2
4     B      2
5     B      3
6     K      6
7     J      5
8     J      2

I want data as below

Status  Value
K       6
B       5
K       6
J       7

I need the cumulative sum of the value column.In the status column if the data is same consecutively, then I need to add the value columns. I cannot apply group by. In the example given K is repeated twice, because they are not consecutive.

I have tried below query, but it doesn't work as required.

select Status,
       (select sum(value)
        from table t2
        where 
          t2.Status = t.Status and
           t2.SNO <= t.SNO
       ) as total
      
from table t;

Solution

  • This is a Gaps and Islands Question

    I tackle these by using the incrementing Id and combing this with ROW_NUMBER window function

    --Using a CTE just to replicate the sample data
    ;WITH cteX (Id,Status,Value)
    AS(
        SELECT 1,'K', 1 UNION ALL
        SELECT 2,'K', 3 UNION ALL
        SELECT 3,'K', 2 UNION ALL
        SELECT 4,'B', 2 UNION ALL
        SELECT 5,'B', 3 UNION ALL
        SELECT 6,'K', 6 UNION ALL
        SELECT 7,'J', 5 UNION ALL
        SELECT 8,'J', 2
    )
    SELECT
          Grp = Id - ROW_NUMBER()OVER( PARTITION BY X.Status ORDER BY X.Id) 
        , X.Id
        , X.Status
        , X.Value
    FROM 
        cteX X
    ORDER BY
        X.Id
    

    This gives this result set, note the Grp column

    Grp     Id      Status  Value
    ------- ------- ------- -------
    0       1       K       1
    0       2       K       3
    0       3       K       2
    3       4       B       2
    3       5       B       3
    2       6       K       6
    6       7       J       5
    6       8       J       2
    

    Then combine with a CTE or derived table you can get your expected output

    --Using a CTE just to replicate the sample data
    ;WITH cteX (Id,Status,Value)
    AS(
        SELECT 1,'K', 1 UNION ALL
        SELECT 2,'K', 3 UNION ALL
        SELECT 3,'K', 2 UNION ALL
        SELECT 4,'B', 2 UNION ALL
        SELECT 5,'B', 3 UNION ALL
        SELECT 6,'K', 6 UNION ALL
        SELECT 7,'J', 5 UNION ALL
        SELECT 8,'J', 2
    )
    SELECT Y.Status
         , Value = SUM(Y.Value)
    FROM
    (
        SELECT TOP 100 PERCENT
              Grp = Id - ROW_NUMBER()OVER( PARTITION BY X.Status ORDER BY X.Id) 
            , X.Id
            , X.Status
            , X.Value
        FROM 
            cteX X
        ORDER BY
            X.Id
    ) Y
    GROUP BY
        Y.Grp, Y.Status
    

    Output

    Status  Value
    ------- -------
    B       5
    J       7
    K       6
    K       6
    

    Update Question include "Preserve order" solution

    Just include an Order by MIN(Id)

    --Using a CTE just to replicate the sample data
    ;WITH cteX (Id,Status,Value)
    AS(
        SELECT 1,'K', 1 UNION ALL
        SELECT 2,'K', 3 UNION ALL
        SELECT 3,'K', 2 UNION ALL
        SELECT 4,'B', 2 UNION ALL
        SELECT 5,'B', 3 UNION ALL
        SELECT 6,'K', 6 UNION ALL
        SELECT 7,'J', 5 UNION ALL
        SELECT 8,'J', 2
    )
        SELECT 
             Y.[Status]
            ,[Value] = SUM(Y.[Value])
        FROM
        (
            SELECT
                  Grp = Id - ROW_NUMBER()OVER( PARTITION BY X.[Status] ORDER BY X.Id) 
                , X.Id
                , X.[Status]
                , X.[Value]
            FROM 
                cteX X
        ) Y
        GROUP BY
            Y.Grp, Y.[Status]
        ORDER BY
            MIN(Y.Id) --preserve the Status Order
    

    Output

    Status  Value
    ------- -------
    K       6
    B       5
    K       6
    J       7