Search code examples
sqlsql-serverwhile-loopcursor

Can someone help me with replacing the cursor in the following SQL code


This is the SQL code that goes through a table that contains various records with a start and end data like a slowly changing dimension. While checking the dimension we found out that sometimes the start date is not following the end date. So we loop the table and recreate the start/end dates to make sure the start/end date is actually the min/max of the available dates. But the process takes too long to work through 9 million rows. I would love to use a while loop but can't figure out to retain the min/max value and detect the switch in ID.

DECLARE @MINDDEB Datetime
DECLARE @DDEB Datetime
DECLARE @DFIN Datetime
DECLARE @CECV float
DECLARE @AIDNEBHNPRPEP int
DECLARE @CETA float
DECLARE @AIDNEBHNPRAFI int
DECLARE @MAXDFIN Datetime
DECLARE @OLDCECV float
DECLARE @OLDAIDNEBHNPRPEP int
DECLARE @OLDCETA float
DECLARE @OLDAIDNEBHNPRAFI int

SET @MINDDEB=9999-12-31
SET @MAXDFIN=1835-12-31
drop table dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE
--T2
SELECT DDEB, DFIN, CETA,AIDNEBHNPRAFI 
into dbo.TBYDWHTEBHNPRAFIVRS_CURSORTABLE
FROM dbo.TBYDWHTEBHNPRAFIVRS
WHERE  AIDNEBHNPRAFI=-1
ORDER BY AIDNEBHNPRAFI,DDEB

DECLARE T2_CURSOR CURSOR FOR 
select   DDEB, DFIN, CETA,AIDNEBHNPRAFI 
from TBYDWHTEBHNPRAFIVRS
where DFIN>DDEB and BRRDDEL<>1 and CSTUVRS = 1 --and AIDNEBHNPRAFI in (1080,1033143,2311904)
order by AIDNEBHNPRAFI,DDEB,CETA,CECV


OPEN T2_CURSOR  

FETCH NEXT FROM T2_CURSOR   
INTO @DDEB,  @DFIN, @CETA,@AIDNEBHNPRAFI
SET @OLDAIDNEBHNPRAFI=@AIDNEBHNPRAFI
SET @OLDCETA=@CETA  
--SET   @OLDCECV=@CECV

WHILE @@FETCH_STATUS = 0  
BEGIN  

    if ( @OLDCETA=@CETA and @OLDAIDNEBHNPRAFI=@AIDNEBHNPRAFI) 
    BEGIN
        --print 'entering first if'
        IF (@MINDDEB>@DDEB) 
        BEGIN
            SET @MINDDEB=@DDEB
        END
        IF (@MAXDFIN<@DFIN) 
        BEGIN       
            SET @MAXDFIN=@DFIN
        END
    END
    --print ' OLD CECV '+convert(varchar(10),@OLDCECV) +' new CECV '+ convert(varchar(10),@CECV) +' OLDAID '+ convert(varchar(10),@OLDAIDNEBHNPRPEP) +' NEWAID '+ convert(varchar(10),@AIDNEBHNPRPEP)
    -- +' NEWDDEB '+ convert(varchar(10),@DDEB,101) +' MINDDEB '+ convert(varchar(10),@MINDDEB,101) +' NEWDFIn '+ convert(varchar(10),@DFIN,101) +' MAXDFIN '+ convert(varchar(10),@MAXDFIN,101)



 FETCH NEXT FROM T2_CURSOR   
    INTO   @DDEB,  @DFIN, @CETA,@AIDNEBHNPRAFI

    if ( @OLDCETA<>@CETA or @OLDAIDNEBHNPRAFI<>@AIDNEBHNPRAFI) 
    BEGIN
        INSERT INTO TBYDWHTEBHNPRAFIVRS_CURSORTABLE (DDEB, DFIN, CETA,AIDNEBHNPRAFI )
        VALUES (@MINDDEB,@MAXDFIN,@OLDCETA,@OLDAIDNEBHNPRAFI)
        SET @OLDAIDNEBHNPRAFI=@AIDNEBHNPRAFI
        --SET   @OLDCECV=@CECV
        SET @OLDCETA=@CETA
        SET @MINDDEB=@DDEB
        SET @MAXDFIN=@DFIN
    END


END   
INSERT INTO TBYDWHTEBHNPRAFIVRS_CURSORTABLE (DDEB, DFIN, CETA,AIDNEBHNPRAFI )
        VALUES (@MINDDEB,@MAXDFIN,@OLDCETA,@OLDAIDNEBHNPRAFI)
CLOSE T2_CURSOR;  
DEALLOCATE T2_CURSOR;  

This is the source data
DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2006-04-30 23:59:59.000 1 231272
2006-05-01 00:00:00.000 2006-11-30 23:59:59.000 1 231272
2006-12-01 00:00:00.000 2007-04-30 23:59:59.000 1 231272
2007-05-01 00:00:00.000 2008-04-30 23:59:59.000 1 231272
2008-05-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2009-04-30 23:59:59.000 1 231272
2009-05-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272

and this is the result we get with the cursor

DDEB DFIN CETA AIDNEBHNPRAFI
2006-03-01 00:00:00.000 2008-08-31 23:59:59.000 1 231272
2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2 231272
2008-11-01 00:00:00.000 2010-01-31 23:59:59.000 1 231272
2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14 231272


Solution

  • Here is an attempt. The results returned match, but you'll want to test it with more data. I also made assumptions on the datatypes. If yours are different, you'll need to update accordingly. The CTEs identify the intervals that do not have a sequential partner (different of 1 second between the stop and the next start) for the start and stop times. It then assigns sequence numbers to those times and joins the start & stop times based on the sequence.

    In detail, the 3/1/2006 start time does not have an immediately preceding stop time so it is included and is the first start sequence for its group. 11/1/2008 meets the same criteria and is the next start sequence for its group. 8/31/2008 is the first stop time that does not have a proceeding start time and is the first stop sequence for its group. Since 3/1/2006 & 8/31/2008 are both the first sequence in their group, they mark the start and stop for the first unbroken interval for the group.

    IF OBJECT_ID('tempdb..#temp') IS NOT NULL
      DROP TABLE #temp;
    GO
    
    CREATE TABLE #temp (
                         DDEB DATETIME,
                         DFIN DATETIME,
                         CETA INT,
                         AIDNEBHNPRAFI INT
                       );
    
    INSERT INTO #temp
    VALUES ('2006-03-01 00:00:00.000', '2006-04-30 23:59:59.000', '1', '231272'),
    (' 2006-05-01 00:00:00.000', '2006-11-30 23:59:59.000', '1', '231272'),
    (' 2006-12-01 00:00:00.000', '2007-04-30 23:59:59.000', '1', '231272'),
    (' 2007-05-01 00:00:00.000', '2008-04-30 23:59:59.000', '1', '231272'),
    (' 2008-05-01 00:00:00.000', '2008-08-31 23:59:59.000', '1', '231272'),
    (' 2008-09-01 00:00:00.000', '2008-10-31 23:59:59.000', '2', '231272'),
    (' 2008-11-01 00:00:00.000', '2009-04-30 23:59:59.000', '1', '231272'),
    (' 2009-05-01 00:00:00.000', '2010-01-31 23:59:59.000', '1', '231272'),
    (' 2010-02-01 00:00:00.000', '9999-12-31 23:59:59.000', '14', '231272');
    WITH Starts AS (
    SELECT  *, RowSeq = ROW_NUMBER() OVER(PARTITION BY ceta, t.AIDNEBHNPRAFI ORDER BY ddeb)
    FROM    #temp AS t
    WHERE   NOT EXISTS ( SELECT 1
                         FROM   #temp AS It
                         WHERE  t.CETA = It.CETA
                                AND t.AIDNEBHNPRAFI = It.AIDNEBHNPRAFI
                                AND It.DFIN = DATEADD(SECOND, -1, t.DDEB)
                       )
    ), Finishes AS (
    
    SELECT  *, RowSeq = ROW_NUMBER() OVER(PARTITION BY ceta, t.AIDNEBHNPRAFI ORDER BY dfin)
    FROM    #temp AS t
    WHERE   NOT EXISTS ( SELECT 1
                         FROM   #temp AS It
                         WHERE  t.CETA = It.CETA
                                AND t.AIDNEBHNPRAFI = It.AIDNEBHNPRAFI
                                AND t.DFIN = DATEADD(SECOND, -1, it.DDEB)
                       )
    )
    SELECT Starts.ddeb, Finishes.DFIN,  starts.ceta, starts.AIDNEBHNPRAFI
    FROM Starts
    INNER JOIN Finishes
    ON Starts.AIDNEBHNPRAFI = Finishes.AIDNEBHNPRAFI
    AND Starts.CETA = Finishes.CETA
    AND Starts.RowSeq = Finishes.RowSeq
    ORDER BY Starts.ddeb
    

    Results:

    DDEB    DFIN    CETA    AIDNEBHNPRAFI
    
    2006-03-01 00:00:00.000 2008-08-31 23:59:59.000 1   231272
    
    2008-09-01 00:00:00.000 2008-10-31 23:59:59.000 2   231272
    
    2008-11-01 00:00:00.000 2010-01-31 23:59:59.000 1   231272
    
    2010-02-01 00:00:00.000 9999-12-31 23:59:59.000 14  231272