Search code examples
sqlsql-servert-sqlcommon-table-expressionrecursive-cte

SQL Server CTE loop; insert all record together


I have this situation:

drop table #t1;
drop table #t2

select * 
into #t1
from
    (select 'va1'c1,'vb1'c2,'vc1'c3 union all
     select 'va2'c1,'vb2'c2,'vc2'c3 union all
     select 'va3'c1,'vb3'c2,'vc3'c3 union all
     select 'va1'c1,'vb1'c2,'vc1'c3 union all
     select 'va2'c1,'vb2'c2,'vc2'c3 union all
     select 'va3'c1,'vb3'c2,'vc3'c3 union all
     select 'va1'c1,'vb1'c2,'vc1'c3 union all
     select 'va2'c1,'vb2'c2,'vc2'c3 union all
     select 'va3'c1,'vb3'c2,'vc3'c3 union all
     select 'va1'c1,'vb1'c2,'vc1'c3 union all
     select 'va2'c1,'vb2'c2,'vc2'c3 union all
     select 'va3'c1,'vb3'c2,'vc3'c3 union all
     select 'va4'c1,'vb4'c2,'vc4'c3) t

select *
into #t2
from #t1
where 0 = 1

;with tmp1 as
(
    select 
        t1.*,
        ROW_NUMBER() over (partition by t1.c1 order by (select null)) r
    from 
        #t1 t1
    left join 
        #t2 t2 on t1.c1 = t2.c1
    where 
        t2.c1 is null   
), tmp2 as
(
    select 
        0 n,*
    from 
        tmp1
    union all
    select 
        n+1 n, t1.c1, t1.c2, t1.c3, t1.r
    from 
        tmp2 t1
    join 
        tmp1 t2 on t1.c1 = t2.c1
                and t2.r = t1.r + 1
    where 
        n < 10
)
--insert #t2
select c1, c2, c3  --,r
from tmp2

When I run this, it selects everything just fine (103 records).

The problem is when I this code to insert into #t2 (13 records!!!)

I think SQL runs step by step and insert records during running and than my condition in tmp1 is over...

How to resolve it?

My goal is to check if data exists, than loop and insert results...but SQL stops after 1st cycle...


Solution

  • You could use MERGE:

    select * into #t1
    from(
    select 'va1'c1,'vb1'c2,'vc1'c3 union all
    select 'va2'c1,'vb2'c2,'vc2'c3 union all
    select 'va3'c1,'vb3'c2,'vc3'c3 union all
    select 'va1'c1,'vb1'c2,'vc1'c3 union all
    select 'va2'c1,'vb2'c2,'vc2'c3 union all
    select 'va3'c1,'vb3'c2,'vc3'c3 union all
    select 'va1'c1,'vb1'c2,'vc1'c3 union all
    select 'va2'c1,'vb2'c2,'vc2'c3 union all
    select 'va3'c1,'vb3'c2,'vc3'c3 union all
    select 'va1'c1,'vb1'c2,'vc1'c3 union all
    select 'va2'c1,'vb2'c2,'vc2'c3 union all
    select 'va3'c1,'vb3'c2,'vc3'c3 union all
    select 'va4'c1,'vb4'c2,'vc4'c3 
    )t;
    
    select * into #t2 from #t1 where 0=1;
    
    ;with tmp1 as(
        select t1.*, ROW_NUMBER()over(partition by t1.c1 order by(select null))r
        from #t1 t1
        left join #t2 t2 
          on t1.c1=t2.c1
        where t2.c1 is null 
    ),tmp2 as (
        select 0 n,*
        from tmp1
        union all
        select n+1 n,t1.c1,t1.c2,t1.c3,t1.r
        from tmp2 t1
        join tmp1 t2
          on t1.c1=t2.c1
         and t2.r=t1.r+1
        where n<10
    )
    MERGE #t2
    USING tmp2
      ON #t2.c1 = tmp2.c1
    WHEN NOT MATCHED THEN
      INSERT VALUES (tmp2.c1, tmp2.c2, tmp2.c3);
    
    SELECT @@ROWCOUNT;
    -- 103
    

    DBFiddle Demo


    EDIT:

    Kudos to Bartosz Ratajczyk for examining this case:

    It turns out it is related to lazy/eager table/index spooling. There are at least two more ways to force SQL Server to generate different execution plan:

    a) By using TOP (100) PERCENT

    DECLARE @n INT = 100;
    
    ;with tmp1 as (
        select t1.*,
               ROW_NUMBER()over(partition by t1.c1 order by(select null))r
        from #t1 t1
        left join #t2 t2 
          on t1.c1=t2.c1
        where t2.c1 is null 
    ),tmp2 as
    (
        select 0 n,*
        from tmp1
        union all
        select n+1 n,t1.c1,t1.c2,t1.c3,t1.r
        from tmp2 t1
        join tmp1 t2
          on t1.c1=t2.c1
         and t2.r=t1.r+1
        where n<10
    )
    insert #t2
    select TOP (@n) PERCENT c1, c2, c3  --,r
    from tmp2
    
    SELECT @@ROWCOUNT;
    

    b) By using ORDER BY .. OFFSET 0 ROWS:

    ;with tmp1 as(
        select t1.*,
               ROW_NUMBER()over(partition by t1.c1 order by(select null))r
        from #t1 t1
        left join #t2 t2 
          on t1.c1=t2.c1
        where t2.c1 is null 
    ),tmp2 as
    (
        select 0 n,*
        from tmp1
        union all
        select n+1 n,t1.c1,t1.c2,t1.c3,t1.r
        from tmp2 t1
        join tmp1 t2
          on t1.c1=t2.c1
         and t2.r=t1.r+1
        where n<10
    )
    insert #t2
    select c1, c2, c3  --,r
    from tmp2
    ORDER BY 1 OFFSET 0 ROWS;
    
    SELECT @@ROWCOUNT;
    

    db<>fiddle demo2


    Addendum: How does the recursive CTE work? by Bartosz Ratajczyk