Search code examples
sql-servert-sqlduplicatestemp-tables

Delete duplicate rows and its dependencies in other tables by using Temporary tables


I have a product table wish contains a duplicated rows and its uniq Id is referenced in other 6 tables.

I want to delete these duplicate rows in the product table and inside the other tables that depends on my product.

I think about using temporary tables to:

  • create global temporary table for each table related to my product
  • get duplicate records
  • get its dependencies
  • save the first rows with min Id
  • delete the other rows

I have this idea, but I don't know how to well implement it.

I don't know, if the choice of temp tables is right or not.

Thanks for your help and advises.


Solution

  • create table dbo.hasduplicates
    (
        id int identity,
        --assume colA, colB is the entity/unique combo
        colA varchar(10),
        colB int,
        someOtherColumn varchar(40)
    );
    
    
    insert into dbo.hasduplicates(colA, colB, someOtherColumn)
    values
    ('A', 1, 'A1 - 1'),
    ('A', 1, 'A1 - 2'),
    ('A', 1, 'A1 - 3'),
    --
    ('A', 2, 'A2 - 1'),
    ('A', 2, 'A2 - 2'),
    --
    ('B', 1, 'B1 - 1'),
    ('B', 1, 'B1 - 2'),
    ('B', 1, 'B1 - 3');
    
    
    select *
    from dbo.hasduplicates;
    
    
    --temp table holding the to-be-deleted ids (of the duplicates)
    create table #ToBedeleted(IdToDelete int);
    
    with dup
    as
    (
        select *, row_number() over (partition by colA, colB /*<--cols of your entity go here*/ order by id) as RowNum
        from dbo.hasduplicates
    )
    insert into #ToBedeleted(IdToDelete)
    select Id
    from dup
    where RowNum >= 2;
    
    --contains the ids for deletion
    select * from #ToBedeleted;
    
    --cleanup the referencing tables
    /*
    DELETE FROM dbo.Table1 WHERE Table1Id IN (SELECT IdToDelete FROM #ToBedeleted);
    DELETE FROM dbo.Table2 WHERE Table2Id IN (SELECT IdToDelete FROM #ToBedeleted);
    .............
    DELETE FROM dbo.Table6 WHERE Table6Id IN (SELECT IdToDelete FROM #ToBedeleted);
    --finally cleanup your products table
    DELETE FROM dbo.hasduplicates WHERE Id IN (SELECT IdToDelete FROM #ToBedeleted);
    */
    
    --/*
    drop table #ToBedeleted;
    drop table dbo.hasduplicates;
    --*/