Search code examples
sql-servert-sqlquery-performance

Improving Query looking at multiple tables for existence


Given the following table definition:

CREATE TABLE [dbo].[ConsolidatedRecords]
(
    [SessionKey]   UNIQUEIDENTIFIER NOT NULL PRIMARY KEY CLUSTERED
  , [EntityID]     UNIQUEIDENTIFIER NOT NULL FOREIGN KEY REFERENCES [dbo].        [EntityList] ( EntityID ) 
  , [EntityName]   NVARCHAR(128)    NOT NULL
  , [SurrogateKey] UNIQUEIDENTIFIER NOT NULL
  , [RecordID]     UNIQUEIDENTIFIER NOT NULL
  , PRIMARY KEY ( [SessionKey], [EntityName], [RecordID], [SurrogateKey] ) CLUSTERED
);
GO

CREATE #CurrentSession TABLE ([SessionKey] UNIQUEIDENTIFIER NOT NULL);

INSERT INTO #CurrentSession VALUES (NEWID());

-- ... long, involved process to populate ConsolidatedRecords

I have a query against ConsolidatedRecords table that checks for existence of specific entity records which works, but is hard to maintain and frankly is ugly. I am attempting to fix these issues but am not being vary successful, so I am turning to you:

SELECT [SessionKey]   = records.[SessionKey]
     , [SurrogateKey] = records.[SurrogateKey]
  FROM [dbo].[ConsolidatedRecords] records
  JOIN #CurrentSession             session ON records.[SessionKey] = session.[SessionKey]
 WHERE ( EXISTS( SELECT 1 FROM [dbo].[Entity1] one   WHERE records.RecordID = one.[Entity1ID] AND records.[EntityName] = N'Entity1' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity2] two   WHERE records.RecordID = two.[Entity2ID] AND records.[EntityName] = N'Entity2' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity3] three WHERE records.RecordID = two.[Entity3ID] AND records.[EntityName] = N'Entity3' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity4] four  WHERE records.RecordID = two.[Entity4ID] AND records.[EntityName] = N'Entity4' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity5] five  WHERE records.RecordID = two.[Entity5ID] AND records.[EntityName] = N'Entity5' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity6] six   WHERE records.RecordID = two.[Entity6ID] AND records.[EntityName] = N'Entity6' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity7] seven WHERE records.RecordID = two.[Entity7ID] AND records.[EntityName] = N'Entity7' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity8] eight WHERE records.RecordID = two.[Entity8ID] AND records.[EntityName] = N'Entity8' )
      OR EXISTS( SELECT 1 FROM [dbo].[Entity9] nine  WHERE records.RecordID = two.[Entity9ID] AND records.[EntityName] = N'Entity9' )
       );

One of the issues with this is that in the actual case, there are quite a few more than nine entities in question.

I have tried the following, but the performance is worse - some of these entity tables are quite large - 100,000 records or more. In all cases I am querying against the primary key.

       SELECT [SessionKey]   = records.[SessionKey]
            , [SurrogateKey] = records.[SurrogateKey]     
           FROM [dbo].[ConsolidatedRecords] records
           JOIN #CurrentSession             session ON records.[SessionKey] = session.[SessionKey]
LEFT OUTER JOIN [dbo].[Entity1]             one     ON records.RecordID     = one.[Entity1ID]      AND records.[EntityName] = N'Entity1'
LEFT OUTER JOIN [dbo].[Entity2]             two     ON records.RecordID     = two.[Entity2ID]      AND records.[EntityName] = N'Entity2'
LEFT OUTER JOIN [dbo].[Entity3]             three   ON records.RecordID     = three.[Entity3ID]    AND records.[EntityName] = N'Entity3'
LEFT OUTER JOIN [dbo].[Entity4]             four    ON records.RecordID     = four.[Entity4ID]     AND records.[EntityName] = N'Entity4'
LEFT OUTER JOIN [dbo].[Entity5]             five    ON records.RecordID     = five.[Entity5ID]     AND records.[EntityName] = N'Entity5'
LEFT OUTER JOIN [dbo].[Entity6]             six     ON records.RecordID     = six.[Entity6D]       AND records.[EntityName] = N'Entity6'
LEFT OUTER JOIN [dbo].[Entity7]             seven   ON records.RecordID     = seven.[Entity7ID]    AND records.[EntityName] = N'Entity7'
LEFT OUTER JOIN [dbo].[Entity8]             eight   ON records.RecordID     = eight.[Entity8ID]    AND records.[EntityName] = N'Entity8'
LEFT OUTER JOIN [dbo].[Entity9]             nine    ON records.RecordID     = nine.[Entity9ID]     AND records.[EntityName] = N'Entity9'
          WHERE one.[Entity1]   IS NOT NULL
             OR two.[Entity2]   IS NOT NULL
             OR three.[Entity2] IS NOT NULL
             OR four.[Entity2]  IS NOT NULL
             OR five.[Entity2]  IS NOT NULL
             OR six.[Entity2]   IS NOT NULL
             OR seven.[Entity2] IS NOT NULL
             OR eight.[Entity2] IS NOT NULL
             OR nine.[Entity2]  IS NOT NULL;

Solution

  • You could use UNION ALL:

    SELECT [SessionKey]   = records.[SessionKey]
         , [SurrogateKey] = records.[SurrogateKey]
      FROM [dbo].[ConsolidatedRecords] records
      JOIN #CurrentSession             session 
       ON records.[SessionKey] = session.[SessionKey]
     WHERE EXISTS(SELECT 1 
                 FROM [dbo].[Entity1] one   
                 WHERE records.RecordID = one.[Entity1ID] 
                   AND records.[EntityName] = N'Entity1' 
                 UNION ALL
                 SELECT 1 
                 FROM [dbo].[Entity2] two   
                 WHERE records.RecordID = two.[Entity2ID] 
                   AND records.[EntityName] = N'Entity2'
                 UNION ALL 
                  ...
                  );