Search code examples
sql-serverperformance

SQL Server INSERT into huge table is slow


I have a large table in a SQL Server 2008 database, it has about 570 million records. Every day we run a batch job that takes a file of 200,000 or so transaction records, does a group by and sum against this data and inserts it into the large table.

Recently I have experimented with changing the clustered index of the large table to an identity int column, which has brought the insert down from 3 hours to one hour, but I am still puzzled why this simple query should take so long to run (regardless of the size of the table)

This is the table with 570 million rows

CREATE TABLE [dbo].[POINTS_EARNED](
[POINTS_EARNED_ID]int identity not null,
[CARD_ID] [int] NOT NULL,
[CYCLE_ID] [int] NOT NULL,
[POINTS_CODE] [int] NOT NULL,
[NO_POINTS] [int] NULL,
[ACCOUNT_ID] [int] NOT NULL,
[CREATED_DATE] [datetime] NULL,
[CREATED_BY] [varchar](20) NULL,
[LAST_MODIFIED_DATE] [datetime] NULL,
[LAST_MODIFIED_BY] [varchar](20) NULL,
[DELETED] [bit] NULL,
CONSTRAINT [PK_POINTS_EARNED] PRIMARY KEY CLUSTERED 
(
[POINTS_EARNED_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, 
IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]

) ON [PRIMARY]

It also has some constraints (defaults and foreign keys) and indexes, and I am wondering if these are what is causing the problem.

The actual SQL that takes an hour to run is:

insert into points_earned (
    card_id,
    cycle_id,
    points_code,
    no_points,
    account_id
    ) 

select pe.card_id, pe.cycle_id, pe.points_code, sum(pe.no_points),pe.account_id
from #points_earned pe 
group by pe.card_id, pe.cycle_id, pe.points_code,pe.account_id

and the temp table #points_earned has about 200,000 rows, and has the following structure (with no indexes)

create table #points_earned (
        card_id           int,
        cycle_id    int,
        points_code int,
        card_type   varchar(5),
        no_points   int,
        account_id int
        )

So, I would like some opinions on whether I should

  • Add indexes on the temp table
  • Drop non clustered indexes on the large table before adding the data, then recreating them
  • Any other options?

Update - as requested a bit more info

  • The select statement runs without the insert in 2 seconds, so this doesn't appear to be the problem, so probably don't need o worry about indexing the temp table

Indexes, (update) trigger, and constraints are:

    CREATE NONCLUSTERED INDEX [IDX_CYCLE_ID] ON [dbo].[POINTS_EARNED] 
(
    [CYCLE_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_ACCOUNT_ID] ON [dbo].[POINTS_EARNED] 
(
    [ACCOUNT_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [IX_ACCOUNT_ID_POINTS_CODE] ON [dbo].[POINTS_EARNED] 
(
    [ACCOUNT_ID] ASC,
    [POINTS_CODE] ASC
)
INCLUDE ( [CARD_ID],
[CYCLE_ID],
[NO_POINTS]) WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [RELATION_151_FK] ON [dbo].[POINTS_EARNED] 
(
    [CARD_ID] ASC,
    [CYCLE_ID] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
CREATE NONCLUSTERED INDEX [RELATION_152_FK] ON [dbo].[POINTS_EARNED] 
(
    [POINTS_CODE] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
GO
/****** Object:  Trigger [update_points_earned]    Script Date: 09/13/2013 13:20:54 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TRIGGER [dbo].[update_points_earned] ON [dbo].[POINTS_EARNED]
FOR UPDATE
AS
 BEGIN
  UPDATE points_earned 
  SET Last_Modified_By = USER,
   Last_Modified_Date = GETDATE()  
  FROM
   points_earned t,
   inserted i
  WHERE
   t.card_id = i.card_id AND
   t.cycle_id = i.cycle_id AND
   t.points_code = i.points_code AND
   t.account_id = i.account_id
 END
GO
/****** Object:  Default [DF_POINTS_EARNED_ACCOUNT_ID]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_ACCOUNT_ID]  DEFAULT ((0)) FOR [ACCOUNT_ID]
GO
/****** Object:  Default [DF_POINTS_EARNED_CREATED_DATE]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_CREATED_DATE]  DEFAULT (getdate()) FOR [CREATED_DATE]
GO
/****** Object:  Default [DF_POINTS_EARNED_CREATED_BY]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_CREATED_BY]  DEFAULT (user_name()) FOR [CREATED_BY]
GO
/****** Object:  Default [DF_POINTS_EARNED_DELETED]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED] ADD  CONSTRAINT [DF_POINTS_EARNED_DELETED]  DEFAULT ((0)) FOR [DELETED]
GO
/****** Object:  ForeignKey [FK_POINTS_E_REFERENCE_CYCLE_CA]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED]  WITH CHECK ADD  CONSTRAINT [FK_POINTS_E_REFERENCE_CYCLE_CA] FOREIGN KEY([CARD_ID], [CYCLE_ID])
REFERENCES [dbo].[CYCLE_CARD] ([CARD_ID], [CYCLE_ID])
GO
ALTER TABLE [dbo].[POINTS_EARNED] CHECK CONSTRAINT [FK_POINTS_E_REFERENCE_CYCLE_CA]
GO
/****** Object:  ForeignKey [FK_POINTS_E_REFERENCE_POINTS_C]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED]  WITH NOCHECK ADD  CONSTRAINT [FK_POINTS_E_REFERENCE_POINTS_C] FOREIGN KEY([POINTS_CODE])
REFERENCES [dbo].[POINTS_CODE] ([POINTS_CODE])
GO
ALTER TABLE [dbo].[POINTS_EARNED] CHECK CONSTRAINT [FK_POINTS_E_REFERENCE_POINTS_C]
GO
/****** Object:  ForeignKey [FK_POINTS_EARNED_REF_ACCOUNT]    Script Date: 09/13/2013 13:20:54 ******/
ALTER TABLE [dbo].[POINTS_EARNED]  WITH NOCHECK ADD  CONSTRAINT [FK_POINTS_EARNED_REF_ACCOUNT] FOREIGN KEY([ACCOUNT_ID])
REFERENCES [dbo].[ACCOUNT] ([ACCOUNT_ID])
GO
ALTER TABLE [dbo].[POINTS_EARNED] CHECK CONSTRAINT [FK_POINTS_EARNED_REF_ACCOUNT]

Edit 2, query plan for the insert statement

  |--Sequence
   |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[IDX_CYCLE_ID]), SET:([POINTS_EARNED_ID1040] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[CYCLE_ID1041] = [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID]) WITH ORDERED PREFETCH)
   |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
   |         |--Table Spool
   |              |--Clustered Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[PK_POINTS_EARNED]), SET:([Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[card_id] as [pe].[card_id]),[Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[cycle_id] as [pe].[cycle_id]),[Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[points_code] as [pe].[points_code]),[Progressive_Points].[dbo].[POINTS_EARNED].[NO_POINTS] = [Expr1006],[Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID] = RaiseIfNullInsert([tempdb].[dbo].[#points_earned].[account_id] as [pe].[account_id]),[Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] = [Expr1007],[Progressive_Points].[dbo].[POINTS_EARNED].[CREATED_DATE] = [Expr1008],[Progressive_Points].[dbo].[POINTS_EARNED].[CREATED_BY] = [Expr1009],[Progressive_Points].[dbo].[POINTS_EARNED].[DELETED] = [Expr1010],[Progressive_Points].[dbo].[POINTS_EARNED].[LAST_MODIFIED_DATE] = NULL,[Progressive_Points].[dbo].[POINTS_EARNED].[LAST_MODIFIED_BY] = NULL) WITH UNORDERED PREFETCH)
   |                   |--Compute Scalar(DEFINE:([Expr1008]=getdate(), [Expr1009]=CONVERT_IMPLICIT(varchar(20),user_name(),0), [Expr1010]=(0)))
   |                        |--Compute Scalar(DEFINE:([Expr1007]=getidentity((1243867498),(8),NULL)))
   |                             |--Top(ROWCOUNT est 0)
   |                                  |--Parallelism(Gather Streams)
   |                                       |--Compute Scalar(DEFINE:([Expr1006]=CASE WHEN [Expr1062]=(0) THEN NULL ELSE [Expr1063] END))
   |                                            |--Hash Match(Aggregate, HASH:([pe].[card_id], [pe].[cycle_id], [pe].[points_code], [pe].[account_id]), RESIDUAL:([tempdb].[dbo].[#points_earned].[card_id] as [pe].[card_id] = [tempdb].[dbo].[#points_earned].[card_id] as [pe].[card_id] AND [tempdb].[dbo].[#points_earned].[cycle_id] as [pe].[cycle_id] = [tempdb].[dbo].[#points_earned].[cycle_id] as [pe].[cycle_id] AND [tempdb].[dbo].[#points_earned].[points_code] as [pe].[points_code] = [tempdb].[dbo].[#points_earned].[points_code] as [pe].[points_code] AND [tempdb].[dbo].[#points_earned].[account_id] as [pe].[account_id] = [tempdb].[dbo].[#points_earned].[account_id] as [pe].[account_id]) DEFINE:([Expr1062]=COUNT_BIG([tempdb].[dbo].[#points_earned].[no_points] as [pe].[no_points]), [Expr1063]=SUM([tempdb].[dbo].[#points_earned].[no_points] as [pe].[no_points])))
   |                                                 |--Parallelism(Repartition Streams, Hash Partitioning, PARTITION COLUMNS:([pe].[card_id], [pe].[cycle_id], [pe].[points_code], [pe].[account_id]))
   |                                                      |--Clustered Index Scan(OBJECT:([tempdb].[dbo].[#points_earned] AS [pe]))
   |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[IX_ACCOUNT_ID]), SET:([POINTS_EARNED_ID1042] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[ACCOUNT_ID1043] = [Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID]) WITH ORDERED PREFETCH)
   |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
   |         |--Table Spool
   |--Assert(WHERE:(CASE WHEN [Expr1050] IS NULL THEN (0) ELSE NULL END))
   |    |--Nested Loops(Left Semi Join, OUTER REFERENCES:([Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID], [Expr1068]) WITH UNORDERED PREFETCH, DEFINE:([Expr1050] = [PROBE VALUE]))
   |         |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[IX_ACCOUNT_ID_POINTS_CODE]), SET:([POINTS_EARNED_ID1044] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[CARD_ID1045] = [Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID],[CYCLE_ID1046] = [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID],[POINTS_CODE1047] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE],[NO_POINTS1048] = [Progressive_Points].[dbo].[POINTS_EARNED].[NO_POINTS],[ACCOUNT_ID1049] = [Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID]) WITH UNORDERED PREFETCH)
   |         |    |--Table Spool
   |         |--Clustered Index Seek(OBJECT:([Progressive_Points].[dbo].[ACCOUNT].[PK_ACCOUNT]), SEEK:([Progressive_Points].[dbo].[ACCOUNT].[ACCOUNT_ID]=[Progressive_Points].[dbo].[POINTS_EARNED].[ACCOUNT_ID]) ORDERED FORWARD)
   |--Assert(WHERE:(CASE WHEN [Expr1054] IS NULL THEN (0) ELSE NULL END))
   |    |--Nested Loops(Left Semi Join, OUTER REFERENCES:([Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID], [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID], [Expr1070]) WITH UNORDERED PREFETCH, DEFINE:([Expr1054] = [PROBE VALUE]))
   |         |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[RELATION_151_FK]), SET:([POINTS_EARNED_ID1051] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[CARD_ID1052] = [Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID],[CYCLE_ID1053] = [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID]) WITH ORDERED PREFETCH)
   |         |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
   |         |         |--Table Spool
   |         |--Row Count Spool
   |              |--Index Seek(OBJECT:([Progressive_Points].[dbo].[CYCLE_CARD].[IDX_NCLST_CARD_ID_CYCLE_ID]), SEEK:([Progressive_Points].[dbo].[CYCLE_CARD].[CARD_ID]=[Progressive_Points].[dbo].[POINTS_EARNED].[CARD_ID] AND [Progressive_Points].[dbo].[CYCLE_CARD].[CYCLE_ID]=[Progressive_Points].[dbo].[POINTS_EARNED].[CYCLE_ID]) ORDERED FORWARD)
   |--Assert(WHERE:(CASE WHEN [Expr1057] IS NULL THEN (0) ELSE NULL END))
        |--Merge Join(Left Semi Join, MANY-TO-MANY MERGE:([Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE])=([Progressive_Points].[dbo].[POINTS_CODE].[POINTS_CODE]), RESIDUAL:([Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE]=[Progressive_Points].[dbo].[POINTS_CODE].[POINTS_CODE]))
             |--Index Insert(OBJECT:([Progressive_Points].[dbo].[POINTS_EARNED].[RELATION_152_FK]), SET:([POINTS_EARNED_ID1055] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID],[POINTS_CODE1056] = [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE]) WITH ORDERED PREFETCH)
             |    |--Sort(ORDER BY:([Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_CODE] ASC, [Progressive_Points].[dbo].[POINTS_EARNED].[POINTS_EARNED_ID] ASC))
             |         |--Table Spool
             |--Index Scan(OBJECT:([Progressive_Points].[dbo].[POINTS_CODE].[POINTS_CODES_PK]), ORDERED FORWARD)

Solution

  • OK, here's what I would do:

    1. Check to see if you need both indexes [IX_ACCOUNT_ID_POINTS_CODE] and [IX_ACCOUNT_ID] as they may be redundant.

    2. Before you do the INSERT, Disable the Trigger and drop the Foreign Keys.

    3. Do the INSERT setting the fields normally set by the Trigger, and insuring that the FK Column's values are valid.

    4. Re-Enable the trigger, and re-create the Foreign Keys WITH NOCHECK.

    I would leave the indexes on as you are inserting less than 0.2% of the total row count so it's probably faster to update them in-place rather than to drop and rebuild them.