I have been trying to upload around 18 million records from Oracle to SQL Server. The problem is the loading task which take days to complete even though the data is not so much. I have marked parts of the code relative to their respective time taken to completely execute.
declare @v_sql VARCHAR(MAX)
declare @column_name VARCHAR(MAX)
declare @column_name2 VARCHAR(MAX) =''
declare @column_name3 VARCHAR(MAX) =''
declare @tablename VARCHAR(MAX) ='TITLE_BLOCK_PARTS' --input parameter
declare @v_sql2 VARCHAR(MAX)=''
declare @temp_table VARCHAR(500) = '##'+@tablename+'_T'
declare @temp_table2 VARCHAR(500)
declare @v_entityid int = 14 --input parameter
declare @vcount INT = 1
declare @v_total_records bigint
DECLARE @CountResults TABLE (CountReturned bigint)
declare @v_total_ei bigint
declare @v_max_ei bigint
declare @v_total_no_col int
declare @col_to_exclude VARCHAR(MAX) = ',ID,CLASS,'
declare @stg_table vARCHAR(500) = '##'+@tablename+'_TEMP'
declare @v_success int = -1
declare @v_tablename VARCHAR(MAX) = @tablename
declare @itemnumber varchar(500) ='';
declare @rev varchar(500) ='';
declare @v_stg_sql VARCHAR(MAX) = ''
declare @v_temp_table vARCHAR(500) = '##'+@tablename+'_T_ORI'
declare @v_sql3 VARCHAR(MAX) = ''
declare @v_countrownum bigint = 1
declare @v_rownum bigint
DECLARE @v_rownumtab TABLE (ROWCountReturned bigint)
BEGIN
--SET @col_to_exclude = ''''+REPLACE(@col_to_exclude,',',''',''') +''''
--Start: Following code takes maximum 5 to 7 minutes to complete
IF EXISTS (SELECT * FROM sys.tables WHERE name = @v_tablename)
Begin
set @v_sql = ' DROP TABLE ' + @v_tablename
exec (@v_sql)
END
set @v_sql = 'SELECT *
into ' + @tablename +'
FROM [ORCLTST]..[AGILE].'+@v_tablename;
exec (@v_sql)
--End: Following code takes maximum 5 to 7 minutes to complete
--Start: Following code takes maximum 4 minutes to complete
IF OBJECT_ID('tempdb..'+@stg_table) IS NOT NULL
exec ('drop table ' + @stg_table)
SET @v_sql = 'SELECT *
INTO '+@stg_table+
'
FROM ' + @tablename;
EXEC (@v_sql);
set @v_sql = ''
set @v_total_no_col = (SELECT COUNT(COLUMN_NAME)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND COLUMN_NAME
NOT IN
(
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND
charindex(',' + CAST(COLUMN_NAME as nvarchar(20)) + ',', @col_to_exclude) > 0
)
)
DECLARE get_columns CURSOR FOR
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND COLUMN_NAME
NOT IN
(
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @tablename
AND
charindex(',' + CAST(COLUMN_NAME as nvarchar(20)) + ',', @col_to_exclude) > 0
)
--SELECT COLUMN_NAME
--FROM INFORMATION_SCHEMA.COLUMNS
--WHERE TABLE_NAME = @tablename
--AND COLUMN_NAME NOT IN (@col_to_exclude)
--@tablename
--print @sql3
OPEN get_columns
FETCH NEXT FROM get_columns INTO @column_name
WHILE @@FETCH_STATUS = 0
BEGIN
--PRINT @column_name
SET @column_name2 = replace(REPLACE(REPLACE(LTRIM(RTRIM(REPLACE(@column_name,'"',''))),')',''),'(',''),' ','')
--replace(@column_name2
--print 'ALTER TABLE ' + @sql5+ ' DROP constraint ' + @sql3
set @v_sql2+='CAST('+'['+@column_name+']'+' AS VARCHAR(8000))'+ 'AS '+ @column_name2 + ',
'
+
'
'
SET @v_sql3 += 'CAST('+'ISNULL(['+@column_name+'],'''')'+' AS VARCHAR(8000))'+ 'AS '+ @column_name2 + ',
'
+
'
'
SET @column_name3 += @column_name2 + ',
'+
'
'
FETCH NEXT FROM get_columns INTO @column_name
end
CLOSE get_columns
DEALLOCATE get_columns
--Print @v_sql2
set @v_sql2 = left(@v_sql2, len(@v_sql2)-5)
set @v_sql3 = left(@v_sql3, len(@v_sql3)-5)
SET @column_name3 = left(@column_name3, len(@column_name3)-7)
IF OBJECT_ID('tempdb..'+@temp_table) IS NOT NULL
exec ('drop table ' + @temp_table)
IF OBJECT_ID('tempdb..'+@temp_table+'2') IS NOT NULL
exec ('drop table ' + @temp_table+'2')
IF OBJECT_ID('tempdb..'+@v_temp_table) IS NOT NULL
exec ('drop table ' + @v_temp_table)
IF OBJECT_ID('tempdb..'+@v_temp_table+'2') IS NOT NULL
exec ('drop table ' + @v_temp_table+'2')
SET @v_sql = 'Select
ROW_NUMBER()
OVER (ORDER BY number, REV) AS Row,
'
+
@v_sql2
+
'
'
+
' INTO ' + @v_temp_table + '
from
'
+
@tablename
PRINT @v_sql
EXEC (@V_SQL)
SET @v_sql = 'Select
ROW_NUMBER()
OVER (ORDER BY number, REV) AS Row,
'
+
@v_sql3
+
'
'
+
' INTO ' + @v_temp_table+'2' + '
from
'
+
@v_temp_table
PRINT @v_sql
EXEC (@V_SQL)
SET @v_sql ='
Select ROW
,ColumnName
, ColumnValue
INTO ' + @temp_table + '
FROM
(
Select
ROW,
'
+
@v_sql3
+
'
'
+
'
'
+
' from
'
+
@v_temp_table+'2'
+
'
'
+
'
)t
UNPIVOT (ColumnValue for ColumnName
IN (
'
+
@column_name3
+
'
'
+
'
)
)UP
';
PRINT @v_sql
--END;
--/*
EXEC (@V_SQL)
set @v_sql = ''
/*set @v_sql = 'with q1 as (
SELECT t2.Id, t2.displayorder,t2.DisplayName ,T2.[Group],t1.*
'
+
'
FROM '+@temp_table+ ' t1
INNER JOIN PLMColumn t2
ON t1.ColumnName = t2.name
where t2.EntityId = ' + cast(@v_entityid as varchar(100))
+
')
,q2 as(
select Id, ColumnName, displayorder,[Group], count(*) as c_nnum
from q1
group by Id,ColumnName,displayorder,[Group])
select t1.id, t1.displayorder, t1.ColumnName, t2.columnvalue
into '+ @temp_table+'2'+
'
from q2 t1
INNER JOIN '+@temp_table+ ' t2
ON t1.ColumnName = t2.ColumnName
WHERE T1.[Group] <> ''PAGE THREE''
' */
--
set @temp_table2 = replace(@temp_table+'2', '##','');
IF EXISTS (SELECT * FROM sys.tables WHERE name = @temp_table2)
Begin
set @v_sql = ' DROP TABLE ' + @temp_table2
exec (@v_sql)
END
set @v_Sql = 'SELECT t1.Row,t2.Id, t2.displayorder,t2.DisplayName ,T2.[Group],t1.ColumnName, t1.ColumnValue
into '+ @temp_table2 +
' FROM '+@temp_table+ ' t1
INNER JOIN PLMColumn t2
ON t1.ColumnName = t2.name
where t2.EntityId = ' + cast(@v_entityid as varchar(100)) + '
and T2.[Group] <> ''PAGE THREE'''
print @v_sql
exec(@v_sql)
set @v_sql = ' CREATE INDEX idx_'+@temp_table2+'_row
ON '+ @temp_table2 + ' (row)';
print @v_sql
exec(@v_sql)
--end
--/*
--set @v_sql = 'select COUNT(*) from ' + @tablename
SELECT @v_sql = N'SELECT COUNT(*) FROM' + QUOTENAME(@tablename);
INSERT @CountResults
EXEC(@v_sql)
set @v_total_records = (SELECT CountReturned FROM @CountResults)
print '@v_total_records '+ cast(@v_total_records as varchar(5000))
set @v_max_ei = (SELECT IDENT_CURRENT ('PLMENTITYINSTANCE'))
set @v_max_ei = @v_max_ei + 1
--(select ISNULL(MAX (id),0) from PLMEntityInstance)
print '@v_max_ei '+ cast(@v_max_ei as varchar(5000))
set @v_total_ei = ISNULL(@v_total_records,0) + @v_max_ei
print '@v_total_ei '+ cast(@v_total_ei as varchar(5000))
--END;
IF EXISTS (SELECT * FROM sys.tables WHERE name = 'ITEM_STG')
Begin
set @v_sql = ' DROP TABLE ITEM_STG'
exec (@v_sql)
END
set @v_sql = 'CREATE TABLE [dbo].[ITEM_STG](
[INSTANCE_ID] [bigint] NOT NULL,
[ITEM_NUMBER] [varchar](max) NOT NULL,
[REV] [varchar](max) NOT NULL)'
exec (@v_sql)
set @v_sql = ''
--END
--/*
WHILE (@vcount <= @v_total_records)
BEGIN
INSERT INTO PLMEntityInstance (EntityId)
VALUES (@v_entityid)
SET @vcount = @vcount + 1
--print
END
set @v_sql = ''
set @v_sql = N'with c AS
(select DISTINCT ROW from ' + QUOTENAME(@temp_table2) +')
select count(*) from c
'
INSERT @v_rownumtab
EXEC(@v_sql)
set @v_rownum = (select * from @v_rownumtab)
print @v_rownum
--End: Following code takes maximum 4 minutes to complete
--end
--/*
--Start: Following code takes days to complete
while (@v_max_ei <= @v_total_ei AND @v_countrownum <=@v_rownum)
BEGIN
set @v_sql = ''
--top('+CAST(@v_total_no_col AS varchar(100))+')
set @v_sql = '
with q2
as(
select
* from '
+ @temp_table2
+
'
where row = '+ CAST(@v_countrownum AS varchar(5000)) + '
)
INSERT INTO PLMColumnValue(EntityInstanceId, ColumnID, Value)
select '+ CAST(@v_max_ei AS varchar(5000)) + ' as Entityinstanceid, t.id as column_id, t.columnvalue from q2 t
order by DisplayOrder
'
print @v_countrownum
print @v_sql
exec (@v_sql)
set @itemnumber = (select Value from PLMColumnValue where columnid=(select id from PLMColumn where EntityId = @v_entityid and Name = 'number') and EntityInstanceId = @v_max_ei)
set @rev = (select Value from PLMColumnValue where columnid=(select id from PLMColumn where EntityId = @v_entityid and Name = 'rev') and EntityInstanceId = @v_max_ei)
print @itemnumber
print @rev
SET @v_stg_sql = ''
set @v_stg_sql = '
INSERT INTO ITEM_STG (INSTANCE_ID,ITEM_NUMBER,REV)
select ' + CAST(@v_max_ei AS varchar(5000)) + ' , ' + ''''+@itemnumber+'''' + ' , ' +''''+ @rev+'''' + ''
print @v_stg_sql
exec (@v_stg_sql)
set @itemnumber = '';
set @rev = '';
SET @v_stg_sql = ''
set @v_sql = ''
SET @v_max_ei = @v_max_ei + 1;
set @v_countrownum = @v_countrownum + 1;
END
--End: Following code takes days to complete
END;
The main tables under use are PlmEntityInstance, PlmColumnValue and ITEM_STG. PlmColumnValue table is where all the migrated data is loaded, this table has an EAV model and here the code take ages to load the data. How can I optimize this process? Creating an index on the table where all the data is stored after all the required modifications and before loading (its name is stored in variable @temp_table2) did help a lot but still its not good enough. I initiated loading process yesterday at 2100 local time and its been 15 hours and it has loaded only 149088 records out of 18 million records. Please Help!
If there is not enough information provided in question please let me know.
Please see below wait stats
WaitType Wait_S Resource_S Signal_S WaitCount Percentage AvgWait_S AvgRes_S AvgSig_S
CXPACKET 657100.57 636945.69 20154.88 338749067 59.49 0.0019 0.0019 0.0001
PREEMPTIVE_OLEDBOPS 268405.70 268405.70 0.00 24041 24.30 11.1645 11.1645 0.0000
OLEDB 91213.76 91213.76 0.00 34867 8.26 2.6160 2.6160 0.0000
ASYNC_NETWORK_IO 33316.35 32352.43 963.93 4060699 3.02 0.0082 0.0080 0.0002
Sample data is placed on the following link: https://www.dropbox.com/home/Public/Query/
The data in the csv provided in the above link has 8531 records.
You can initialize parameter @temp_table2 with 'TITLE_BLOCK_PARTS'
Replace all WHILE
loops with set based INSERT
using tally table. For example replace
declare @vcount INT = 1
WHILE (@vcount <= @v_total_records)
BEGIN
INSERT INTO PLMEntityInstance (EntityId)
VALUES (@v_entityid)
SET @vcount = @vcount + 1
END
with
INSERT INTO PLMEntityInstance (EntityId)
SELECT @v_entityid
FROM Tally
WHERE N <= @v_total_records
Tally can be created in multiple ways, for example, 20 million rows tally:
WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
E2(N) AS (SELECT 1 FROM E1 a, E1 b),
E4(N) AS (SELECT 1 FROM E2 a, E2 b) --10E+4 or 10,000 rows max
SELECT TOP 20000000
IDENTITY(INT,1,1) AS N
INTO dbo.Tally
FROM E4 a, E4 b; --100,000,000 rows max
ALTER TABLE dbo.Tally
ADD CONSTRAINT PK_Tally_N
PRIMARY KEY CLUSTERED (N) WITH FILLFACTOR = 100;
EDIT
Concerning refactoring your last WHILE
to set based code. I'm not sure i fully understand what the whole script is doing, but it's too long to comment. So first look at original code of the last loop:
@v_countrownum = 1
...
@v_max_ei = ...
while (@v_max_ei <= @v_total_ei AND @v_countrownum <=@v_rownum)
BEGIN
...
SET @v_max_ei = @v_max_ei + 1;
set @v_countrownum = @v_countrownum + 1;
END
Let’s refactor it to iterator query:
...
@v_max_ei = ...
WITH iterator AS (
SELECT v_max_ei = @v_max_ei + N - 1
,v_countrownum = N
FROM tally
WHERE @v_max_ei + N - 1 <= @v_total_ei AND N <= @v_rownum
)
SELECT *
FROM iterator
Next, your loop unconditionally adds rows to PLMColumnValue
and ITEM_STG
at every iteration, so we must use LEFT JOIN
of iterator and source data tables and take care of NULLs. I see your code defaults missing data to ''.
Also I need some test data, which are generated this way using subset of PLMColumn
fields from your dataset.
-- test data generator
DROP TABLE title_block3;
GO
SELECT row = N, id, DisplayOrder
,columnvalue = c.NAME + '_val_0' + cast(N AS VARCHAR(3))
INTO title_block3
FROM PLMColumn c
JOIN tally ON tally.N <= 3
WHERE c.NAME IN ('number','rev' ) OR c.NAME LIKE 'text%'
SELECT * FROM title_block3;
So finally the script to replace your last WHILE. I use arbitrary variables values instead of computed by the first part of your script. @v_rownum = 3
according to test data.
declare @temp_table2 varchar(100);
declare @v_sql nvarchar(max);
declare @v_max_ei int;
declare @v_entityid int;
declare @v_total_ei int;
declare @v_rownum int;
set @temp_table2 = 'title_block3'; -- see generator
set @v_max_ei = 120000;
set @v_total_ei = 200000;
set @v_rownum = 3;
set @v_entityid = 14;
--Start: Following code takes days to complete
set @v_sql =
'WITH iterator AS (
SELECT v_max_ei = '+ CAST(@v_max_ei AS varchar(5000)) + ' + N - 1
,v_countrownum = N
FROM tally
WHERE '+ CAST(@v_max_ei AS varchar(5000)) + ' + N - 1 <= '+ CAST(@v_total_ei AS varchar(5000)) +
' AND N <= '+ CAST(@v_rownum AS varchar(5000)) + '
)
INSERT INTO PLMColumnValue(EntityInstanceId, ColumnID, Value)
SELECT i.v_max_ei, isnull(t.id,''''), isnull(t.columnvalue,'''')
FROM iterator i
LEFT JOIN ' + @temp_table2 + ' t ON t.row = i.v_countrownum
ORDER BY i.v_countrownum, t.DisplayOrder ';
print @v_sql;
exec (@v_sql);
WITH iterator AS (
SELECT v_max_ei = @v_max_ei + N - 1
,v_countrownum = N
FROM tally
WHERE @v_max_ei + N - 1 <= @v_total_ei AND N <= @v_rownum
)
INSERT INTO ITEM_STG (INSTANCE_ID, ITEM_NUMBER, REV)
SELECT i.v_max_ei
-- unpivot
, isnull(max(CASE c.Name WHEN 'number' THEN Value END),'')
, isnull(max(CASE c.Name WHEN 'rev' THEN Value END),'')
FROM iterator i
LEFT JOIN PLMColumnValue cv ON cv.EntityInstanceId = i.v_max_ei
LEFT JOIN PLMColumn c ON cv.columnid = c.id
AND c.EntityId = @v_entityid
AND c.Name IN ( 'number' , 'rev')
GROUP BY i.v_max_ei;
-- check it
SELECT * FROM ITEM_STG;
SELECT * FROM PLMColumnValue;