I have the stored procedure shown below, which reads data from a table that contains two columns: the first column row_id
which contains the id of the row and the second column row_data
which contains a JSON that contains the data then it convert that JSON to columns and insert it to a new table.
The stored procedure reads from one table that contains JSON and normalize that JSON to a new table with deleting the duplicated rows:
CREATE OR ALTER PROCEDURE dbo.basic_json_normalization_incr
@tableNamep NVARCHAR(MAX),
@tableName NVARCHAR(MAX),
@rows_count INT OUTPUT
AS
BEGIN
SET NOCOUNT ON;
DECLARE @raw_tableName NVARCHAR(MAX);
DECLARE @chargement NVARCHAR(MAX);
DECLARE @truncateQuery NVARCHAR(MAX);
DECLARE @columns_list NVARCHAR(MAX);
DECLARE @json_query_part NVARCHAR(MAX);
DECLARE @json_query NVARCHAR(MAX) = '';
DECLARE @separator NVARCHAR(2) = '';
DECLARE @sqldelete NVARCHAR(MAX);
DECLARE @sqlinsert NVARCHAR(MAX);
SET @raw_tableName = 'dbo.raw' + @tableNamep;
SELECT @columns_list = COALESCE(@columns_list + ', ' + '[' + column_name + ']', '[' + column_name + ']')
FROM dbo.schemaSource
WHERE _enabled = 1 AND table_name = @tableNamep;
DECLARE json_cursor CURSOR FOR
SELECT CASE
WHEN single_mult = 'M'
THEN CONCAT('[', column_name, '] NVARCHAR(MAX) ', 'AS JSON')
ELSE CONCAT('[', column_name, '] NVARCHAR(MAX) ', '''' + '$.', column_name, '''')
END
FROM dbo.schemaSource WITH (NOLOCK)
WHERE _enabled = 1
AND table_name = @tableNamep;
OPEN json_cursor;
FETCH NEXT FROM json_cursor INTO @json_query_part;
WHILE @@FETCH_STATUS = 0
BEGIN
SET @json_query = @json_query + @separator + @json_query_part;
SET @separator = ',';
FETCH NEXT FROM json_cursor INTO @json_query_part;
END;
CLOSE json_cursor;
DEALLOCATE json_cursor;
-- Determine the duplicate check column based on table name
DECLARE @duplicateCheckColumn NVARCHAR(MAX);
SET @duplicateCheckColumn = '_id';
-- Build the dynamic SQL query for insertion with duplicate and CURR_NO check
SET @sqldelete = N'DELETE FROM ' + @tableName + ' WHERE _id COLLATE French_CI_AS IN (SELECT raw_id COLLATE French_CI_AS FROM raw' + @tableNamep + ');';
SET @sqlinsert = N'
INSERT INTO dbo.' + @tableName + '(_id, ORIGINAL_ID, ' + @columns_list + ')
SELECT j.[_id], j.[ORIGINAL_ID], ' + @columns_list + '
FROM ' + @raw_tableName + ' r
CROSS APPLY OPENJSON(r.raw_data) WITH (
_id nvarchar(155) '$._id' STRICT,
ORIGINAL_ID nvarchar(150) '$.ORIGINAL_ID',
' + @json_query + '
) AS j
';
BEGIN
BEGIN TRANSACTION;
EXECUTE sp_executesql @sqldelete;
EXECUTE sp_executesql @sqlinsert;
SELECT @rows_count = @@ROWCOUNT;
COMMIT;
END;
END;
GO
The problem is that I'm dealing with tables with a large amount of data, so this stored procedure is very slow.
here is a sample data for the Table ACCOUNT
:
table rawACCOUNT
:
|raw_id|raw_data
|001 |{"_id":"001","ORIGINAL_ID":"001","CATEGORY":"65","ACCOUNT_OFFICER":"1","OPENING_DATE":"20100322","CURR_NO":"3","DATE_TIME":"2112091237","DEPT_CODE":"13"}|
expected table ACCOUNT
:
|ORIGINAL_ID|_id|INSERT_DATE |ACCOUNT_OFFICER|CATEGORY|CURR_NO|CUSTOMER|DATE_LAST_UPDATE|DATE_TIME |DEPT_CODE|OPENING_DATE|INACTIV_MARKER|
|001 |001|20230906085135|1 |65 |3 |NULL |NULL |2112091237|13 |20100322 |NULL |
Is there any optimizations that I apply to make it run faster?
PS: I used to use STRING_AGG instead of CURSOR but it showed me this error:
The result of STRING_AGG aggregation has exceeded the 8,000-byte limit. Use LOB types to avoid truncation of the result.
You don't need a cursor here. Just use STRING_AGG
.
You probably also want an index on the _id
column.
Furthermore:
sysname
.QUOTENAME
for correct quoting.STRING_AGG
instead.NOLOCK
it has serious data integrity implications and is not a go-faster switch.CREATE OR ALTER PROCEDURE dbo.basic_json_normalization_incr
@tableNamep sysname,
@tableName sysname,
@rows_count INT OUTPUT
AS
SET NOCOUNT, XACT_ABORT ON;
DECLARE @raw_tableName NVARCHAR(1000) = 'dbo.' + QUOTENAME('raw' + @tableNamep);
DECLARE @main_tableName NVARCHAR(1000) = 'dbo.' + QUOTENAME(@tableName);
DECLARE @columns_list NVARCHAR(MAX);
DECLARE @json_query NVARCHAR(MAX);
DECLARE @sql NVARCHAR(MAX);
SET @raw_tableName
SELECT @columns_list =
STRING_AGG(CAST(QUOTENAME(column_name) AS nvarchar(max)), ', ')
FROM dbo.schemaSource
WHERE _enabled = 1
AND table_name = @tableNamep;
SELECT @json_query =
STRING_AGG(
CAST(
QUOTENAME(column_name)
+ ' nvarchar(max)'
+ CASE WHEN single_mult = 'M' THEN ' AS JSON' END
AS nvarchar(max)),
','
)
FROM dbo.schemaSource
WHERE _enabled = 1
AND table_name = @tableNamep;
SET @sql = N'
BEGIN TRAN;
DELETE FROM dbo.' + @main_tableName + '
WHERE _id IN (
SELECT raw_id
FROM ' + raw_tableName + '
);
INSERT INTO dbo.' + @main_tableName + '
(_id, ORIGINAL_ID, ' + @columns_list + ')
SELECT j.[_id], j.[ORIGINAL_ID], ' + @columns_list + '
FROM ' + @raw_tableName + ' r
CROSS APPLY OPENJSON(r.raw_data)
WITH (
_id nvarchar(155) '$._id' STRICT,
ORIGINAL_ID nvarchar(150),
' + @json_query + '
) AS j;
SET @rows_count = @@ROWCOUNT;
COMMIT;
';
PRINT @sql; -- Your friend
EXECUTE sp_executesql @sql,
N'@rows_count int OUTPUT',
@rows_count = @rows_count OUTPUT;