I have a TVP+SP insert strategy implemented as i need to insert big amounts of rows (probably concurrently) while being able to get some info in return like Id
and stuff. Initially I'm using EF code first approach to generate the DB structure. My entities:
FacilityGroup
public class FacilityGroup
{
public int Id { get; set; }
[Required]
public string Name { get; set; }
public string InternalNotes { get; set; }
public virtual List<FacilityInstance> Facilities { get; set; } = new List<FacilityInstance>();
}
FacilityInstance
public class FacilityInstance
{
public int Id { get; set; }
[Required]
[Index("IX_FacilityName")]
[StringLength(450)]
public string Name { get; set; }
[Required]
public string FacilityCode { get; set; }
//[Required]
public virtual FacilityGroup FacilityGroup { get; set; }
[ForeignKey(nameof(FacilityGroup))]
[Index("IX_FacilityGroupId")]
public int FacilityGroupId { get; set; }
public virtual List<DataBatch> RelatedBatches { get; set; } = new List<DataBatch>();
public virtual HashSet<BatchRecord> BatchRecords { get; set; } = new HashSet<BatchRecord>();
}
BatchRecord
public class BatchRecord
{
public long Id { get; set; }
//todo index?
public string ItemName { get; set; }
[Index("IX_Supplier")]
[StringLength(450)]
public string Supplier { get; set; }
public decimal Quantity { get; set; }
public string ItemUnit { get; set; }
public string EntityUnit { get; set; }
public decimal ItemSize { get; set; }
public decimal PackageSize { get; set; }
[Index("IX_FamilyCode")]
[Required]
[StringLength(4)]
public string FamilyCode { get; set; }
[Required]
public string Family { get; set; }
[Index("IX_CategoryCode")]
[Required]
[StringLength(16)]
public string CategoryCode { get; set; }
[Required]
public string Category { get; set; }
[Index("IX_SubCategoryCode")]
[Required]
[StringLength(16)]
public string SubCategoryCode { get; set; }
[Required]
public string SubCategory { get; set; }
public string ItemGroupCode { get; set; }
public string ItemGroup { get; set; }
public decimal PurchaseValue { get; set; }
public decimal UnitPurchaseValue { get; set; }
public decimal PackagePurchaseValue { get; set; }
[Required]
public virtual DataBatch DataBatch { get; set; }
[ForeignKey(nameof(DataBatch))]
public int DataBatchId { get; set; }
[Required]
public virtual FacilityInstance FacilityInstance { get; set; }
[ForeignKey(nameof(FacilityInstance))]
[Index("IX_FacilityInstance")]
public int FacilityInstanceId { get; set; }
[Required]
public virtual Currency Currency { get; set; }
[ForeignKey(nameof(Currency))]
public int CurrencyId { get; set; }
}
DataBatch
public class DataBatch
{
public int Id { get; set; }
[Required]
public string Name { get; set; }
public DateTime DateCreated { get; set; }
public BatchStatus BatchStatus { get; set; }
public virtual List<FacilityInstance> RelatedFacilities { get; set; } = new List<FacilityInstance>();
public virtual HashSet<BatchRecord> BatchRecords { get; set; } = new HashSet<BatchRecord>();
}
And then my SQL Server related code, TVP Structure:
CREATE TYPE dbo.RecordImportStructure
AS TABLE (
ItemName VARCHAR(MAX),
Supplier VARCHAR(MAX),
Quantity DECIMAL(18, 2),
ItemUnit VARCHAR(MAX),
EntityUnit VARCHAR(MAX),
ItemSize DECIMAL(18, 2),
PackageSize DECIMAL(18, 2),
FamilyCode VARCHAR(4),
Family VARCHAR(MAX),
CategoryCode VARCHAR(MAX),
Category VARCHAR(MAX),
SubCategoryCode VARCHAR(MAX),
SubCategory VARCHAR(MAX),
ItemGroupCode VARCHAR(MAX),
ItemGroup VARCHAR(MAX),
PurchaseValue DECIMAL(18, 2),
UnitPurchaseValue DECIMAL(18, 2),
PackagePurchaseValue DECIMAL(18, 2),
FacilityCode VARCHAR(MAX),
CurrencyCode VARCHAR(MAX)
);
Insert stored procedure:
CREATE PROCEDURE dbo.ImportBatchRecords (
@BatchId INT,
@ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;
DECLARE @ErrorCode int
DECLARE @Step varchar(200)
--Clear old stuff?
--TRUNCATE TABLE dbo.BatchRecords;
INSERT INTO dbo.BatchRecords (
ItemName,
Supplier,
Quantity,
ItemUnit,
EntityUnit,
ItemSize,
PackageSize,
FamilyCode,
Family,
CategoryCode,
Category,
SubCategoryCode,
SubCategory,
ItemGroupCode,
ItemGroup,
PurchaseValue,
UnitPurchaseValue,
PackagePurchaseValue,
DataBatchId,
FacilityInstanceId,
CurrencyId
)
OUTPUT INSERTED.Id
SELECT
ItemName,
Supplier,
Quantity,
ItemUnit,
EntityUnit,
ItemSize,
PackageSize,
FamilyCode,
Family,
CategoryCode,
Category,
SubCategoryCode,
SubCategory,
ItemGroupCode,
ItemGroup,
PurchaseValue,
UnitPurchaseValue,
PackagePurchaseValue,
@BatchId,
--FacilityInstanceId,
--CurrencyId
(SELECT TOP 1 f.Id from dbo.FacilityInstances f WHERE f.FacilityCode=FacilityCode),
(SELECT TOP 1 c.Id from dbo.Currencies c WHERE c.CurrencyCode=CurrencyCode)
FROM @ImportTable;
And finally my quick, test only solution to execute this stuff on .NET side.
public class BatchRecordDataHandler : IBulkDataHandler<BatchRecordImportItem>
{
public async Task<int> ImportAsync(SqlConnection conn, SqlTransaction transaction, IEnumerable<BatchRecordImportItem> src)
{
using (var cmd = new SqlCommand())
{
cmd.CommandText = "ImportBatchRecords";
cmd.Connection = conn;
cmd.Transaction = transaction;
cmd.CommandType = CommandType.StoredProcedure;
cmd.CommandTimeout = 600;
var batchIdParam = new SqlParameter
{
ParameterName = "@BatchId",
SqlDbType = SqlDbType.Int,
Value = 1
};
var tableParam = new SqlParameter
{
ParameterName = "@ImportTable",
TypeName = "dbo.RecordImportStructure",
SqlDbType = SqlDbType.Structured,
Value = DataToSqlRecords(src)
};
cmd.Parameters.Add(batchIdParam);
cmd.Parameters.Add(tableParam);
cmd.Transaction = transaction;
using (var res = await cmd.ExecuteReaderAsync())
{
var resultTable = new DataTable();
resultTable.Load(res);
var cnt = resultTable.AsEnumerable().Count();
return cnt;
}
}
}
private IEnumerable<SqlDataRecord> DataToSqlRecords(IEnumerable<BatchRecordImportItem> src)
{
var tvpSchema = new[] {
new SqlMetaData("ItemName", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("Supplier", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("Quantity", SqlDbType.Decimal),
new SqlMetaData("ItemUnit", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("EntityUnit", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("ItemSize", SqlDbType.Decimal),
new SqlMetaData("PackageSize", SqlDbType.Decimal),
new SqlMetaData("FamilyCode", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("Family", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("CategoryCode", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("Category", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("SubCategoryCode", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("SubCategory", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("ItemGroupCode", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("ItemGroup", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("PurchaseValue", SqlDbType.Decimal),
new SqlMetaData("UnitPurchaseValue", SqlDbType.Decimal),
new SqlMetaData("PackagePurchaseValue", SqlDbType.Decimal),
new SqlMetaData("FacilityInstanceId", SqlDbType.VarChar, SqlMetaData.Max),
new SqlMetaData("CurrencyId", SqlDbType.VarChar, SqlMetaData.Max),
};
var dataRecord = new SqlDataRecord(tvpSchema);
foreach (var importItem in src)
{
dataRecord.SetValues(importItem.ItemName,
importItem.Supplier,
importItem.Quantity,
importItem.ItemUnit,
importItem.EntityUnit,
importItem.ItemSize,
importItem.PackageSize,
importItem.FamilyCode,
importItem.Family,
importItem.CategoryCode,
importItem.Category,
importItem.SubCategoryCode,
importItem.SubCategory,
importItem.ItemGroupCode,
importItem.ItemGroup,
importItem.PurchaseValue,
importItem.UnitPurchaseValue,
importItem.PackagePurchaseValue,
importItem.FacilityCode,
importItem.CurrencyCode);
yield return dataRecord;
}
}
}
Import entity structure:
public class BatchRecordImportItem
{
public string ItemName { get; set; }
public string Supplier { get; set; }
public decimal Quantity { get; set; }
public string ItemUnit { get; set; }
public string EntityUnit { get; set; }
public decimal ItemSize { get; set; }
public decimal PackageSize { get; set; }
public string FamilyCode { get; set; }
public string Family { get; set; }
public string CategoryCode { get; set; }
public string Category { get; set; }
public string SubCategoryCode { get; set; }
public string SubCategory { get; set; }
public string ItemGroupCode { get; set; }
public string ItemGroup { get; set; }
public decimal PurchaseValue { get; set; }
public decimal UnitPurchaseValue { get; set; }
public decimal PackagePurchaseValue { get; set; }
public int DataBatchId { get; set; }
public string FacilityCode { get; set; }
public string CurrencyCode { get; set; }
}
Please don't mind useless reader at the end, doesn't really do much. So without the reader inserting 2.5kk rows took around 26 minutes while SqlBulkCopy
took around 6+- minutes. Is there something I'm doing fundamentally wrong? I’m using IsolationLevel.Snapshot
if this matters. Using SQL Server 2014, free to change DB structure and indices.
UPD 1
Done a couple of adjustments/improvement attempts described by @Xedni, specifically:
VARCHAR(MAX)
to VARCHAR(*SomeValue*)
DataTable
instead of IEnumerable<SqlDataRecord>
My structure is now like this:
CREATE TYPE dbo.RecordImportStructure
AS TABLE (
ItemName VARCHAR(4096),
Supplier VARCHAR(450),
Quantity DECIMAL(18, 2),
ItemUnit VARCHAR(2048),
EntityUnit VARCHAR(2048),
ItemSize DECIMAL(18, 2),
PackageSize DECIMAL(18, 2),
FamilyCode VARCHAR(16),
Family VARCHAR(512),
CategoryCode VARCHAR(16),
Category VARCHAR(512),
SubCategoryCode VARCHAR(16),
SubCategory VARCHAR(512),
ItemGroupCode VARCHAR(16),
ItemGroup VARCHAR(512),
PurchaseValue DECIMAL(18, 2),
UnitPurchaseValue DECIMAL(18, 2),
PackagePurchaseValue DECIMAL(18, 2),
FacilityCode VARCHAR(450),
CurrencyCode VARCHAR(4)
);
So far no noticeable performance gains unfortunately, 26-28 min as before
UPD 2
Checked the execution plan - indices are my bane?
UPD 3
Added OPTION (RECOMPILE);
at the end of my SP, gained a minor boost, now sitting at ~25m for 2.5kk
Try with the following stored procedure:
CREATE PROCEDURE dbo.ImportBatchRecords (
@BatchId INT,
@ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;
DECLARE @ErrorCode int
DECLARE @Step varchar(200)
CREATE TABLE #FacilityInstances
(
Id int NOT NULL,
FacilityCode varchar(512) NOT NULL UNIQUE WITH (IGNORE_DUP_KEY=ON)
);
CREATE TABLE #Currencies
(
Id int NOT NULL,
CurrencyCode varchar(512) NOT NULL UNIQUE WITH (IGNORE_DUP_KEY = ON)
)
INSERT INTO #FacilityInstances(Id, FacilityCode)
SELECT Id, FacilityCode FROM dbo.FacilityInstances
WHERE FacilityCode IS NOT NULL AND Id IS NOT NULL;
INSERT INTO #Currencies(Id, CurrencyCode)
SELECT Id, CurrencyCode FROM dbo.Currencies
WHERE CurrencyCode IS NOT NULL AND Id IS NOT NULL
INSERT INTO dbo.BatchRecords (
ItemName,
Supplier,
Quantity,
ItemUnit,
EntityUnit,
ItemSize,
PackageSize,
FamilyCode,
Family,
CategoryCode,
Category,
SubCategoryCode,
SubCategory,
ItemGroupCode,
ItemGroup,
PurchaseValue,
UnitPurchaseValue,
PackagePurchaseValue,
DataBatchId,
FacilityInstanceId,
CurrencyId
)
OUTPUT INSERTED.Id
SELECT
ItemName,
Supplier,
Quantity,
ItemUnit,
EntityUnit,
ItemSize,
PackageSize,
FamilyCode,
Family,
CategoryCode,
Category,
SubCategoryCode,
SubCategory,
ItemGroupCode,
ItemGroup,
PurchaseValue,
UnitPurchaseValue,
PackagePurchaseValue,
@BatchId,
F.Id,
C.Id
FROM
#FacilityInstances F RIGHT OUTER HASH JOIN
(
#Currencies C
RIGHT OUTER HASH JOIN @ImportTable IT
ON C.CurrencyCode = IT.CurrencyCode
)
ON F.FacilityCode = IT.FacilityCode
This enforces the execution plan to use hash match joins instead of nested loops. I think the culprit of bad performance is the first nested loop that performs an index scan for each row in @ImportTable
I don't know if CurrencyCode
is unique in Currencies
table, so I create the temporal table #Currencies with unique currency codes.
I don't know if FacilityCode
is unique in Facilities
table, so I create the temporal table #FacilityInstances with unique facility codes.
If they are unique you don't need the temporal tables, you can use the permanent tables directly.
Assuming CurrencyCode and FacilityCode are unique the following stored procedure would be better because it doesn't create unnecessary temporary tables:
CREATE PROCEDURE dbo.ImportBatchRecords (
@BatchId INT,
@ImportTable dbo.RecordImportStructure READONLY
)
AS
SET NOCOUNT ON;
DECLARE @ErrorCode int
DECLARE @Step varchar(200)
INSERT INTO dbo.BatchRecords (
ItemName,
Supplier,
Quantity,
ItemUnit,
EntityUnit,
ItemSize,
PackageSize,
FamilyCode,
Family,
CategoryCode,
Category,
SubCategoryCode,
SubCategory,
ItemGroupCode,
ItemGroup,
PurchaseValue,
UnitPurchaseValue,
PackagePurchaseValue,
DataBatchId,
FacilityInstanceId,
CurrencyId
)
OUTPUT INSERTED.Id
SELECT
ItemName,
Supplier,
Quantity,
ItemUnit,
EntityUnit,
ItemSize,
PackageSize,
FamilyCode,
Family,
CategoryCode,
Category,
SubCategoryCode,
SubCategory,
ItemGroupCode,
ItemGroup,
PurchaseValue,
UnitPurchaseValue,
PackagePurchaseValue,
@BatchId,
F.Id,
C.Id
FROM
dbo.FacilityInstances F RIGHT OUTER HASH JOIN
(
dbo.Currencies C
RIGHT OUTER HASH JOIN @ImportTable IT
ON C.CurrencyCode = IT.CurrencyCode
)
ON F.FacilityCode = IT.FacilityCode