Tags: .net, sql-server, .net-core, ado.net

ADO.NET looking for Bulk Insert - row-by-row is slow as Molasses


With 20K records to insert, this is taking about an hour. Is there a way to do an ADO.NET insert in bulk instead of a loop that runs forever?

private async Task InsertReprocessTsk(List<ReprocessTskChngs> reprocessTskChngs)
{
    using (SqlConnection conn = new SqlConnection(_connectionString))
    {
        await conn.OpenAsync();

        string insertSql = @"insert into REPROCESS_TSK(CRS, TskLOC, PROCESSED, DATE_ENTERED)
                                 Values(@CRS, @TskLOC, @PROCESSED, GETDATE())";

        using SqlTransaction trans = conn.BeginTransaction();
        try
        {

            foreach (ReprocessTskChngs reprocessTskChng in reprocessTskChngs)
            {
                using (SqlCommand cmd = new SqlCommand(insertSql, conn, trans))
                {
                    cmd.Parameters.Add("@CRS", SqlDbType.SmallInt).Value = reprocessTskChng.CRS;
                    cmd.Parameters.Add("@TskLOC", SqlDbType.VarChar).Value = reprocessTskChng.TskLOC;
                    cmd.Parameters.Add("@PROCESSED", SqlDbType.Int).Value = reprocessTskChng.PROCESSED;

                    await cmd.ExecuteNonQueryAsync();

                }
            }

            await trans.CommitAsync();
        }
        catch
        {
            trans.Rollback();
            throw;
        }
    }
}

I could find ADO.NET bulk copy operations, but nothing specific to doing inserts.


Solution

  • Well, there is a way to insert multiple items at once into a table, but it is vulnerable to SQL injection. Instead of creating one insert command for each row, you can batch the insert statements into a string and execute each batch all at once. Here is how it would look based on your example:

    var sqlStatements = new List<string>();
    string sqlStatement = string.Empty;
    int index = 0;
    // process all items
    foreach (var reprocessTskChng in reprocessTskChngs)
    {
        index++;
        sqlStatement += "insert into REPROCESS_TSK(CRS, TskLOC, PROCESSED, DATE_ENTERED) " +
                        $"Values('{reprocessTskChng.CRS}', '{reprocessTskChng.TskLOC}', '{reprocessTskChng.PROCESSED}', GETDATE());";
        // create a bulk of inserts for every 1000 items
        if (index == 1000)
        {
            sqlStatements.Add(sqlStatement);
            sqlStatement = string.Empty;
            index = 0;
        }
    }
    
    // if there are any items left in the bulk insert query
    if (index > 0)
    {
        sqlStatements.Add(sqlStatement);
        sqlStatement = string.Empty;
    }
    
    using (SqlConnection conn = new SqlConnection(_connectionString))
    {
        await conn.OpenAsync();
        
        using SqlTransaction trans = conn.BeginTransaction();
        try
        {
            // process all bulks of insert statements and execute
            foreach (var statement in sqlStatements)
            {
                using (SqlCommand cmd = new SqlCommand(statement, conn, trans))
                {
                    await cmd.ExecuteNonQueryAsync();
                }
            }
            await trans.CommitAsync();
        }
        catch
        {
            trans.Rollback();
            throw;
        }
    }
    

    For 20k records, this way you will execute only 20 commands instead of 20,000. The downside, as mentioned, is that you no longer use SQL command parameters.
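
    As an aside, the bulk copy operations mentioned in the question (SqlBulkCopy) are ADO.NET's built-in way to do a bulk insert, and they avoid string concatenation entirely. Below is a minimal sketch, assuming the REPROCESS_TSK columns match those shown in the question and that filling DATE_ENTERED client-side (instead of GETDATE()) is acceptable; the method name BulkInsertReprocessTsk is just for illustration.

    private async Task BulkInsertReprocessTsk(List<ReprocessTskChngs> reprocessTskChngs)
    {
        // Build an in-memory table whose columns mirror REPROCESS_TSK
        var table = new DataTable();
        table.Columns.Add("CRS", typeof(short));
        table.Columns.Add("TskLOC", typeof(string));
        table.Columns.Add("PROCESSED", typeof(int));
        table.Columns.Add("DATE_ENTERED", typeof(DateTime));

        foreach (var change in reprocessTskChngs)
        {
            // DateTime.Now stands in for GETDATE(); adjust if the server clock must be used
            table.Rows.Add(change.CRS, change.TskLOC, change.PROCESSED, DateTime.Now);
        }

        using (SqlConnection conn = new SqlConnection(_connectionString))
        {
            await conn.OpenAsync();

            using (var bulkCopy = new SqlBulkCopy(conn))
            {
                bulkCopy.DestinationTableName = "REPROCESS_TSK";

                // Map source columns to destination columns explicitly
                bulkCopy.ColumnMappings.Add("CRS", "CRS");
                bulkCopy.ColumnMappings.Add("TskLOC", "TskLOC");
                bulkCopy.ColumnMappings.Add("PROCESSED", "PROCESSED");
                bulkCopy.ColumnMappings.Add("DATE_ENTERED", "DATE_ENTERED");

                // Streams all rows to SQL Server in a single bulk operation
                await bulkCopy.WriteToServerAsync(table);
            }
        }
    }

    With SqlBulkCopy the values are sent as typed data rather than embedded in SQL text, so the SQL injection concern above does not apply.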