I have 2 DataTables: 1st DataTable is created today and 2nd Datatable was created yesterday. Each DataTable contains over 100K rows and 30 columns. I have a unique column - "Master_ID"
I want to compare and subtract (Today's DataTable - Yesterday's DataTable) and get the rows that are updated, deleted, and newly created. I want to perform a complete row-to-row comparison.
Output -> Dictionary <string, List DataRow >
I am looking for a time-efficient approach, probably using LINQ.
Probably one of the most efficient approaches is to use a dictionary-like collection to find out if a row-identifier is new, i'd use ILookup<TKey, TValue>
:
public static Dictionary<string, List<DataRow>?> GetChanges(DataTable dtNew, DataTable dtOld, string masterKeyName, StringComparer masterKeyComparer = null)
{
IEqualityComparer<string> comparer = masterKeyComparer ?? StringComparer.Ordinal;
ILookup<string, DataRow> newKeyLookup = dtNew.AsEnumerable().ToLookup(r => r.Field<string>(masterKeyName), comparer!)!;
ILookup<string, DataRow> oldKeyLookup = dtOld.AsEnumerable().ToLookup(r => r.Field<string>(masterKeyName), comparer!)!;
List<DataRow> updatedRows = new();
List<DataRow> newRows = new();
List<DataRow> deletedRows = new();
foreach (var x in newKeyLookup)
{
List<DataRow> existingRows = oldKeyLookup[x.Key].ToList();
if (existingRows.Any())
{
// potential update, check if there are changes:
var allChangedRows = x.Where(r => !existingRows.Contains(r, DataRowComparer.Default));
updatedRows.AddRange(allChangedRows);
}
if (!existingRows.Any())
{
newRows.AddRange(x);
}
}
foreach (var x in oldKeyLookup)
{
if (!newKeyLookup[x.Key].Any())
{
deletedRows.AddRange(x);
}
}
return new Dictionary<string, List<DataRow>?>
{
{"New Data Rows", newRows},
{"Updated Data Rows", updatedRows},
{"Deleted Data Rows", deletedRows},
};
}