Search code examples
excelvbacomparison

Comparing two datasets with different number of records and different order


I have two datasets in one workbook (Master data and BAZA OLD). The records in each of the two sheets have 11 columns A:K. The Master data worksheet is updated from time to time using Power Query. I would like to compare the new updated data (Master data) with the old data (BAZA OLD) and all mismatching records to be copied and pasted to the worksheet called Output with information in column L "new invoice". I have found some macro which partially do what I need but the problem is the order of records in the Master data sheet can change once uploaded via Power Query. Below is the code I currently have. Public Sub Comparison()

Dim dumpSheet, icdSheet, outputSheet As Worksheet
Dim startRow, outputRow, tempDumpRow, tempICDRow, icdRowCount, finishedICDIndex As Integer
Dim finishedICD() As String
Dim isExist As Boolean

'Set sheets
Set dumpSheet = Sheets("BAZA OLD")
Set icdSheet = Sheets("Master data")
Set outputSheet = Sheets("Output")

'Start row of each sheet for data
startRow = 2
outputRow = 2

'Get row count from ICD sheet
icdRowCount = icdSheet.Range("A:K").End(xlDown).Row

'Index
finishedICDIndex = 0

'Re-define array
ReDim finishedICD(0 To icdRowCount - 1)

'Start row
tempDumpRow = startRow

'Here I looped with OR state, you can modify it to AND start if you want
Do While dumpSheet.Range("A" & tempDumpRow) <> "" Or dumpSheet.Range("B" & tempDumpRow) <> "" Or dumpSheet.Range("C" & tempDumpRow) <> "" And _
    ("D" & tempDumpRow) <> "" Or dumpSheet.Range("E" & tempDumpRow) <> "" Or dumpSheet.Range("F" & tempDumpRow) <> "" And _
    ("G" & tempDumpRow) <> "" Or dumpSheet.Range("H" & tempDumpRow) <> "" Or dumpSheet.Range("I" & tempDumpRow) <> "" And _
    ("J" & tempDumpRow) <> "" Or dumpSheet.Range("K" & tempDumpRow) <> ""
    
    
    'Reset exist flag
    isExist = False

    'loop all row in ICD sheet
    For tempICDRow = 1 To icdRowCount Step 1

        'If row is not finished for checking.
        If UBound(Filter(finishedICD, tempICDRow)) < 0 Then

            'If all cell are equal
            If dumpSheet.Range("A" & tempDumpRow) = icdSheet.Range("A" & tempICDRow) And _
               dumpSheet.Range("B" & tempDumpRow) = icdSheet.Range("B" & tempICDRow) And _
               dumpSheet.Range("C" & tempDumpRow) = icdSheet.Range("C" & tempICDRow) And _
               dumpSheet.Range("D" & tempDumpRow) = icdSheet.Range("D" & tempICDRow) And _
               dumpSheet.Range("E" & tempDumpRow) = icdSheet.Range("E" & tempICDRow) And _
               dumpSheet.Range("F" & tempDumpRow) = icdSheet.Range("F" & tempICDRow) And _
               dumpSheet.Range("G" & tempDumpRow) = icdSheet.Range("G" & tempICDRow) And _
               dumpSheet.Range("H" & tempDumpRow) = icdSheet.Range("H" & tempICDRow) And _
               dumpSheet.Range("I" & tempDumpRow) = icdSheet.Range("I" & tempICDRow) And _
               dumpSheet.Range("I" & tempDumpRow) = icdSheet.Range("J" & tempICDRow) And _
               dumpSheet.Range("J" & tempDumpRow) = icdSheet.Range("K" & tempICDRow) Then
               
                'Set true to exist flag
                isExist = True

                'Store finished row
                finishedICD(finishedICDIndex) = tempICDRow

                finishedICDIndex = finishedICDIndex + 1

                'exit looping
                Exit For

            End If

        End If

    Next tempICDRow

    'Show result
    outputSheet.Range("A" & outputRow) = dumpSheet.Range("A" & tempDumpRow)
    outputSheet.Range("B" & outputRow) = dumpSheet.Range("B" & tempDumpRow)
    outputSheet.Range("C" & outputRow) = dumpSheet.Range("C" & tempDumpRow)
    outputSheet.Range("D" & outputRow) = dumpSheet.Range("D" & tempDumpRow)
    outputSheet.Range("E" & outputRow) = dumpSheet.Range("E" & tempDumpRow)
    outputSheet.Range("F" & outputRow) = dumpSheet.Range("F" & tempDumpRow)
    outputSheet.Range("G" & outputRow) = dumpSheet.Range("G" & tempDumpRow)
    outputSheet.Range("H" & outputRow) = dumpSheet.Range("H" & tempDumpRow)
    outputSheet.Range("I" & outputRow) = dumpSheet.Range("I" & tempDumpRow)
    outputSheet.Range("J" & outputRow) = dumpSheet.Range("J" & tempDumpRow)
    outputSheet.Range("K" & outputRow) = dumpSheet.Range("K" & tempDumpRow)

    If isExist Then
        outputSheet.Range("L" & outputRow) = ""
    Else
        outputSheet.Range("L" & outputRow) = "Item found in ""BAZA OLD"" but not in ""Saldeo"""
    End If

    'increase output row
    outputRow = outputRow + 1

    'go next row
    tempDumpRow = tempDumpRow + 1

Loop

'loop all row in ICD sheet
For tempICDRow = 1 To icdRowCount Step 1

    'If row is not finished for checking.
    If UBound(Filter(finishedICD, tempICDRow)) < 0 Then

        'Show result
        outputSheet.Range("A" & outputRow) = icdSheet.Range("A" & tempICDRow)
        outputSheet.Range("B" & outputRow) = icdSheet.Range("B" & tempICDRow)
        outputSheet.Range("C" & outputRow) = icdSheet.Range("C" & tempICDRow)
        outputSheet.Range("D" & outputRow) = icdSheet.Range("D" & tempICDRow)
        outputSheet.Range("E" & outputRow) = icdSheet.Range("E" & tempICDRow)
        outputSheet.Range("F" & outputRow) = icdSheet.Range("F" & tempICDRow)
        outputSheet.Range("G" & outputRow) = icdSheet.Range("G" & tempICDRow)
        outputSheet.Range("H" & outputRow) = icdSheet.Range("H" & tempICDRow)
        outputSheet.Range("I" & outputRow) = icdSheet.Range("I" & tempICDRow)
        outputSheet.Range("J" & outputRow) = icdSheet.Range("J" & tempICDRow)
        outputSheet.Range("K" & outputRow) = icdSheet.Range("K" & tempICDRow)
        
        'outputSheet.Range("P" & outputRow) = "Item found in ""Baza Faktur Saldeo"" but not in ""BAZA OLD"""
        outputSheet.Range("L" & outputRow) = "NEW INVOICE"


        'increase output row
        outputRow = outputRow + 1

    End If

Next tempICDRow

End Sub


Solution

  • Create a key string for each row by concatenating the cell values and compare the keys on the 2 sheets using a Dictionary Object

    Sub Comparison()
    
        Const COL_MATCH = 11
        Const SEP = "~"
    
        Dim dumpSheet As Worksheet, icdSheet As Worksheet, outputSheet As Worksheet
        Dim rng As Range
        Dim i As Long, lastrow As Long, outrow As Long
        
        'Set sheets
        With ThisWorkbook
            Set dumpSheet = .Sheets("BAZA OLD")
            Set icdSheet = .Sheets("Master data")
            Set outputSheet = .Sheets("Output")
        End With
        
        ' dump sheet
        Dim dict As Object, k as String, ar
        Set dict = CreateObject("Scripting.Dictionary")
        With dumpSheet
            lastrow = .UsedRange.Row + .UsedRange.Rows.Count - 1
            For i = 2 To lastrow
                Set rng = .Range("A" & i).Resize(, COL_MATCH) 'A-K
                ar = Application.Transpose(rng)
                k = Join(Application.Transpose(ar), SEP) ' key
                ' check not blank
                If Len(k) >= COL_MATCH Then
                    If dict.exists(k) Then
                        MsgBox "Key not unique '" & k & "'", vbCritical, .Name & " Row " & i & " and Row " & dict(k)
                    Else
                        dict.Add k, i
                    End If
                End If
            Next
        End With
        
        ' compare master to dump sheet
        outrow = 2
        With icdSheet
            lastrow = .UsedRange.Row + .UsedRange.Rows.Count - 1
            For i = 2 To lastrow
                Set rng = .Range("A" & i).Resize(, COL_MATCH)
                ar = Application.Transpose(rng) 'A-K
                k = Join(Application.Transpose(ar), SEP) ' key
                If Len(k) >= COL_MATCH And Not dict.exists(k) Then
                    outputSheet.Range("A" & outrow).Resize(, COL_MATCH).Value2 = rng.Value2
                    outputSheet.Cells(outrow, COL_MATCH + 1) = "new invoice"
                    outrow = outrow + 1
                End If
            Next
        End With
        
        ' result
        If outrow = 2 Then
            MsgBox "No new invoices", vbInformation
        Else
            MsgBox outrow - 2 & " new invoices", vbInformation
        End If
        
    End Sub