Search code examples
excelvbaexcel-formulavbscripthp-uft

Fast way to compare two excel files?


I want to compare 2 excels files [Having only 1 sheet in both] having 10-15 columns and rows will be more than 30K. We got one excel macro file which complete the comparison within 5-10Mins. Limitation of this macro is that it can compare only 2-3 columns at a time. So every time we need to run this macro multiple times which is time consuming process. So I created one utility file [.vbs file] which perform this task in one go but it takes around 1-3Hrs. Is there any other way to perform this comparison in short time in one go?

startTime=Timer()
Set objExcel=Createobject("Excel.application")
objExcel.Visible=True
Set objWorkbook=objExcel.Workbooks.Open("E:\QTP trial version\Data.xls")

'Set deleteAnalysis_CopySheet=objWorkbook.sheets("Analysis_Copy")
'deleteAnalysis_CopySheet.delete
'Set deleteSummarySheet=objWorkbook.sheets("Summary")
'deleteSummarySheet.delete

Set objAnalysis_Copy=objWorkbook.sheets.add
objAnalysis_Copy.name="Analysis_Copy"

Set objSummary=objWorkbook.sheets.add
objSummary.name="Summary"
objSummary.Cells(1,1)="Analysis Row Count"
objSummary.Cells(2,1)="Reporting Row Count"
objSummary.Cells(3,1)="Analysis Column Count"
objSummary.Cells(4,1)="Reporting Column Count"
objSummary.Cells(5,1)="Difference of Row Count"
objSummary.Cells(6,1)="Difference of Column Count"
objSummary.Cells(7,1)="False Count"

' ------------------------1st Check - Verify the position of ''Metrics' in Analysis and Reporting tab. It must be same---------------------
'Get the control of Analysis tab
Set objAnalysis=objExcel.Worksheets.Item("Analysis")
intAnalysisRowCount=objAnalysis.Usedrange.rows.count
objSummary.Cells(1,2)=intAnalysisRowCount
intAnalysisColCount=objAnalysis.Usedrange.Columns.count
objSummary.Cells(3,2)=intAnalysisColCount

'Get Column number of 'Metric' Column from Analysis tab
For intMetricAnalysis=1 to intAnalysisColCount
        If(Trim(Lcase(objAnalysis.Cells(1,intMetricAnalysis)))=Trim(Lcase("Metrics"))) Then
            Exit for
        End If
Next

'Get all Analysis columns in 1 string
strAnalysisColumnOrder=""
For intAnalysisColumnOrder=1 to intAnalysisColCount
        strAnalysisColumnOrder=strAnalysisColumnOrder&"*"&objAnalysis.Cells(1,intAnalysisColumnOrder)               

        If(intAnalysisColumnOrder=1) then
            strAnalysisColumnOrder=Replace(strAnalysisColumnOrder,"*","")
        End If      
Next

Set objReporting=objExcel.Worksheets.Item("Reporting")
intReportingRowCount=objReporting.Usedrange.rows.count
objSummary.Cells(2,2)=intReportingRowCount
intReportingColCount=objReporting.Usedrange.Columns.count
objSummary.Cells(4,2)=intReportingColCount

''Get Column number of 'Metric' Column from Reporting tab
For intMetricReporting=1 to intReportingColCount
        If(Trim(Lcase(objReporting.Cells(1,intMetricReporting)))=Trim(Lcase("Metrics"))) Then
            Exit for
        End If
Next

'Get all Reporting columns in 1 string
strReportingColumnOrder=""
For intReportingColumnOrder=1 to intAnalysisColCount
            strReportingColumnOrder=strReportingColumnOrder&"*"&objReporting.Cells(1,intReportingColumnOrder)               

        If(intReportingColumnOrder=1) then
            strReportingColumnOrder=Replace(strReportingColumnOrder,"*","")
        End If      
Next


''Metric' column number must be same
If(intMetricAnalysis<>intMetricReporting) then
    msgbox "Merics column is  at  "&intMetricAnalysis&" position in 'Analysis' Tab And  at "&intMetricReporting&" position in 'Reporting' tab. 'Metrics' column should be at same position in both tab."
    strMetricsFlag=False
Else
    strMetricsFlag=True
End IF

'-----------2nd Check, Verify count of columns in 'Analysis'  And 'Reporting' tab . It Must be same
If  intAnalysisColCount<>intReportingColCount Then
    msgbox "Column count of 'Reporting' Tab is not same as of 'Analysis tab'."
    strAnalysisColCount=False
Else
    strAnalysisColCount=True
End If

''---------------3rd Check , Verify Order of columns in 'Analysis'  And 'Reporting' tab . It Must be same
If Trim(Lcase(strAnalysisColumnOrder))<>Trim(Lcase(strReportingColumnOrder)) then
    msgbox "Column order of 'Reporting' Tab is not same as of 'Analysis tab'. Reporting column order should be  "&strAnalysisColumnOrder
    strAnalysisColumnOrderFlag=False
Else
    strAnalysisColumnOrderFlag=True
End IF

'Creare 'Analysis_Copy' tab and add headers
Set objAnalysisCopy=objExcel.Worksheets.Item("Analysis_Copy")

strFirstCoulmn_AggKeys=""
For intHeaderAggkey=1 to intMetricAnalysis-1
        strFirstCoulmn_AggKeys=strFirstCoulmn_AggKeys&"*"&objAnalysis.Cells(1,intHeaderAggkey)              

        If(intHeaderAggkey=1) then
            strFirstCoulmn_AggKeys=Replace(strFirstCoulmn_AggKeys,"*","")
        End If      
Next

objAnalysisCopy.Cells(1,1)=strFirstCoulmn_AggKeys

strSecondCoulmn_AnalysisMetrics=""

For intHeaderAnalysisMetrics=intMetricAnalysis+1 to intAnalysisColCount
        strSecondCoulmn_AnalysisMetrics=strSecondCoulmn_AnalysisMetrics&"*"&objAnalysis.Cells(1,intHeaderAnalysisMetrics)   

        If(intHeaderAnalysisMetrics=intMetricAnalysis+1 ) then
            strSecondCoulmn_AnalysisMetrics=Replace(strSecondCoulmn_AnalysisMetrics,"*","")
        End If                          
Next

objAnalysisCopy.Cells(1,2)="Analysis_"&strSecondCoulmn_AnalysisMetrics
objAnalysisCopy.Cells(1,3)="Reporting_"&strSecondCoulmn_AnalysisMetrics
objAnalysisCopy.Cells(1,4)="Status"

objWorkbook.Save

'$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

If  strAnalysisColumnOrderFlag=False OR strMetricsFlag=False OR strAnalysisColCount=False Then

            msgbox "So Data Comparision can not be done"
            objWorkbook.Save
            objWorkbook.Close
            objExcel.Quit

Else

        intFalseCount=0

        For intAnalysisRow=2 to intAnalysisRowCount
        
        '   ------ Get the control of  ''Analysis' tab and the string of  Aggrecate Keys [strAnalysisAggrData]  and  respective metrics [strAnalysisMetricsData]
                      Set objAnalysis=objExcel.Worksheets.Item("Analysis")
                       ' Append all data of  each row  which is before 'Metrics' column 
                        strAnalysisAggrData=""
                        For intAnalysisColumn=1 to intMetricAnalysis-1
                                strAnalysisAggrData=strAnalysisAggrData&"*"&objAnalysis.Cells(intAnalysisRow,intAnalysisColumn)             
                    
                                If(intAnalysisColumn=1) then
                                    strAnalysisAggrData=Replace(strAnalysisAggrData,"*","")
                                End If      
                        Next
                    
        '              ' Append all data of  each row  which is after 'Metrics' column 
                        strAnalysisMetricsData=""
                        For intFromMetric=intMetricAnalysis+1 to intAnalysisColCount

                                strAnalysisMetricsData=strAnalysisMetricsData&"*"&objAnalysis.Cells(intAnalysisRow,intFromMetric)   
                    
                                If(intFromMetric=intMetricAnalysis+1 ) then
                                    strAnalysisMetricsData=Replace(strAnalysisMetricsData,"*","")
                                End If                          
                        Next
                    
                '   ------ Get the control of  ''Reporting' tab and the string of  Aggrecate Keys [strAnalysisAggrData]  and  respective metrics [strAnalysisMetricsData]
                    Set objReporting=objExcel.Worksheets.Item("Reporting")
            
                    For  intReportingRow=1 to intReportingRowCount
            
                            ' Append all data of  each row  which is before 'Metrics' column 
                            strReportingAggrData=""
                            For intBeforeMetricReporting=1 to intMetricReporting-1
                                    strReportingAggrData=strReportingAggrData&"*"&objReporting.Cells(intReportingRow,intBeforeMetricReporting)              
                        
                                    If(intBeforeMetricReporting=1) then
                                        strReportingAggrData=Replace(strReportingAggrData,"*","")
                                    End If      
                            Next
                        
                            ' Append all data of  each row  which is after 'Metrics' column 
                            strReportingMetricsData=""
                            For intFromReportingMetric=intMetricReporting+1 to intReportingColCount

                                  strReportingMetricsData=strReportingMetricsData&"*"&objReporting.Cells(intReportingRow,intFromReportingMetric)    
                        
                                    If(intFromReportingMetric=intMetricReporting+1 ) then
                                        strReportingMetricsData=Replace(strReportingMetricsData,"*","")
                                    End If                          
                            Next

        '------------------------------------------------------------  Actual Comparision will be from here ------------------------------------------
            
                            If  Trim(LCase(strAnalysisAggrData))=Trim(LCase(strReportingAggrData)) Then
        
                                    objAnalysisCopy.Cells(intAnalysisRow,1)=strAnalysisAggrData
                                    objAnalysisCopy.Cells(intAnalysisRow,2)=strAnalysisMetricsData
                                    objAnalysisCopy.Cells(intAnalysisRow,3)=strReportingMetricsData

                                    'Compare Metrics Data
                                    If  Trim(LCase(strAnalysisMetricsData))=Trim(LCase(strReportingMetricsData)) Then                                   
                                            objAnalysisCopy.Cells(intAnalysisRow,4)="PASS"
                                            objAnalysisCopy.Cells(intAnalysisRow,4).font.color=vbGreen  
                                    Else
                                            objAnalysisCopy.Cells(intAnalysisRow,4)="FAIL"
                                            intFalseCount=intFalseCount+1
                                            objAnalysisCopy.Cells(intAnalysisRow,4).font.color=vbRed    
                                    End If

                                    Exit For
                                
                            End If                          
        
                    Next        
        Next

            objSummary.Cells(5,2)=intAnalysisRowCount-intReportingRowCount
            objSummary.Cells(6,2)=intAnalysisColCount-intReportingColCount
            objSummary.Cells(7,2)=intFalseCount
            objSummary.Cells(7,2).font.color=vbRed      

            objWorkbook.Save
            objWorkbook.Close
            objExcel.Quit

            EndTime=Timer()

            TotalTime=EndTime-startTime
            
            msgbox "Data Comparision is Completed. Comparision time is "&TotalTime&"Secs"

End If

Solution

  • Use a dictionary and you avoid the nested loops and only scan each sheet once. For example as a VBA macro (untested)

    Sub compare()
    
         Dim wb As Workbook
         Dim ws(2) As Worksheet, wsSum As Worksheet, wsCopy As Worksheet
         Dim rowCount(2) As Long, colCount(2) As Integer, colMetric(2) As Integer
         Dim colsMetric(2) As String, colsAll(2) As String, colsKeys(2) As String
         Dim bMetricsFlag As Boolean, bColCountFlag As Boolean, bColOrderFlag As Boolean
         Dim i As Long, ar, msg As String, intFalseCount As Long
         
         Dim t0 as Single
         t0 = Timer
    
         Set wb = ThisWorkbook
         Set ws(1) = wb.Sheets("Analysis")
         Set ws(2) = wb.Sheets("Reporting")
         Set wsSum = wb.Sheets("Summary")
         wsSum.Cells.Clear
         wsSum.Range("A1:A7") = WorksheetFunction.Transpose(Array("Analysis Row Count", _
               "Reporting Row Count", "Analysis Column Count", "Reporting Column Count", _
               "Difference of Row Count", "Difference of Column Count", "False Count"))
    
         Set wsCopy = wb.Sheets("Analysis_Copy")
         wsCopy.Cells.Clear
    
         ' get stats for each sheet 1-Analyis 2=Reporting
         For i = 1 To 2
             ar = Stats(ws(i))
             rowCount(i) = ar(0)
             colCount(i) = ar(1)
             colMetric(i) = ar(2)
             colsAll(i) = ar(3)
             colsMetric(i) = ar(4)
             colsKeys(i) = ar(5)
         Next
    
         ' summary
         With wsSum
             .Cells(1, 2) = rowCount(1)
             .Cells(2, 2) = rowCount(2)
             .Cells(3, 2) = colCount(1)
             .Cells(4, 2) = colCount(2)
         End With
    
         ' check stats
         'Metric' column number must be same
         If colMetric(1) = 0 Or colMetric(2) = 0 Or colMetric(1) <> colMetric(2) Then
             msg = "Metrics columns not the same or missing : " & vbCr & _
             "Analysis : " & colMetric(1) & vbCr & _
             "Reporting : " & colMetric(2)
             MsgBox msg, vbCritical
             bMetricsFlag = False
         Else
             bMetricsFlag = True
         End If
    
         ' Verify count of columns
         If colCount(1) <> colCount(2) Then
             msg = "Column counts not the same : " & vbCr & _
             "Analysis : " & colCount(1) & vbCr & _
             "Reporting : " & colCount(2)
             MsgBox msg, vbCritical
             bColCountFlag = False
         Else
             bColCountFlag = True
         End If
    
        'Verify Order of columns
         If colsAll(1) <> colsAll(2) Then
             msg = "Column order not the same : " & vbCr & _
             "Analysis : " & colsAll(1) & vbCr & _
             "Reporting : " & colsAll(2)
             MsgBox msg, vbCritical
             bColOrderFlag = False
         Else
             bColOrderFlag = True
         End If
    
         With wsCopy
             .Cells(1, 1) = colsKeys(1)
             .Cells(1, 2) = "Analysis_" & colsMetric(1)
             .Cells(1, 3) = "Reporting_" & colsMetric(2)
             .Cells(1, 4) = "Status"
         End With
    
         ' checks OK ?
         If bColOrderFlag And bMetricsFlag And bColCountFlag Then
             ' ok
         Else
             MsgBox "So Data Comparision can not be done", vbCritical
             Exit Sub
         End If
    
         ' start comparison
         Dim dict As Object, m As Long, c As Long, s As String
         Dim sKey As String, sMetric As String
         Set dict = CreateObject("Scripting.Dictionary")
    
         ' scan Reporting sheet to build dictionary
         m = colMetric(2)
         For i = 1 To rowCount(2)
             'join cols up to and after metric col
             sMetric = "": sKey = ""
             For c = 1 To colCount(2)
                 s = Trim(ws(2).Cells(i, c))
                 If c < m Then
                     If sMetric <> "" Then sMetric = sMetric & "*"
                     sMetric = sMetric & s
                 ElseIf c > m Then
                     If sKey <> "" Then sKey = sKey & "*"
                     sKey = sKey & s
                 End If
             Next
             dict(sKey) = sMetric
         Next
    
        ' scan Analysis sheet to compare dictionary
         m = colMetric(1)
         For i = 2 To rowCount(1)
             'join cols up to and after metric col
             sMetric = "": sKey = ""
             For c = 1 To colCount(1)
                 s = Trim(ws(1).Cells(i, c))
                 If c < m Then
                     If sMetric <> "" Then sMetric = sMetric & "*"
                     sMetric = sMetric & s
                 ElseIf c > m Then
                     If sKey <> "" Then sKey = sKey & "*"
                     sKey = sKey & s
                 End If
             Next
    
             ' result
             wsCopy.Cells(i, 1) = sKey
             wsCopy.Cells(i, 2) = sMetric
             wsCopy.Cells(i, 3) = dict(sKey)
    
             ' pass or fail
             If sMetric = dict(sKey) Then
                 wsCopy.Cells(i, 4) = "PASS"
                 wsCopy.Cells(i, 4).Font.Color = vbGreen
             Else
                 wsCopy.Cells(i, 4) = "FAIL"
                 wsCopy.Cells(i, 4).Font.Color = vbRed
                 intFalseCount = intFalseCount + 1
             End If
         Next
    
         With wsSum
             .Cells(5, 2) = rowCount(1) - rowCount(2)
             .Cells(6, 2) = colCount(1) - colCount(2)
             .Cells(7, 2) = intFalseCount
             .Cells(7, 2).Font.Color = vbRed
         End With
    
         MsgBox i - 2 & " rows scanned " & vbCrLf & _
             intFalseCount & " FAILED", vbInformation, Int(Timer - t0) & "seconds"
    
    End Sub
    
    Function Stats(ws As Worksheet) As Variant
    
         Dim c As Integer, ar(5) As Variant, s As String
         ar(0) = ws.UsedRange.Rows.Count
         ar(1) = ws.UsedRange.Columns.Count
         ar(2) = 0 'metric column
         ar(3) = "" ' col aggregated
         ar(4) = "" ' cols upto not including metric
         ar(5) = "" ' cols after metric
    
         For c = 1 To ar(1)
            s = LCase(Trim(ws.Cells(1, c)))
            If s = "metric" Then
                ar(2) = c
            End If
    
            ' aggregate headers before/after metric
            If ar(2) = 0 Then
                If ar(4) <> "" Then ar(4) = ar(4) & "*"
                ar(4) = ar(4) & s
            ElseIf c > ar(2) Then
                If ar(5) <> "" Then ar(5) = ar(5) & "*"
                ar(5) = ar(5) & s
            End If
    
            ' aggregate all
            If ar(3) <> "" Then ar(3) = ar(3) & "*"
            ar(3) = ar(3) & s
         Next
         Stats = ar
    End Function
    

    Test data generator

    Sub testdata()
       Dim ws As Worksheet, n, r, c, ar
       ar = Array("", "Analysis", "Reporting")
       For n = 1 To 2
            Set ws = Sheets(ar(n))
            For r = 1 To 30000
               For c = 1 To 15
                   ws.Cells(r, c) = Chr(64 + c) & r & "_abcdefghijklmnopqrstuvwxyz_"
               Next
            Next
            ws.Cells(1, 10) = "metric" ' col J
       Next
       MsgBox "test data created"
    End Sub