Search code examples
vbacoreldraw

How to fast compare Corel file by its content using Corel VBA not to open files


I am trying to compare files by objects to find dublicate. I have 2900 files in folder and i need to check them all. In other words I have to run compare methods 2900*2900 times and every time when comparing two file I need to open and close 1 of those. If there is a way to work with Corel files not to open them? or is it posible to get metadata\metadata.xml from Corel VBA files to check and compare some parametrs from it such as Objects(shapes) count? I am in despered...

I am using this logic system

Private Sub CommandButton1_Click()

    Dim Folder As String
    MousePointer = fmMousePointerHourGlass
    Folder = BrowseForFolderDlg("o:\", "Select Source Folder", GetWindowHandle("ThunderDFrame", Me.Caption))
    tb_inputFolder.text = Folder

End Sub

Private Sub CommandButton2_Click()

    Dim fso As Object
    Dim objFolder As Object
    Dim objFileList As Object
    Dim vFile, vFile1 As Variant
    Dim inputFolder As String, outputFolder As String

    inputFolder = tb_inputFolder.text 'input folder

    If (inputFolder = "") Then
        Exit Sub
    End If


    Set fso = CreateObject("Scripting.FileSystemObject")
    Set objFolder = fso.GetFolder(inputFolder)
    Set objFileList = objFolder.Files

    Dim currentFile As String

    Dim dunFiles() As String
    Dim arrLength As Integer
    ReDim Preserve dunFiles(1)
    arrLength = 1
    dunFiles(0) = ""



    For Each vFile In objFileList

        Dim doc As Document, doc1 As Document, buf As String
        Dim fName As String
        fName = (Left(vFile.name, Len(vFile.name) - 4))

        buf = Right(vFile.path, 3)

        If (buf = "cdr" And findArrayElement(dunFiles, arrLength, vFile.name) = -1) Then

            Set doc = OpenDocument(vFile.path) 'document opend

            dunFiles(arrLength - 1) = vFile.name
            ReDim Preserve dunFiles((arrLength + 1))
            arrLength = arrLength + 1


            For Each vFile1 In objFileList
                buf = Right(vFile1.path, 3)

                If (vFile1.name = currentFile Or findArrayElement(dunFiles, arrLength, vFile1.name) <> -1 Or buf <> "cdr") Then
                    GoTo nextElement
                End If

                'Set doc1 = OpenDocument(vFile1.path) 'document opend
                Dim res As Variant


                res = writeFile(doc.FileName + " VS " + vFile1.name + " " + Str(Now), doc.FilePath + "compare.log")

                If (compareDocs(doc, vFile1.path)) Then

                    dunFiles(arrLength - 1) = fName + "_" + vFile1.name
                    ReDim Preserve dunFiles((arrLength + 1))
                    arrLength = arrLength + 1

                    Dim name As String
                    name = vFile.ParentFolder.path + "\" + fName + "_" + vFile1.name
                    Name vFile1.path As name


                    res = writeFile(vFile.ParentFolder.path + "\" + fName + " the same as " + name, doc.FilePath + "rename.log")

                End If

                'doc1.Close
nextElement:

            Next vFile1
            doc.Close

        End If




       ' doc.Close 'close document

    Next vFile

    lb_info.Caption = "Finished! Press exit"

End Sub

Private Function findArrayElement(inputArray() As String, inputLen As Integer, element As String)
    Dim e As String
    Dim i As Integer

    findArrayElement = -1

    For i = 0 To inputLen - 1
        If (inputArray(i) = element) Then
            findArrayElement = i
            Exit Function
        End If
    Next i


End Function

Private Function compareDocs(doc As Document, path2 As String)
    Dim doc1 As Document
    Dim e1 As Shape, e2 As Shape, elements() As String
    Dim sameShapesCount As Integer
    sameShapesCount = 0
    ReDim elements(1) As String
    elements(0) = ""

    Set doc1 = OpenDocument(path2) 'document opend
    compareDocs = False


    lb_info.Caption = "Comapre " + doc.FullFileName + " with " + path2

    For Each e1 In doc.SelectableShapes
        e1.UngroupAll
    Next e1

    For Each e2 In doc1.SelectableShapes
        e2.UngroupAll
    Next e2

    If (doc.SelectableShapes.Count <> doc1.SelectableShapes.Count) Then
        doc1.Close
        Exit Function
    End If


    For Each e1 In doc.SelectableShapes




        'If (findArrayElement(elements, (UBound(elements) + 1), Str(e1.StaticID)) = -1) Then
            'ReDim Preserve elements(UBound(elements) + 1) As String
            'elements(UBound(elements)) = e1.StaticID

            For Each e2 In doc1.SelectableShapes
                If (findArrayElement(elements, (UBound(elements) + 1), "2_" + Str(e2.StaticID)) = -1) Then



                    If (e1.CompareTo(e2, cdrCompareShapeType, cdrCompareEquals)) Then
                        If (e1.CompareTo(e2, cdrCompareFillType, cdrCompareEquals)) Then
                            If (e1.CompareTo(e2, cdrCompareOutline, cdrCompareEquals)) Then
                                If (e1.CompareTo(e2, cdrCompareOutlineColor, cdrCompareEquals)) Then
                                    If (e1.CompareTo(e2, cdrCompareOutlineWidth, cdrCompareEquals)) Then
                                        'If (e1.CompareTo(e2, cdrCompareShapeHeight, cdrCompareEquals)) Then
                                            If (e1.CompareTo(e2, cdrCompareFil, cdrCompareEquals)) Then
                                                'If (e1.CompareTo(e2, cdrCompareShapeWidth, cdrCompareEquals)) Then
                                                    ReDim Preserve elements(UBound(elements) + 1) As String
                                                    elements(UBound(elements)) = "2_" + Str(e2.StaticID)
                                                    sameShapesCount = sameShapesCount + 1
                                                    GoTo nextElement1
                                                'End If
                                            End If
                                        'End If
                                    End If
                                End If
                            End If
                        End If
                    End If

                End If
            Next e2

        'End If
nextElement1:
    Next e1

    If (doc.SelectableShapes.Count = sameShapesCount) Then
        compareDocs = True
    End If

    doc1.Close

End Function


Private Function writeFile(text As String, path As String)
    Dim fso As Object
    Set fso = CreateObject("Scripting.FileSystemObject")
    Dim oFile As Object


    If Not Dir(path, vbDirectory) = vbNullString Then
        Set oFile = fso.OpenTextFile(path, 8)
    Else
        Set oFile = fso.CreateTextFile(path, 0)
    End If


    oFile.WriteLine text
    oFile.Close

    Set fso = Nothing
    Set oFile = Nothing
End Function

The main problem is that the "open process" can last up to few minutes and to check 2k corel fiels I need a YEAR


Solution

  • In a first pass, open each file once.

    Go over the data you care about -- object count or whatever -- that must be equal.

    From this data, build a hash -- a pseudo-random value that combines information from each of them.

    Build a table that maps from the hash to a set of draw files that match the hash.

    Now you only have to compare files which have a the same hash value, not every pair of files. A well designed hash and data to feed it should reduce your collision rate to nearly zero.

    This should speed up your program by a factor of 1000 to 3000 or so.

    To ensure that the hash/collision works well, your first pass should just hash and print out the lists of collisions.