I am trying to compare files by objects to find dublicate. I have 2900 files in folder and i need to check them all. In other words I have to run compare methods 2900*2900 times and every time when comparing two file I need to open and close 1 of those. If there is a way to work with Corel files not to open them? or is it posible to get metadata\metadata.xml from Corel VBA files to check and compare some parametrs from it such as Objects(shapes) count? I am in despered...
I am using this logic system
Private Sub CommandButton1_Click()
Dim Folder As String
MousePointer = fmMousePointerHourGlass
Folder = BrowseForFolderDlg("o:\", "Select Source Folder", GetWindowHandle("ThunderDFrame", Me.Caption))
tb_inputFolder.text = Folder
End Sub
Private Sub CommandButton2_Click()
Dim fso As Object
Dim objFolder As Object
Dim objFileList As Object
Dim vFile, vFile1 As Variant
Dim inputFolder As String, outputFolder As String
inputFolder = tb_inputFolder.text 'input folder
If (inputFolder = "") Then
Exit Sub
End If
Set fso = CreateObject("Scripting.FileSystemObject")
Set objFolder = fso.GetFolder(inputFolder)
Set objFileList = objFolder.Files
Dim currentFile As String
Dim dunFiles() As String
Dim arrLength As Integer
ReDim Preserve dunFiles(1)
arrLength = 1
dunFiles(0) = ""
For Each vFile In objFileList
Dim doc As Document, doc1 As Document, buf As String
Dim fName As String
fName = (Left(vFile.name, Len(vFile.name) - 4))
buf = Right(vFile.path, 3)
If (buf = "cdr" And findArrayElement(dunFiles, arrLength, vFile.name) = -1) Then
Set doc = OpenDocument(vFile.path) 'document opend
dunFiles(arrLength - 1) = vFile.name
ReDim Preserve dunFiles((arrLength + 1))
arrLength = arrLength + 1
For Each vFile1 In objFileList
buf = Right(vFile1.path, 3)
If (vFile1.name = currentFile Or findArrayElement(dunFiles, arrLength, vFile1.name) <> -1 Or buf <> "cdr") Then
GoTo nextElement
End If
'Set doc1 = OpenDocument(vFile1.path) 'document opend
Dim res As Variant
res = writeFile(doc.FileName + " VS " + vFile1.name + " " + Str(Now), doc.FilePath + "compare.log")
If (compareDocs(doc, vFile1.path)) Then
dunFiles(arrLength - 1) = fName + "_" + vFile1.name
ReDim Preserve dunFiles((arrLength + 1))
arrLength = arrLength + 1
Dim name As String
name = vFile.ParentFolder.path + "\" + fName + "_" + vFile1.name
Name vFile1.path As name
res = writeFile(vFile.ParentFolder.path + "\" + fName + " the same as " + name, doc.FilePath + "rename.log")
End If
'doc1.Close
nextElement:
Next vFile1
doc.Close
End If
' doc.Close 'close document
Next vFile
lb_info.Caption = "Finished! Press exit"
End Sub
Private Function findArrayElement(inputArray() As String, inputLen As Integer, element As String)
Dim e As String
Dim i As Integer
findArrayElement = -1
For i = 0 To inputLen - 1
If (inputArray(i) = element) Then
findArrayElement = i
Exit Function
End If
Next i
End Function
Private Function compareDocs(doc As Document, path2 As String)
Dim doc1 As Document
Dim e1 As Shape, e2 As Shape, elements() As String
Dim sameShapesCount As Integer
sameShapesCount = 0
ReDim elements(1) As String
elements(0) = ""
Set doc1 = OpenDocument(path2) 'document opend
compareDocs = False
lb_info.Caption = "Comapre " + doc.FullFileName + " with " + path2
For Each e1 In doc.SelectableShapes
e1.UngroupAll
Next e1
For Each e2 In doc1.SelectableShapes
e2.UngroupAll
Next e2
If (doc.SelectableShapes.Count <> doc1.SelectableShapes.Count) Then
doc1.Close
Exit Function
End If
For Each e1 In doc.SelectableShapes
'If (findArrayElement(elements, (UBound(elements) + 1), Str(e1.StaticID)) = -1) Then
'ReDim Preserve elements(UBound(elements) + 1) As String
'elements(UBound(elements)) = e1.StaticID
For Each e2 In doc1.SelectableShapes
If (findArrayElement(elements, (UBound(elements) + 1), "2_" + Str(e2.StaticID)) = -1) Then
If (e1.CompareTo(e2, cdrCompareShapeType, cdrCompareEquals)) Then
If (e1.CompareTo(e2, cdrCompareFillType, cdrCompareEquals)) Then
If (e1.CompareTo(e2, cdrCompareOutline, cdrCompareEquals)) Then
If (e1.CompareTo(e2, cdrCompareOutlineColor, cdrCompareEquals)) Then
If (e1.CompareTo(e2, cdrCompareOutlineWidth, cdrCompareEquals)) Then
'If (e1.CompareTo(e2, cdrCompareShapeHeight, cdrCompareEquals)) Then
If (e1.CompareTo(e2, cdrCompareFil, cdrCompareEquals)) Then
'If (e1.CompareTo(e2, cdrCompareShapeWidth, cdrCompareEquals)) Then
ReDim Preserve elements(UBound(elements) + 1) As String
elements(UBound(elements)) = "2_" + Str(e2.StaticID)
sameShapesCount = sameShapesCount + 1
GoTo nextElement1
'End If
End If
'End If
End If
End If
End If
End If
End If
End If
Next e2
'End If
nextElement1:
Next e1
If (doc.SelectableShapes.Count = sameShapesCount) Then
compareDocs = True
End If
doc1.Close
End Function
Private Function writeFile(text As String, path As String)
Dim fso As Object
Set fso = CreateObject("Scripting.FileSystemObject")
Dim oFile As Object
If Not Dir(path, vbDirectory) = vbNullString Then
Set oFile = fso.OpenTextFile(path, 8)
Else
Set oFile = fso.CreateTextFile(path, 0)
End If
oFile.WriteLine text
oFile.Close
Set fso = Nothing
Set oFile = Nothing
End Function
The main problem is that the "open process" can last up to few minutes and to check 2k corel fiels I need a YEAR
In a first pass, open each file once.
Go over the data you care about -- object count or whatever -- that must be equal.
From this data, build a hash -- a pseudo-random value that combines information from each of them.
Build a table that maps from the hash to a set of draw files that match the hash.
Now you only have to compare files which have a the same hash value, not every pair of files. A well designed hash and data to feed it should reduce your collision rate to nearly zero.
This should speed up your program by a factor of 1000 to 3000 or so.
To ensure that the hash/collision works well, your first pass should just hash and print out the lists of collisions.