Search code examples
excelvbadirectory

Speed up processing time to loop through files in a folder


I have working code that looks through values of cells C3:C, then tries to find the matching string within the files, in newest to oldest, with a .txt extension.

I would like to speed up processing time to loop through 20,000 files in the folder.

It looks like the code runs each loop with default sort by ascending Name.
I want it to do this in order of priorities:

  1. Find any files starting with datemodified being today with the .txt extension.
  2. Check to see if the title of the file name has partial match to the contents of each cell.
  3. Scan through file.txt for any and all matches to each C3:C cell values.
  4. Process string manipulation with a lot of if-then statements per user requirements.
  5. Move on to next file from today, repeat.
  6. Move onto yesterday, repeat.
  7. Go back 20 days max until either there are no more .txt files, or all cells have been found.

All the .txt files combined are about 10% of the 20,000 files.

' ======================= Sorting Algorithm ======================================
fileDate = 0
For fileDate = 0 To 20 'days back to search
    file = Dir(folderPath & "*.txt")
    For Each file In fs.getfolder(folderPath).files
    SRName = Replace(Left(Mid(file, 35), 6), "_", "") 'works if all names are 1234XY, trims underscores
        If file.datelastmodified >= Now - fileDate And LCase(Right(file.Name, 4)) = ".prl" Then  ' FILE MATCH
        ' =========== Part List Prep Array ============
            For Each cell In Range("C3:C" & LastRow) ' CELLS NAME MATCH TO CHECK IN FILE
            ToTestIndex = cell.Row - 2
            Homework = "PNL1," & cell
            If PartToTestArray(ToTestIndex) <> True And InStr(1, Homework, SRName, vbTextCompare) > 0 Then
                If InStr(1, Homework, SRName, vbTextCompare) > 0 Then
                    Cells(cell.Row, 6).Value = "Searching in " & "'" & Mid(file, 35, Len(file) - 38) & "'"
                    PartToTestArray(ToTestIndex) = True
                    Cells(cell.Row, 7).Value = file.datelastmodified
                End If
                End If
            Next cell
           ' =========== Part List Prep Array ============
            fileNumber = FreeFile ' Read the content of the file
            Open file For Input As fileNumber
            fileContent = Input$(LOF(fileNumber), fileNumber)
            Close fileNumber
            lines = Split(fileContent, vbCrLf) ' Split the content into an array of lines
           
            For i = 4 To UBound(lines) - 1 Step 3 ' new
                    For Each cell In Range("C3:C" & LastRow) ' Cells matching file testing
                    FoundIndex = cell.Row - 2
                    ToTestIndex = cell.Row - 2
                    If FoundPartArray(FoundIndex) <> True And PartToTestArray(ToTestIndex) = True Then
                    Homework = "PNL1," & cell
                    If lines(i) = "PNL4,0=" Then 'voodoo adjustment
                    i = i + 1
                    End If
                    If InStr(1, lines(i), Homework, vbTextCompare) > 0 Then ' Check if the line contains the Part Number
                        For QtySearch = 1 To 20 ' very small factor on the amount of total time for process
                            PartMultiplier = Cells(cell.Row, cell.Column + 2).Value
                            If InStr(1, lines(i), "0,0,0,0,0000,0", vbTextCompare) > 0 Then
                                PartMultiplier = PartMultiplier & ",0,0,0,0000,0"
                                ShotInTheDark = "0,0,0,0,0000,0"
                            Else
                                ShotInTheDark = "," & QtySearch & ",0,"
                                PartMultiplier = "," & PartMultiplier & ",0,"
                            End If
                            If InStr(1, lines(i), ShotInTheDark, vbTextCompare) > 0 Then
                                lines(i) = Replace(lines(i), ShotInTheDark, PartMultiplier)
                                Exit For ' Exiting QtySearch
                            End If
                        Next QtySearch
                        AddPart = lines(i) & vbCrLf & lines(i + 1) & vbCrLf & lines(i + 2) & vbCrLf ' Concatenate the current, previous, and next lines
                        FoundPartArray(FoundIndex) = True ' PART IS FOUND
                                Cells(cell.Row, 6).Value = Mid(file, 35, Len(file) - 38)
                                Cells(cell.Row, 7).Value = file.datelastmodified
                                cell.Interior.Color = RGB(0, 255, 0) ' green
                                outputContent = outputContent & AddPart
                        Exit For ' cell exit
                    End If
                    End If
                Next cell ' cell row increment
            Next i ' row in prl file increment
            file = Dir ' Get the next file in the folder
        End If
    Next file ' next file in folder increment
    AllPartsFound = True
  For FoundIndex = LBound(FoundPartArray) To UBound(FoundPartArray)
        If Not FoundPartArray(FoundIndex) Then
        AllPartsFound = False
Exit For
        End If
        Next FoundIndex
        If AllPartsFound = True Then
        fileDate = 20
        End If
Next fileDate
' ======================= Sorting Algorithm ======================================

I tried:
A. combining/splitting If/then statements like "datemodified and .txt extension" to see which way works best. B. looking through a file for all cells vs taking one cell and looking through each file before moving onto next cell.

The fastest I've gotten it to work is .007 seconds per file. That is a 2+minute process.

Are there faster ways, like adding all 20,000 files to an array first?

I considered adding a subfolder that can be scripted with Windows Task Scheduler to update 2-3 times a day that will have .txt files copied only, narrowing the scope of the search by 90%.


Solution

  • This might get you started on a different approach.
    For testing, I created 10k txt files in a folder, and assigned them random last-modified dates within the last 50 days.

    The code below loops over them all one time, and collects the qualifying files according to age in days, in an array of collections.

    The whole process of collection took about 4 seconds (and that was probably collecting many more files than you would be processing, since all of my 10k files had the same extension and the age was maybe limited compared to your actual use case).

    Sub Tester()
        Const FPATH As String = "C:\Temp\VBA\Folder1\"
        
        Dim f, t, dt, ns As Object, pth As String, oFile As Object
        Dim days(1 To 20) As Collection, d As Long, lmdays
        
    'Code below was used to create some dummy files and set random last-modified date
    '    Set ns = CreateObject("Shell.Application").Namespace(FPATH)
    '    For i = 1 To 10000
    '        PutContent FPATH & "File_" & Format(i, "000000") & ".txt", "testing " & i
    '        Set oFile = ns.ParseName("File_" & Format(i, "000000") & ".txt")
    '        oFile.ModifyDate = Date - (Rnd() * 50) 'last modified within 50 days
    '    Next i
        
        For d = 1 To 20
            Set days(d) = New Collection 'initialize array of collections
        Next d
        
        t = Timer
        f = Dir(FPATH & "*.txt")
        Do While Len(f) > 0
            dt = FileDateTime(FPATH & f)
            lmdays = Application.Ceiling(Now - dt, 1) 'last mod age in whole days
            If lmdays <= 20 Then                      'within our window?
                days(lmdays).Add FPATH & f    'collect this file
            End If
            f = Dir()
        Loop
        
        'check how many files for each day
        For d = 1 To 20
            Debug.Print d, days(d).count
            '## Process the files from this Collection... ##
        Next d
        
        Debug.Print Timer - t
    
    End Sub
    
    'used when creating test files
    Sub PutContent(f As String, content As String)
        CreateObject("scripting.filesystemobject"). _
                      OpenTextFile(f, 2, True).Write content
    End Sub