Search code examples
excelvba

Compile data based on fixed column values


I have multiple workbooks for each date of the month e.g. Workbook_20230505 with data as below:

Staff ID Contract Date Time Name Group
(Blank)
80001 20230501 9:00:35 AM SYSTEM A
80002 20230501 5:32:05 AM SYSTEM A
80003 20230503 4:15:35 AM ALEX A
80004 20230503 4:09:30 AM CLARE B
80005 20230504 2:54:18 AM JOHN C
80006 20230505 3:23:10 AM SANDY D
80007 20230502 4:32:09 AM MANDA C
80008 20230505 7:15:23 AM VOID F
80009 20230505 6:15:23 AM ALEX A
80010 20230501 3:46:15 AM KEN B
80011 20230501 7:08:23 AM SYSTEM B

eg. Workbook_20230506 with data as below:

Staff ID Contract Date Time Name Group
(Blank)
90001 20230503 3:00:35 AM SYSTEM A
90002 20230504 11:32:05 AM SYSTEM A
90003 20230508 1:15:30 AM ALEX C
90004 20230519 3:09:32 AM CLARE B
90005 20230523 1:33:18 AM SYSTEM C
90006 20230522 3:26:10 AM SANDY A
90007 20230521 4:32:09 AM MANDA A
90008 20230522 7:15:45 AM SYSTEM A
90009 20230504 6:18:12 AM ALEX A
90010 20230503 3:46:15 AM SYSTEM B
90011 20230529 7:04:22 AM SYSTEM B

This is the intended outcome in a new workbook for compilation:

Date Latest Time Staff ID
01/05/2023
02/05/2023
03/05/2023
04/05/2023
05/05/2023
: :
31/05/2023

I have the following code: (obtains the latest time and staff ID given Name is System or Alex and Group is A)

    Sub GetMax()
    Dim FolderPath As String
    Dim FileName As String
    Dim wb As Workbook
    Dim ws As Worksheet
    Dim MaxValue As Date
    Dim MaxID As String
    Dim DatePart As String
    Dim StaffID As String
    Dim LastRow As Long
    Dim arr, dic, rngRes As Range

    Set dic = CreateObject("scripting.dictionary")
    Application.ScreenUpdating = False

    ' Update your folder name
    FolderPath = "D:\Temp\"
    ' Retrieve files with "Workbook_*.xlsx"
    FileName = Dir(FolderPath & "Workbook_*.xlsx")
    Sheets("Sheet1").Select

    Do While FileName <> ""
        DatePart = Split(FileName, "_")(1)
        Set wb = Workbooks.Open(FolderPath & FileName)
        Set ws = wb.Sheets(1)
        ' Get max value of column C and value from column A
        arr = ws.UsedRange.Value
        MaxValue = 0
    MaxID = 0
        If UBound(arr, 2) >= 3 Then
            For i = 3 To UBound(arr) 'start from row 3
                    If InStr("SYSTEM|ALEX", UCase(arr(i, 4))) = 0 And _
                InStr("A", UCase(arr(i, 5))) = 0 And _
                CStr(arr(i,2)) = DatePart Then
                arr(i,3) = cDate(arr(i,3))
                            If MaxValue < arr(i, 3) Then MaxValue = arr(i, 3): MaxID = arr(i,1)
            End If
            Next
        End If

    'store MaxValue in Dictionary
    dic(DatePart) = MaxValue
    dic(StaffID) = MaxID
    wb.Close SaveChanges:=False
        FileName = Dir
    Loop

    With ActiveSheet
    LastRow = .Cells(.Rows.Count, "B").End(xlUp).Row
    Set rngRes = .Range("B17:D" & LastRow)
    rngRes.Columns(2).NumberFormat = "h:mm"
    arr = rngRes.Value
    For i = 2 to UBound(arr)
        If VBA.IsDate(arr(i,1)) Then
            DatePart = Format(arr(i,1), "yyyymmdd")
            If dic.exists(DatePart) Then
                arr(i,2) = dic(DatePart): arr(i,3) = dic(StaffID)
            Else
                arr(i,2) = ""
            End If
        End If
    Next
    rngRes.Value = arr
    End With
    Set dic = Nothing
    Application.ScreenUpdating = True
End Sub

Which gives the following result:

Date Latest Time Staff ID
01/05/2023
02/05/2023
03/05/2023
04/05/2023
05/05/2023 9:00:35 90002
06/05/2023 11:32:05 90002
: :
31/05/2023

This is the intended outcome: enter image description here

The code is unable to input the Staff ID correctly as it displays the same Staff ID from the latest iteration for all dates.

Appreciate the help to correct the code for any mistakes!

FYI, this is a follow up question from Filter on multiple columns and compile value to main workbook Sorry for asking so many times.


Solution

    • DatePart = Split(FileName, "_")(1) doesn't extract date from file name. eg. if filename is Workbook_20230505.xlsx then DatePart is 20230505.xlsx.

    • InStr("SYSTEM|ALEX", UCase(arr(i, 4))) = 0 means UCase(arr(i, 4)) is NOT SYSTEM or ALEX.

    • The 2nd condition equals to "A" = UCase(arr(i, 5)).

    • The 3rd condition should be removed.

    If InStr("SYSTEM|ALEX", UCase(arr(i, 4))) = 0 And _
                    InStr("A", UCase(arr(i, 5))) = 0 And _
                    CStr(arr(i,2)) = DatePart Then
    
    • store MaxValue in Dictionary section is incorrect. You need to use an array to store multiple values in the Dict object.

    Microsoft documentation:

    InStr function

    Option Explicit
    Sub GetMax()
        Dim FolderPath As String
        Dim FileName As String
        Dim wb As Workbook
        Dim ws As Worksheet
        Dim MaxValue As Date
        Dim MaxID As String
        Dim DatePart As String
        Dim StaffID As String
        Dim LastRow As Long
        Dim arr, dic, rngRes As Range
        Dim i As Long
        Set dic = CreateObject("scripting.dictionary")
        Application.ScreenUpdating = False
        ' Update your folder name
        FolderPath = "D:\Temp\"
        ' Retrieve files with "Workbook_*.xlsx"
        FileName = Dir(FolderPath & "Workbook_*.xlsx")
        Do While FileName <> ""
            DatePart = Split(Replace(FileName, "Workbook_", ""), ".")(0)
            Set wb = Workbooks.Open(FolderPath & FileName)
            Set ws = wb.Sheets(1)
            ' Get max value of column C and value from column A
            arr = ws.UsedRange.Value
            MaxValue = 0
            MaxID = 0
            If UBound(arr, 2) >= 3 Then
                For i = 3 To UBound(arr) 'start from row 3
                    If InStr("SYSTEM|ALEX", UCase(arr(i, 4))) > 0 And "A" = UCase(arr(i, 5)) Then
                        arr(i, 3) = CDate(arr(i, 3))
                        If MaxValue < arr(i, 3) Then MaxValue = arr(i, 3): MaxID = arr(i, 1)
                    End If
                Next
            End If
            'store MaxValue in Dictionary
            dic(DatePart) = Array(MaxValue, MaxID)
            wb.Close SaveChanges:=False
            FileName = Dir
        Loop
        With ActiveSheet
            LastRow = .Cells(.Rows.Count, "B").End(xlUp).Row
            Set rngRes = .Range("B17:D" & LastRow)
             ' modify time format as needed
            rngRes.Columns(2).NumberFormat = "h:mm:ss"
            arr = rngRes.Value
            For i = 2 To UBound(arr)
                If VBA.IsDate(arr(i, 1)) Then
                    DatePart = Format(arr(i, 1), "yyyymmdd")
                    If dic.exists(DatePart) Then
                        arr(i, 2) = dic(DatePart)(0)
                        arr(i, 3) = dic(DatePart)(1)
                    Else
                        arr(i, 2) = ""
                        arr(i, 3) = ""
                    End If
                End If
            Next
            rngRes.Value = arr
        End With
        Set dic = Nothing
        Application.ScreenUpdating = True
    End Sub