I'm working on a project where I need to extract the intended recipients from product titles based on keywords provided in a reference sheet. The results are then written into the "Recipient From Title" column of the "PRODUCTS" sheet.
Here's what I am working with:
Scenario: Sheet "PRODUCTS": Contains product details, including a column for "Product Title" that needs to be matched to a recipient category. Sheet "REFERENCE": Contains a list of product titles in column A and the intended recipient(s) in column B. The goal is to fill in the "Recipient From Title" column in the "PRODUCTS" sheet by matching phrases in the product title against a reference list and then assigning the correct recipients. If a title matches multiple keywords, they should be concatenated and separated by a semicolon ;. sample of both sheets given below
ProductSheet (Data to be filled)
Reference sheet (Used for Lookup)
Below is my attempt at the code that aims to iterate through each product in the "PRODUCTS" sheet, find keywords from the "REFERENCE" sheet, and fill in the appropriate recipients:
Sub FillRecipientFromTitle()
Dim wsProducts As Worksheet, wsReference As Worksheet
Dim LastRowProducts As Long, LastRowReference As Long
Dim i As Long, j As Long
Dim title As String, recipient As String
' Set worksheets
Set wsProducts = ThisWorkbook.Sheets("PRODUCTS")
Set wsReference = ThisWorkbook.Sheets("REFERENCE")
' Get last rows for both sheets
LastRowProducts = wsProducts.Cells(wsProducts.Rows.Count, 1).End(xlUp).Row
LastRowReference = wsReference.Cells(wsReference.Rows.Count, 1).End(xlUp).Row
Application.ScreenUpdating = False
' Loop through each product row
For i = 2 To LastRowProducts
title = LCase(wsProducts.Cells(i, 1).Value) ' Product Title in Column A of PRODUCTS
recipient = ""
' Loop through reference list to find a match
For j = 2 To LastRowReference
If InStr(1, title, LCase(wsReference.Cells(j, 1).Value)) > 0 Then
If recipient = "" Then
recipient = wsReference.Cells(j, 2).Value
ElseIf InStr(1, recipient, wsReference.Cells(j, 2).Value) = 0 Then
recipient = recipient & ";" & wsReference.Cells(j, 2).Value
End If
End If
Next j
' Write Recipient to PRODUCTS Sheet Column B
wsProducts.Cells(i, 2).Value = recipient
Next i
Application.ScreenUpdating = True
MsgBox "Data has been processed successfully!"
End Subp
Issues I'm Facing:
I am happy to provide the excel sheet for whoever wants to take a look
Ideally your reference list only has one item per row in the first column, and you sort it in order of descending length. See below for how this restructuring might be done dynamically using your existing layout.
After you make a match, remove that match from title
(using Replace
for example) so you don't later make another partial match - eg matching and removing "god daughter" prevents a later match on "daughter".
You can use a regexp
object instead of Instr
to make sure your test respects word boundaries eg "son" doesn't match on "personalized".
Try this out. In my tests this processed 12k products in just over 3 sec.
Sub FillRecipientFromTitle()
Dim wsProducts As Worksheet, wsReference As Worksheet
Dim LastRowProducts As Long, LastRowReference As Long, term As String
Dim i As Long, j As Long, r As Long, arr, el, col As Collection, t
Dim title As String, data, refs, newRefs, newRefRows As Long
Set wsProducts = ThisWorkbook.Sheets("PRODUCTS")
Set wsReference = ThisWorkbook.Sheets("REFERENCE")
t = Timer
'get an array of all of the reference data
refs = wsReference.Range("A2:B" & LastRow(wsReference, "A")).Value
'Create a new array with one row per term to match on,
' and sort it descending by length
ReDim newRefs(1 To 5 * UBound(refs, 1), 1 To 2) 'should be large enough...
i = 0
For r = 1 To UBound(refs, 1)
arr = Split(refs(r, 1), ",") 'split on comma
For Each el In arr 'add each item to the new array
i = i + 1
newRefs(i, 1) = LCase(Trim(el))
newRefs(i, 2) = refs(r, 2)
Next el
Next r
newRefRows = i
Debug.Print Timer - t, "New reference array has " & newRefRows & " entries"
SortByLength newRefs, 1 'sort descending by length of first column value
Application.ScreenUpdating = False
'get all product data as array
data = wsProducts.Range("A2:A" & LastRow(wsProducts, "A")).Value
For i = 1 To UBound(data, 1)
If i Mod 1000 = 0 Then Debug.Print Timer - t, "Checking", i
title = LCase(data(i, 1))
recipient = ""
Set col = New Collection
' Loop through sorted reference array to look for match
For j = 1 To newRefRows
term = newRefs(j, 1)
If WordMatch(title, term) Then
On Error Resume Next 'ignore any duplicate key error
col.Add newRefs(j, 2), newRefs(j, 2)
On Error GoTo 0
'remove the matched term so it won't match later
' eg match on "God Daughter" won't also match "Daughter"
title = Replace(title, term, "", 1, compare:=vbTextCompare)
End If
Next j
data(i, 1) = ColToList(col) 'recipient(s)
Next i
'populate recipients to col B
wsProducts.Range("B2").Resize(UBound(data, 1)).Value = data
Application.ScreenUpdating = True
Debug.Print Timer - t, "Done"
MsgBox "Data has been processed successfully!"
End Sub
'See if `term` is in the string `txt` accounting for word boundaries
' So (eg) "son" does not return True if tested against "personalized"
'https://learn.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/scripting-articles/ms974570(v=msdn.10)?redirectedfrom=MSDN
Function WordMatch(txt As String, term As String) As Boolean
Static re As Object 'faster if you don't create a new object for each call...
If re Is Nothing Then
Set re = CreateObject("vbscript.regexp")
re.ignorecase = True
End If
re.Pattern = "\b" & term & "\b" 'check word boundaries
WordMatch = re.Test(txt)
End Function
Function LastRow(ws As Worksheet, colRef As String) As Long
LastRow = ws.Cells(ws.Rows.Count, colRef).End(xlUp).Row
End Function
'concatenate collection items to a list
Function ColToList(col As Collection) As String
Dim i As Long, s As String, sep As String
For i = 1 To col.Count
ColToList = ColToList & sep & col(i)
sep = ";"
Next i
End Function
'Sort an array (in place) by item length (descending) using bubble sort algorithm
Sub SortByLength(data, colIndex As Long)
Dim FirstC As Integer, LastC As Long
Dim i As Long, j As Long, c As Long
Dim tmp
FirstC = LBound(data, 2)
LastC = UBound(data, 2)
For i = LBound(data, 1) To UBound(data, 1) - 1
For j = i + 1 To UBound(data, 1)
'compare data length in specified column
If Len(data(i, colIndex)) < Len(data(j, colIndex)) Then
'swap rows
For c = FirstC To LastC
tmp = data(j, c)
data(j, c) = data(i, c)
data(i, c) = tmp
Next c
End If
Next j
Next i
End Sub