I have an Excel workbook with more then 100.000 product names in column A.
I want to count every time three words repeat themselves in more than one product name.
I will be able to say for example: the words "blue hyundai car" appears in more than 1000 products.
I have code, from user taller https://stackoverflow.com/users/22192445/taller, for pairs of words.
I have a column with more than 100.000 product names, I want to Identify all the pair of words that repeat them selves through out the column
I tried to adapt this to count every time three words repeat themselves in different product names. The order of the words don't matter.
Sub everythingg()
Dim ws As Worksheet
Dim lastRowA As Long
Dim productNamesArray() As Variant
Dim i As Long
Dim oDic1 As Object, oDic2 As Object, oDic3 As Object
Dim sKey1 As String, sKey2 As String
Dim Word As Variant, ProductName As Variant, PN As Variant
Dim productWordsArray As Variant, PWordArray As Variant
' Set up dictionaries
Set oDic1 = CreateObject("scripting.dictionary")
Set oDic2 = CreateObject("scripting.dictionary")
Set oDic3 = CreateObject("scripting.dictionary")
' Set the worksheet and find the last row with data in column A
Set ws = ThisWorkbook.Sheets("Productos")
lastRowA = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'---------------Create Array of Product Names -----------------------------------------
' Resize the array to match the number of rows
ReDim productNamesArray(1 To lastRowA - 1)
' Fill the array with product names from column A, starting from the second row
For i = 2 To lastRowA
productNamesArray(i - 1) = ws.Cells(i, 1).Value
Debug.Print "productNamesArray(" & i - 1 & ") : "; productNamesArray(i - 1)
Next i
Debug.Print
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'------------Create Array with Product Words ------------------------
For i = 1 To UBound(productNamesArray)
' Use the Split function to divide the string into words
productWordsArray = Split(productNamesArray(i), " ")
' Print the words in the Immediate Window
For j = LBound(productWordsArray) To UBound(productWordsArray)
Debug.Print "ProductWordsrray(" & j & "): " & productWordsArray(j)
Next j
Debug.Print
'---- Add Words to oDic1 and add Product Names from ProductNamesArray as Value ----
Debug.Print "Get every Word in productWordsArray and Add or Create in oDIC1"
For Each Word In productWordsArray
If oDic1.Exists(Word) Then
oDic1(Word) = oDic1(Word) & "," & productNamesArray(i)
Debug.Print "Updated: " & Word & " - " & oDic1(Word)
Else
oDic1(Word) = productNamesArray(i)
Debug.Print "Added: " & Word & " - " & oDic1(Word)
End If
Next
Debug.Print
Next i
Debug.Print
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
Debug.Print "Contents of oDic1 after the loop:"
For Each Key In oDic1.Keys
Debug.Print Key & ": " & oDic1(Key)
Next Key
Debug.Print
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'<-- For each Key in oDic1, give me an array with the split values of that key-->
Debug.Print "Split values of each key on oDIC1 "
Debug.Print
For Each Key In oDic1.Keys
' Get product name
ProductName = Split(oDic1(Key), ",")
Debug.Print Key
For i = LBound(ProductName) To UBound(ProductName)
Debug.Print "ProductName(" & i & "): " & ProductName(i)
Next i
Debug.Print
'<----Create an array with words of ProductName"---->'
oDic2.RemoveAll 'para contar palabaras por Key Word.
Debug.Print "Create an array with words of ProductName"
' Create array Pword, with the words of the ProductName
For Each PN In ProductName
PWordArray = Split(PN)
' Debug Print each element in aWord
For i = LBound(PWordArray) To UBound(PWordArray)
Debug.Print "PWordArray(" & i & "): " & PWordArray(i)
Next i
Debug.Print
'<----- Add or Create into oDIC2 and count ----->
Debug.Print "Add or Create into oDIC2 and count"
For Each vWord In PWordArray
If oDic2.Exists(vWord) Then
oDic2(vWord) = oDic2(vWord) + 1
Debug.Print "Updated: " & vWord & " - Count: " & oDic2(vWord)
Else
oDic2(vWord) = 1
Debug.Print "Added: " & vWord & " - Count: 1"
End If
Next
Next
Debug.Print
'------------------------------------------------
Debug.Print "Content of oDic2 " & Key & " :"
For Each vWord In oDic2.Keys
Debug.Print vWord & ": " & oDic2(vWord)
Next
Debug.Print
'------------------------------------------------------------------------------------------
'------------------------------------------------------------------------------------------
'------------------------------------------------------------------------------------------
Debug.Print "Count Word Pair"
For Each vWord In oDic2.Keys
If vWord <> Key Then
sKey1 = Key & " " & vWord
sKey2 = vWord & " " & Key
Debug.Print "Current vWord: " & vWord
Debug.Print "sKey1: " & sKey1
Debug.Print "sKey2: " & sKey2
Debug.Print
Debug.Print "Add or Creat to oDic3"
If oDic3.Exists(sKey1) Then
If oDic2(vWord) > oDic3(sKey1) Then
Debug.Print "IF oDic2(vWord): " & oDic2(vWord) & "> skey1: " & oDic3(sKey1)
Debug.Print "Updating sKey1: " & sKey1 & " - Count: " & oDic2(vWord)
oDic3(sKey1) = oDic2(vWord)
Else
Debug.Print "IF oDic2(vWord): " & oDic2(vWord) & "> skey1: " & oDic3(sKey1)
Debug.Print "Not updating sKey1: " & sKey1
End If
ElseIf oDic3.Exists(sKey2) Then
If oDic2(vWord) > oDic3(sKey2) Then
Debug.Print "IF oDic2(vWord): " & oDic2(vWord) & "> skey2: " & oDic3(sKey2)
Debug.Print "Updating sKey2: " & sKey2 & " - Count: " & oDic2(vWord)
oDic3(sKey2) = oDic2(vWord)
Else
Debug.Print "Not updating sKey2: " & sKey2
End If
Else
Debug.Print "Adding new key: " & sKey1 & " - Count: " & oDic2(vWord)
oDic3(sKey1) = oDic2(vWord)
End If
End If
Next
Next
End Sub
For
loop.Option Explicit
Sub Count3Words()
Dim oDic1 As Object, oDic4 As Object, oDic5 As Object
Dim aProd, vProd, aWord, vWord, vKey, arrData
Dim i As Long, sKey As String
Set oDic1 = CreateObject("scripting.dictionary") ' product list by ONE word
Set oDic4 = CreateObject("scripting.dictionary") ' product list by TWO words
Set oDic5 = CreateObject("scripting.dictionary") ' product list by THREE words
arrData = Range("A1").CurrentRegion.Value
For i = LBound(arrData) + 1 To UBound(arrData)
aWord = Split(arrData(i, 1))
If UBound(aWord) > 1 Then
For Each vWord In aWord
If oDic1.exists(vWord) Then
oDic1(vWord) = oDic1(vWord) & "," & arrData(i, 1)
Else
oDic1(vWord) = arrData(i, 1)
End If
Next
End If
Next i
For Each vKey In oDic1.keys
aProd = Split(oDic1(vKey), ",")
For Each vProd In aProd
aWord = Split(vProd)
For Each vWord In aWord
If vWord <> vKey Then
sKey = SortWord(vKey & " " & vWord)
If oDic4.exists(sKey) Then
If InStr(1, oDic4(sKey), vProd, vbTextCompare) = 0 Then
oDic4(sKey) = oDic4(sKey) & "," & vProd
End If
Else
oDic4(sKey) = vProd
End If
End If
Next
Next
Next
For Each vKey In oDic4.keys
aProd = Split(oDic4(vKey), ",")
For Each vProd In aProd
aWord = Split(vProd)
For Each vWord In aWord
If InStr(1, vKey, vWord, vbTextCompare) = 0 Then
sKey = SortWord(vKey & " " & vWord)
If oDic5.exists(sKey) Then
If InStr(1, oDic5(sKey), vProd, vbTextCompare) = 0 Then
oDic5(sKey) = oDic5(sKey) & "," & vProd
End If
Else
oDic5(sKey) = vProd
End If
End If
Next
Next
Next
For Each vKey In oDic5.keys
oDic5(vKey) = UBound(Split(oDic5(vKey), ",")) + 1
Next
Range("D:E").Clear
Range("D1:E1").Value = Array("Word Pair", "Times")
Range("D2").Resize(oDic5.Count, 1) = Application.Transpose(oDic5.keys)
Range("E2").Resize(oDic5.Count, 1) = Application.Transpose(oDic5.items)
End Sub
Function SortWord(ByVal sText As String) As String
Dim i As Long, j As Long, aWord, sTmp As String
aWord = Split(sText)
If UBound(aWord) = 0 Then
SortWord = sText
Else
For i = LBound(aWord) To UBound(aWord) - 1
For j = i + 1 To UBound(aWord)
If aWord(i) > aWord(j) Then
sTmp = aWord(i): aWord(i) = aWord(j): aWord(j) = sTmp
End If
Next
Next
SortWord = Join(aWord)
End If
End Function