Search code examples
excelvba

Count three repeated words in product names with VBA


I have an Excel workbook with more then 100.000 product names in column A.

I want to count every time three words repeat themselves in more than one product name.

I will be able to say for example: the words "blue hyundai car" appears in more than 1000 products.

I have code, from user taller https://stackoverflow.com/users/22192445/taller, for pairs of words.
I have a column with more than 100.000 product names, I want to Identify all the pair of words that repeat them selves through out the column

I tried to adapt this to count every time three words repeat themselves in different product names. The order of the words don't matter.

Sub everythingg()
    Dim ws As Worksheet
    Dim lastRowA As Long
    Dim productNamesArray() As Variant
    Dim i As Long
    Dim oDic1 As Object, oDic2 As Object, oDic3 As Object
    Dim sKey1 As String, sKey2 As String
    Dim Word As Variant, ProductName As Variant, PN As Variant
    Dim productWordsArray As Variant, PWordArray As Variant

    ' Set up dictionaries
    Set oDic1 = CreateObject("scripting.dictionary")
    Set oDic2 = CreateObject("scripting.dictionary")
    Set oDic3 = CreateObject("scripting.dictionary")

    ' Set the worksheet and find the last row with data in column A
    Set ws = ThisWorkbook.Sheets("Productos")
    lastRowA = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------

    '---------------Create Array of Product Names -----------------------------------------
    ' Resize the array to match the number of rows
    ReDim productNamesArray(1 To lastRowA - 1)
    
    ' Fill the array with product names from column A, starting from the second row
    For i = 2 To lastRowA
        productNamesArray(i - 1) = ws.Cells(i, 1).Value
        Debug.Print "productNamesArray(" & i - 1 & ") : "; productNamesArray(i - 1)
    Next i
    
    Debug.Print
 
    
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------

    '------------Create Array with Product Words ------------------------
    For i = 1 To UBound(productNamesArray)
        ' Use the Split function to divide the string into words
        productWordsArray = Split(productNamesArray(i), " ")
        
         ' Print the words in the Immediate Window
        For j = LBound(productWordsArray) To UBound(productWordsArray)
            Debug.Print "ProductWordsrray(" & j & "): " & productWordsArray(j)
        Next j
        Debug.Print

        '---- Add Words to oDic1 and add Product Names from ProductNamesArray as Value ----
        Debug.Print "Get every Word in productWordsArray and Add or Create in oDIC1"
        For Each Word In productWordsArray
            If oDic1.Exists(Word) Then
                oDic1(Word) = oDic1(Word) & "," & productNamesArray(i)
                Debug.Print "Updated: " & Word & " - " & oDic1(Word)
            Else
                oDic1(Word) = productNamesArray(i)
                Debug.Print "Added: " & Word & " - " & oDic1(Word)
            End If
        Next
        Debug.Print
    Next i
    Debug.Print
    
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------

    Debug.Print "Contents of oDic1 after the loop:"
    For Each Key In oDic1.Keys
        Debug.Print Key & ": " & oDic1(Key)
    Next Key
    Debug.Print
       

'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
'----------------------------------------------------------------------------------------
    '<-- For each Key in oDic1, give me an array with the split values of that key-->
    Debug.Print "Split values of each key on oDIC1 "
    Debug.Print
    
    For Each Key In oDic1.Keys
        ' Get product name
        ProductName = Split(oDic1(Key), ",")
        Debug.Print Key

        For i = LBound(ProductName) To UBound(ProductName)
            Debug.Print "ProductName(" & i & "): " & ProductName(i)
        Next i
        Debug.Print

        '<----Create an array with words of ProductName"---->'
        oDic2.RemoveAll  'para contar palabaras por Key Word.
        Debug.Print "Create an array with words of ProductName"

        ' Create array Pword, with the words of the ProductName
        For Each PN In ProductName
            PWordArray = Split(PN)
            
            ' Debug Print each element in aWord
            For i = LBound(PWordArray) To UBound(PWordArray)
                Debug.Print "PWordArray(" & i & "): " & PWordArray(i)
            Next i
            Debug.Print

            '<----- Add or Create into oDIC2 and count ----->
            Debug.Print "Add or Create into oDIC2 and count"
            For Each vWord In PWordArray
                If oDic2.Exists(vWord) Then
                    oDic2(vWord) = oDic2(vWord) + 1
                    Debug.Print "Updated: " & vWord & " - Count: " & oDic2(vWord)
                Else
                    oDic2(vWord) = 1
                    Debug.Print "Added: " & vWord & " - Count: 1"
                End If
            Next
        Next
        Debug.Print
        '------------------------------------------------
        Debug.Print "Content of oDic2 " & Key & " :"
        For Each vWord In oDic2.Keys
            Debug.Print vWord & ": " & oDic2(vWord)
        Next
        Debug.Print
    
    
    
'------------------------------------------------------------------------------------------
'------------------------------------------------------------------------------------------
'------------------------------------------------------------------------------------------
 Debug.Print "Count Word Pair"
For Each vWord In oDic2.Keys
    If vWord <> Key Then
        sKey1 = Key & " " & vWord
        sKey2 = vWord & " " & Key
        Debug.Print "Current vWord: " & vWord
        Debug.Print "sKey1: " & sKey1
        Debug.Print "sKey2: " & sKey2
        Debug.Print

        Debug.Print "Add or Creat to oDic3"
        If oDic3.Exists(sKey1) Then
              
            If oDic2(vWord) > oDic3(sKey1) Then
            Debug.Print "IF oDic2(vWord): " & oDic2(vWord) & "> skey1: " & oDic3(sKey1)
                
                Debug.Print "Updating sKey1: " & sKey1 & " - Count: " & oDic2(vWord)
                oDic3(sKey1) = oDic2(vWord)
            Else
                Debug.Print "IF oDic2(vWord): " & oDic2(vWord) & "> skey1: " & oDic3(sKey1)
                Debug.Print "Not updating sKey1: " & sKey1
                
            End If
            
            
        ElseIf oDic3.Exists(sKey2) Then
        
            If oDic2(vWord) > oDic3(sKey2) Then
            
                Debug.Print "IF oDic2(vWord): " & oDic2(vWord) & "> skey2: " & oDic3(sKey2)
                Debug.Print "Updating sKey2: " & sKey2 & " - Count: " & oDic2(vWord)
                oDic3(sKey2) = oDic2(vWord)
            Else
                Debug.Print "Not updating sKey2: " & sKey2
            End If
            
        Else
            Debug.Print "Adding new key: " & sKey1 & " - Count: " & oDic2(vWord)
            oDic3(sKey1) = oDic2(vWord)
        End If
        
    End If
Next
Next
    
End Sub

Solution

    • The logic is same as the code for your previous post
    • Adding an UDF to sort words
    • You can revise the code to count 4 words combination by adding an Dictionary and nested For loop.
    Option Explicit
    
    Sub Count3Words()
        Dim oDic1 As Object, oDic4 As Object, oDic5 As Object
        Dim aProd, vProd, aWord, vWord, vKey, arrData
        Dim i As Long, sKey As String
        Set oDic1 = CreateObject("scripting.dictionary") ' product list by ONE word
        Set oDic4 = CreateObject("scripting.dictionary") ' product list by TWO words
        Set oDic5 = CreateObject("scripting.dictionary") ' product list by THREE words
        arrData = Range("A1").CurrentRegion.Value
        For i = LBound(arrData) + 1 To UBound(arrData)
            aWord = Split(arrData(i, 1))
            If UBound(aWord) > 1 Then
                For Each vWord In aWord
                    If oDic1.exists(vWord) Then
                        oDic1(vWord) = oDic1(vWord) & "," & arrData(i, 1)
                    Else
                        oDic1(vWord) = arrData(i, 1)
                    End If
                Next
            End If
        Next i
        For Each vKey In oDic1.keys
            aProd = Split(oDic1(vKey), ",")
            For Each vProd In aProd
                aWord = Split(vProd)
                For Each vWord In aWord
                    If vWord <> vKey Then
                        sKey = SortWord(vKey & " " & vWord)
                        If oDic4.exists(sKey) Then
                            If InStr(1, oDic4(sKey), vProd, vbTextCompare) = 0 Then
                                oDic4(sKey) = oDic4(sKey) & "," & vProd
                            End If
                        Else
                            oDic4(sKey) = vProd
                        End If
                    End If
                Next
            Next
        Next
        For Each vKey In oDic4.keys
            aProd = Split(oDic4(vKey), ",")
            For Each vProd In aProd
                aWord = Split(vProd)
                For Each vWord In aWord
                    If InStr(1, vKey, vWord, vbTextCompare) = 0 Then
                        sKey = SortWord(vKey & " " & vWord)
                        If oDic5.exists(sKey) Then
                            If InStr(1, oDic5(sKey), vProd, vbTextCompare) = 0 Then
                                oDic5(sKey) = oDic5(sKey) & "," & vProd
                            End If
                        Else
                            oDic5(sKey) = vProd
                        End If
                    End If
                Next
            Next
        Next
        For Each vKey In oDic5.keys
            oDic5(vKey) = UBound(Split(oDic5(vKey), ",")) + 1
        Next
        Range("D:E").Clear
        Range("D1:E1").Value = Array("Word Pair", "Times")
        Range("D2").Resize(oDic5.Count, 1) = Application.Transpose(oDic5.keys)
        Range("E2").Resize(oDic5.Count, 1) = Application.Transpose(oDic5.items)
    End Sub
    
    Function SortWord(ByVal sText As String) As String
        Dim i As Long, j As Long, aWord, sTmp As String
        aWord = Split(sText)
        If UBound(aWord) = 0 Then
            SortWord = sText
        Else
            For i = LBound(aWord) To UBound(aWord) - 1
                For j = i + 1 To UBound(aWord)
                    If aWord(i) > aWord(j) Then
                        sTmp = aWord(i): aWord(i) = aWord(j): aWord(j) = sTmp
                    End If
                Next
            Next
            SortWord = Join(aWord)
        End If
    End Function
    
    

    enter image description here