Search code examples
excelexcel-formulavlookupworksheet-functionvba

Find rows repeated in no particular order and show result sorted by number of repetitions


I was able to solve this problem using Python but I would need to implement the solution in Excel itself as well so that I can use graphics to represent the results easily.

Given this table:

b   a   c
c   a   b 
a   c   b
a   c
a   c   d
b   c   a
d   c   a

I would like to obtain a list sorted by the number of times that a row is the repeated (in no particular order) in the table.

  • So this would count as repeated rows: "a b c", "c b a", "a c b"
  • But this wouldn't: "a b c", "b c", "b", "a b", "a c"

So, the output I'm looking for would be something like:

1st place: "b+a+c" found 4 times
2nd place: "a+c+d" found 2 twice
3rd place: "a+c" found once

The output has to say "b+a+c" even if it is also counting "a+b+c", "c+b+a" and so on... because "b+a+c" was the first one of all the other subsequent repetitions.

Would anyone be able to show me the correct way to approach the problem?


Solution

  • I would use a Class module and a collection object. The class module would consist of two arrays and a counter. The first array is the row in its original order; the second array is the row in sorted order. The sorted order would be used as the Key for the collection object. If you try to add a collection object where the Key already exists, it will cause an error. Trap the error and add one to the counter.

    Then for the results, you would retrieve the original entries from the "original" array; and the counter. Sort on the counter and you have your results.

    Here is an example of VBA code to accomplish the above.

    First, insert a Class module and rename it RowEntries

    Option Explicit
    Private pOriginal() As Variant
    Private pSorted() As Variant
    Private pCount As Long
    
    Public Property Get Original() As Variant
        Original = pOriginal
    End Property
    Public Property Let Original(Value As Variant)
        pOriginal = Value
    End Property
    
    Public Property Get Sorted() As Variant
        Sorted = pSorted
    End Property
    Public Property Let Sorted(Value As Variant)
        pSorted = Value
    End Property
    
    Public Property Get Count() As Long
        Count = pCount
    End Property
    Public Property Let Count(Value As Long)
        pCount = Value
    End Property
    

    Then insert a regular module. This code assumes your source data is the CurrentRegion around A1; and the results will go several columns to the right. These algorithms are easily changed.

    Option Explicit
    Option Compare Text  'To make comparison case insensitive, if you want
    Sub RankRows()
        Dim V As Variant, VtoSort As Variant
        Dim vRes() As Variant
        Dim cRowEntries As RowEntries
        Dim colRowEntries As Collection
        Dim sKey As String, S As String
        Dim I As Long
        Dim rSrc As Range, rRes As Range  'Location for Results
    
    Set rSrc = Range("A1").CurrentRegion
    Set rRes = rSrc.Offset(columnoffset:=rSrc.Columns.Count + 3).Resize(1, 2)
    
    V = rSrc
    
    Set colRowEntries = New Collection
    On Error Resume Next
    For I = 1 To UBound(V)
        Set cRowEntries = New RowEntries
        With cRowEntries
            .Original = WorksheetFunction.Index(V, I, 0)
            VtoSort = .Original
            Quick_Sort VtoSort, LBound(VtoSort), UBound(VtoSort)
            .Sorted = VtoSort
            .Count = 1
            sKey = CStr(Join(.Sorted, ", "))
            colRowEntries.Add cRowEntries, sKey
            If Err.Number <> 0 Then
                Err.Clear
                With colRowEntries(sKey)
                    .Count = .Count + 1
                End With
            End If
        End With
    Next I
    On Error GoTo 0
    
    'populate results array
    ReDim vRes(1 To colRowEntries.Count, 1 To 2)
    For I = 1 To colRowEntries.Count
        With colRowEntries(I)
            vRes(I, 1) = Join(.Original, "+")
    
                'remove trailing delimiters
                Do While Right(vRes(I, 1), 1) = "+"
                    vRes(I, 1) = Left(vRes(I, 1), Len(vRes(I, 1)) - 1)
                Loop
    
            vRes(I, 2) = .Count
        End With
    Next I
    
    Set rRes = rRes.Resize(rowsize:=UBound(vRes), columnsize:=UBound(vRes, 2))
    With rRes
        .EntireColumn.Clear
        .Value = vRes
        .Sort key1:=rRes.Columns(2), order1:=xlDescending, Header:=xlNo
    End With
    
    V = rRes
    ReDim vRes(1 To UBound(V), 1 To 1)
    
    For I = 1 To UBound(V)
        Select Case V(I, 2)
            Case 1
                S = "once"
            Case 2
                S = "twice"
            Case Else
                S = V(I, 2) & " times"
        End Select
        vRes(I, 1) = OrdinalNum(I) & " place: """ & V(I, 1) & """ found " & S
    
    Next I
    
    rRes.EntireColumn.Clear
    rRes.Resize(columnsize:=1) = vRes
    rRes.EntireColumn.AutoFit
    
    End Sub
    
    
    Sub Quick_Sort(ByRef SortArray As Variant, ByVal first As Long, ByVal last As Long)
    Dim Low As Long, High As Long
    Dim Temp As Variant, List_Separator As Variant
    Low = first
    High = last
    List_Separator = SortArray((first + last) / 2)
    Do
        Do While (SortArray(Low) < List_Separator)
            Low = Low + 1
        Loop
        Do While (SortArray(High) > List_Separator)
            High = High - 1
        Loop
        If (Low <= High) Then
            Temp = SortArray(Low)
            SortArray(Low) = SortArray(High)
            SortArray(High) = Temp
            Low = Low + 1
            High = High - 1
        End If
    Loop While (Low <= High)
    If (first < High) Then Quick_Sort SortArray, first, High
    If (Low < last) Then Quick_Sort SortArray, Low, last
    End Sub
    
    Function OrdinalNum(num) As String
    Dim Suffix As String
    
    OrdinalNum = num
    If Not IsNumeric(num) Then Exit Function
    If num <> Int(num) Then Exit Function
    
    Select Case num Mod 10
        Case Is = 1
            Suffix = "st"
        Case Is = 2
            Suffix = "nd"
        Case Is = 3
            Suffix = "rd"
        Case Else
            Suffix = "th"
    End Select
    
    Select Case num Mod 100
        Case 11 To 19
            Suffix = "th"
    End Select
    
    OrdinalNum = Format(num, "#,##0") & Suffix
    End Function
    

    The output will be just as you show in your request above. But could be easily modified:

    enter image description here