Search code examples
excelvbasortingsimilarity

VBA collect consecutive similar cells in the row


I have a list of non conformities appeared in different time with different products. I need to find out similar problems. I already made sorting

1

Now I need to get new sheet with similar rows with similar values in Product, Non coformity and date.

2

enter image description here

To get it I used following code, but not sure that it's correct approach:

' Look for similar non conformities >2
    Sheets.Add.Name = "Result"
    Dim wb As Workbook
    Dim ws As Worksheet, ws2 As Worksheet
    Dim CurrentRow As Long, Lastrow As Long, Lastrow2 As Long, k As Long
    
    Set wb = ActiveWorkbook
    Set ws = wb.Sheets("DuplicateRecords") 'Sheet where I have filtered result
    Set ws2 = wb.Sheets("Result") ' Resulting sheet
    CurrentRow = 2
    
    Lastrow = ws.Range("V" & Rows.Count).End(xlUp).Row
        
    For k = CurrentRow To Lastrow
        If ws.Range("G" & CurrentRow).Value2 = ws.Range("G" & CurrentRow + 1).Value2 And _
           ws.Range("V" & CurrentRow).Value2 = ws.Range("V" & CurrentRow + 1).Value2 And _
           ws.Range("T" & CurrentRow).Value2 = ws.Range("T" & CurrentRow + 1).Value2 Then
            Lastrow2 = ws2.Range("A" & Rows.Count).End(xlUp).Row
            ws2.Range("A" & Lastrow2 + 1).Value2 = ws.Range("A" & CurrentRow).Value2
            ws2.Range("B" & Lastrow2 + 1).Value2 = ws.Range("B" & CurrentRow).Value2
            ws2.Range("C" & Lastrow2 + 1).Value2 = ws.Range("C" & CurrentRow).Value2
            ws2.Range("D" & Lastrow2 + 1).Value2 = ws.Range("D" & CurrentRow).Value2
          
        End If
        CurrentRow = CurrentRow + 1
    Next k


Solution

  • Another non-VBA solution would be to use Power Query (aka Get & Transform), available in Windows Excel 2010+ and Microsoft 365 (Windows or Mac)

    To use Power Query

    • Select some cell in your Data Table
    • Data => Get&Transform => from Table/Range
    • When the PQ Editor opens: Home => Advanced Editor
    • Make note of the Table Name in Line 2
    • Paste the M Code below in place of what you see
    • Change the Table name in line 2 back to what was generated originally.
    • Read the comments and explore the Applied Steps to understand the algorithm

    M Code

    let
    
    //change next line to reflect actual data source
        Source = Excel.CurrentWorkbook(){[Name="Table1"]}[Content],
        #"Changed Type" = Table.TransformColumnTypes(Source,{
            {"#", Int64.Type}, {"Product", type text}, {"Non Conf", type text}, {"Date", type date}}),
    
    //Group by the columns you want together == Product / Non-conf / Date
        #"Grouped Rows" = Table.Group(#"Changed Type", {"Product", "Non Conf", "Date"}, {
    
        //Aggregate by ensuring there are duplicates
            {"All", each if Table.RowCount(_) > 1 then _ else null, 
                type table [#"#"=nullable number, Product=nullable text, Non Conf=nullable text, Date=nullable date]}}),
    
    //Remove the original columns
        #"Removed Columns" = Table.RemoveColumns(#"Grouped Rows",{"Product", "Non Conf", "Date"}),
    
    //Expand the grouped columns and remove the empty rows
        #"Expanded All" = Table.ExpandTableColumn(#"Removed Columns", "All", {"#", "Product", "Non Conf", "Date"}),
        #"Removed Blank Rows" = Table.SelectRows(#"Expanded All", 
            each not List.IsEmpty(List.RemoveMatchingItems(Record.FieldValues(_), {"", null})))
    in
        #"Removed Blank Rows"
    

    enter image description here

    Edit
    If you must use VBA, here is a routine which, by using Collections, Dictionary and VBA Arrays, should execute quite rapidly -- 5-10 times quicker than referring to the worksheet at each step

    'Set reference to Microsoft Scripting Runtime
    '  or make the edits to use late-binding for Dictionary object
    Option Explicit
    Sub selectDups()
        Dim vSrc As Variant, vRes As Variant
        Dim Dict As Dictionary, col As Collection
        Dim vKey(0 To 2) As Variant, sKey As String
        Dim I As Long, J As Long, K As Long, V, W, X
        Dim rSrc As Range, rDest As Range
        Dim wsSrc As Worksheet, wsDest As Worksheet
        
        
    Set wsSrc = ThisWorkbook.Worksheets("Sheet2") 'Set to whatever sheet contains your data
    With wsSrc
        'Assume range starts in A1 and is four columns wide
        Set rSrc = Range(Cells(1, 1), Cells(.Rows.Count, 4).End(xlUp))
        vSrc = rSrc 'create array for faster processing
    End With
    
    Set Dict = New Dictionary
    
    'Create dictionary where key contains the items to be grouped
    ' and the contents is a Collection of the #'s
    For I = 2 To UBound(vSrc, 1)
        For J = 2 To 4
            vKey(J - 2) = vSrc(I, J)
            sKey = Join(vKey, "~")
        Next J
        If Dict.Exists(sKey) Then
            Dict(sKey).Add vSrc(I, 1)
        Else
            Set col = New Collection
                col.Add vSrc(I, 1)
            Dict.Add Key:=sKey, Item:=col
        End If
    Next I
    
    'Include only the duplicates
    For Each V In Dict.Keys
        If Dict(V).Count = 1 Then Dict.Remove (V)
    Next V
    
    'write results next to original table
    'could modify code to write results anywhere
    Set wsDest = wsSrc
    Set rDest = rSrc.Offset(columnoffset:=6)
    
    'Compute number of rows
        I = 0
        For Each V In Dict.Keys
             I = I + Dict(V).Count
        Next V
        
    Set rDest = rDest.Resize(rowsize:=I + 1) '+1 for headers
    ReDim vRes(0 To I, 1 To 4)
    
    'Headers
        For J = 1 To 4
            vRes(0, J) = vSrc(1, J)
        Next J
        
    'Data
        I = 0
        For Each V In Dict.Keys
            X = Split(V, "~")
            For K = 1 To Dict(V).Count
                I = I + 1
                vRes(I, 1) = Dict(V)(K)
                For J = 1 To 3
                    vRes(I, J + 1) = X(J - 1)
                Next J
            Next K
        Next V
        
    'Write results to the worksheet
    With rDest
        .EntireColumn.Clear
        .Value = vRes
        .Columns(4).NumberFormat = "dd-mmm-yyyy"
        .Style = "Output" 'Optional and may not work internationally
        .EntireColumn.AutoFit
    End With
    
    End Sub
    

    enter image description here