Search code examples
Tags: excel, vba, powerquery, autofilter

VBA: Keep first and last rows of duplicate column values of an Excel sheet


I have an Excel worksheet with 20K rows like this:

header1 header2
1 P
2 P
3 P
4 Q
5 R
6 R
7 R
8 R
9 S
10 S

I want VBA code that deletes the rows containing duplicates but keeps the first and last row of each group of duplicates. The result should look like this:

header1 header2
1 P
3 P
4 Q
5 R
8 R
9 S
10 S

I have modified the following code found here to do just that, but every time I have to manually select the range containing the duplicates in column header2.

Sub Delete_Dups_Keep_Last_v2()
 ' Asks the user to select a range, then deletes the entire row of every
 ' selected cell that
 '   * is neither on the first nor on the last row of the selection, and
 '   * has a value that occurs again further down inside the selection.
 ' The first and last rows of the selection are therefore always kept.
 ' If the user cancels the prompt, the procedure exits without changes.
 Dim SelRng As Range
 Dim Cell_in_Rng As Range
 Dim RngToDelete As Range
 Dim SelLastRow As Long
 
    Application.DisplayAlerts = False
    ' With Type:=8 a cancelled InputBox returns False, so the Set statement
    ' raises a run-time error; trap it and detect the cancel via Nothing.
    On Error Resume Next
    Set SelRng = Application.InputBox("Select cells", Type:=8)
    On Error GoTo 0
    Application.DisplayAlerts = True
 
    If SelRng Is Nothing Then Exit Sub
 
    ' Worksheet row number of the last row of the selection.
    SelLastRow = SelRng.Rows.Count + SelRng.Row - 1
    For Each Cell_in_Rng In SelRng
        
        ' Only interior rows are candidates for deletion.
        If Cell_in_Rng.Row > SelRng.Row And Cell_in_Rng.Row < SelLastRow Then
            ' Search the part of the selection below this cell (same column)
            ' for another whole-cell match of this cell's value.
            If Not Cell_in_Rng.Offset(1, 0).Resize(SelLastRow - Cell_in_Rng.Row).Find(What:=Cell_in_Rng.Value, Lookat:=xlWhole) Is Nothing Then
                'this value exists again in the range
                If RngToDelete Is Nothing Then
                    Set RngToDelete = Cell_in_Rng
                Else
                    Set RngToDelete = Application.Union(RngToDelete, Cell_in_Rng)
                End If
            End If
        End If
        
    Next Cell_in_Rng
 
    ' Delete all collected rows in a single operation.
    If Not RngToDelete Is Nothing Then RngToDelete.EntireRow.Delete

End Sub

Another piece of code, found here and written by user A.S.H., removes the need for manual selection and is faster because it uses a Dictionary, but it fails to produce the wanted result.

Sub keepFirstAndLast()
  ' For every distinct value in column B of Sheet1 (from row 2 down), keeps
  ' the rows holding its first and its last occurrence and deletes the rows
  ' of all occurrences in between. Occurrences are matched over the whole
  ' column, not only within consecutive runs.
  Dim toDelete As Range            ' rows collected for deletion; Nothing until one is found
  Dim dict As Object: Set dict = CreateObject("Scripting.Dictionary")

  ' Last used row in column B, derived from the sheet's real row count so the
  ' code also works on sheets with fewer than 999999 rows.
  Dim lastRow As Long
  lastRow = Sheet1.Cells(Sheet1.Rows.Count, "B").End(xlUp).Row
  If lastRow < 2 Then Exit Sub     ' nothing below the header row

  Dim a As Range
  Dim key As Variant
  For Each a In Sheet1.Range("B2", Sheet1.Cells(lastRow, "B"))
    key = a.Value2
    If Not dict.Exists(key) Then
      dict(key) = 0 ' first appearance, don't save the row
    Else
      ' if the last observed occurrence was a duplicate, it is now known to
      ' be an interior occurrence: add its row to the deletion range
      If dict(key) > 0 Then
        If toDelete Is Nothing Then
          Set toDelete = Sheet1.Rows(dict(key))
        Else
          Set toDelete = Union(toDelete, Sheet1.Rows(dict(key)))
        End If
      End If
      dict(key) = a.Row ' not first appearance, save the row for eventual deletion
    End If
  Next

  ' Delete all collected rows in a single operation.
  If Not toDelete Is Nothing Then toDelete.Delete
End Sub

Solution

  • Simple solution:

    Sub KeepFirstLast()
    ' Walks column B of the first sheet from the bottom up and, for every run
    ' of consecutive equal values, deletes the rows strictly between the
    ' run's first and last row. Row 1 is treated as a header: it is never
    ' compared with the data and never deleted.
    
    Const FIRST_DATA_ROW As Long = 2
    Const KEY_COLUMN As Long = 2
    
    Dim ws As Worksheet
    Dim lastRow As Long
    Dim i As Long                   ' row whose value defines the current run
    Dim x As Long                   ' candidate row above i
    Dim currentValue As String
    
    Set ws = Sheets(1)
    ' The last used row is taken from column A.
    lastRow = ws.Cells(ws.Rows.Count, 1).End(xlUp).Row
    
    Application.ScreenUpdating = False
    
    i = lastRow
    Do While i > FIRST_DATA_ROW
        currentValue = ws.Cells(i, KEY_COLUMN).Value
        x = i - 1
        ' Row x is interior to the run when both it and the row above it hold
        ' the run's value. Stopping at FIRST_DATA_ROW keeps the header out of
        ' the comparison and prevents Cells(0, ...) from ever being evaluated.
        Do While x > FIRST_DATA_ROW
            If ws.Cells(x, KEY_COLUMN).Value <> currentValue Then Exit Do
            If ws.Cells(x - 1, KEY_COLUMN).Value <> currentValue Then Exit Do
            ws.Rows(x).Delete
            ' Rows below shift up after the delete, so the next candidate is
            ' again the row directly above the kept last row of the run.
            x = x - 1
        Loop
        ' Continue with the row where the scan stopped (the first row of the
        ' run just handled, or the row above i when nothing matched).
        i = x
    Loop
    
    Application.ScreenUpdating = True
    
    End Sub