Here is some data generated using R:
library(tidyverse)
set.seed(0)
tibble(A = round(rnorm(10, 20, 2)),
B = round(rnorm(10, 100, 2))) %>%
group_by(B) %>%
mutate(C = n_distinct(A)) %>% # count distinct A per B
arrange(B, A) %>%
write_csv("small_example.csv")
tibble(A = round(rnorm(5000, 10000, 1000)),
B = round(rnorm(5000, 50000, 1000))) %>%
group_by(B) %>%
mutate(C = n_distinct(A)) %>%
arrange(B, A) %>%
write_csv("big_example.csv")
small_example.csv
should look something like this:
I am attempting to recreate column C in column D, using VBA in Excel:
Sub UpdateFormulas()
Dim last_row As Integer
last_row = ActiveSheet.UsedRange.Rows.Count
For i = 2 To last_row
ActiveSheet.Cells(i, 4).Formula = "=SUMPRODUCT(($B$2:$B$" & last_row & "=B" & _
i & ")/COUNTIFS($A$2:$A$" & last_row & ", $A$2:$A$" & last_row & ", $B$2:$B$" _
& last_row & ", $B$2:$B$" & last_row & "))"
Next i
End Sub
This VBA procedure does exactly what it is supposed to do and it works regardless of the length of the data. However, the formula I'm using is way too slow and seems to be an O(N^2) operation. How can I efficiently update these values on demand? I'm not opposed to simpler methods, but the method must be fairly automatic.
Dictionary
improves efficiencyOption Explicit
Sub Demo()
Dim i As Long, lastRow As Long
Dim arrData, rngData As Range
Dim oDicB, oDicAB
Dim sKeyB As String, sKeyAB As String
Set oDicB = CreateObject("scripting.dictionary")
Set oDicAB = CreateObject("scripting.dictionary")
lastRow = Cells(Rows.Count, "A").End(xlUp).Row
Set rngData = Range("A2:C" & lastRow)
arrData = rngData.Value
For i = LBound(arrData) To UBound(arrData)
arrData(i, 1) = CStr(arrData(i, 1))
arrData(i, 2) = CStr(arrData(i, 2))
sKeyB = arrData(i, 2)
sKeyAB = arrData(i, 1) & "|" & arrData(i, 2)
If Not oDicAB.exists(sKeyAB) Then
oDicAB(sKeyAB) = ""
If Not oDicB.exists(sKeyB) Then
oDicB(sKeyB) = 1
Else
oDicB(sKeyB) = oDicB(sKeyB) + 1
End If
End If
Next i
For i = LBound(arrData) To UBound(arrData)
sKeyB = arrData(i, 2)
If oDicB.exists(sKeyB) Then
arrData(i, 3) = oDicB(sKeyB)
End If
Next
rngData.Value = arrData
End Sub