Suppose I have a arbitrary probability matrix P
like below,
P = matrix(c(0.3,0.2,0.2,0.2,0.3,0.2,0.2,0.2,0.3),3,3)
P
[,1] [,2] [,3]
[1,] 0.3 0.2 0.2
[2,] 0.2 0.3 0.2
[3,] 0.2 0.2 0.3
For single adjacency matrix, it is generated like (unweighted, no self-loof)
tem = matrix(runif(3^2), nrow = 3)
tmpG = 1 * (tmpmat < P)
tmpG[lower.tri(tmpG)] <- 0
tmpG <- t(tmpG) + tmpG - diag(diag(tmpG))
However, what if I need to generate 100 adjacency matrix, so I write down the following code
G = list()
for (i in 1:rep) {
tmpmat = matrix(runif(n^2), nrow = n)
tmpG = 1 * (tmpmat < P)
tmpG[lower.tri(tmpG)] <- 0
tmpG <- t(tmpG) + tmpG - diag(diag(tmpG))
if (noloop) {
diag(tmpG) = 0
}
G[[i]] = tmpG
}
In my case, the n >10000
and T = 1000
, so it is extremely slow, any better idea to improve this?
I think we can do a bit better by only working with a vector of the length needed, and putting it into a matrix at the very end. I haven't checked this very carefully, and your code has no comments for me to compare intention to, so do make sure this is right before trusting it.
p_vec = P[upper.tri(P, diag = !noloop)]
nn = length(p_vec)
tmpG_vec = runif(nn) < p_vec
tmpG = matrix(0, n, n)
tmpG[upper.tri(tmpG, diag = !noloop)] = tmpG_vec
tmpG[lower.tri(tmpG, diag = !noloop)] = tmpG_vec
tmpG
We can then wrap that in replicate
for the iteration.
Benchmarking on more dimensions/higher reps, we get about a 25% speedup, but it's still pretty slow (I aborted a benchmark of n = 5000
because I got tired of waiting). You can probably get quite a bit of speed by running in parallel - say an almost 8x speedup if you have 8 cores. See, e.g., this question, though there may be more modern ways to do it.
rep = 5L
n = 2000
noloop = TRUE
P = matrix(runif(n^2), n)
P = P %*% t(P)
P = P / colSums(P)
p_vec = P[upper.tri(P, diag = !noloop)]
nn = length(p_vec)
microbenchmark::microbenchmark(
loop = {
G = list()
for (i in 1:rep) {
tmpmat = matrix(runif(n^2), nrow = n)
tmpG = 1 * (tmpmat < P)
tmpG[lower.tri(tmpG)] <- 0
tmpG <- t(tmpG) + tmpG - diag(diag(tmpG))
if (noloop) {
diag(tmpG) = 0
}
G[[i]] = tmpG
}
},
diagonal = replicate(rep, {
tmpG_vec = runif(nn) < p_vec
tmpG = matrix(0, n, n)
tmpG[upper.tri(tmpG, diag = !noloop)] = tmpG_vec
tmpG[lower.tri(tmpG, diag = !noloop)] = tmpG_vec
tmpG
}),
times = 5L
)
# Unit: seconds
# expr min lq mean median uq max neval
# loop 1.525028 1.614544 2.136637 2.148771 2.387423 3.007417 5
# diagonal 1.312022 1.360457 1.592914 1.444902 1.602536 2.244652 5