I have written the code in R (see below). It works when `N = 100`.
I need to run the `dist_statistic` function with `N = 1000` iterations.
Inside this function, the Cholesky decomposition is used implicitly. For the Cholesky decomposition, the matrix must be positive definite. But the elements of the i-th matrix are random numbers, so I cannot guarantee that it is positive definite. As a result, I see the error:
# Error in chol.default(rxx) :
# the leading minor of order 4 is not positive definite
and then the calculations stop.
Question: How can I catch the error, find out where it occurred, and continue the calculations by generating a new positive definite matrix?
library(fungible)
# Simulation settings ----
# `n` is passed as `nvar` to monte1() and sets the dimension of R1.
n <- 4
k <- 2
p <- n

# Sizes passed as `nsub` to monte1() for the first and second sample.
n1 <- 100
n2 <- 100

# Correlation matrix passed as `cormat` when the first sample is generated.
R1 <- matrix(
  c(
    1.00, 0.51, 0.44, 0.22,
    0.51, 1.00, 0.36, 0.21,
    0.44, 0.36, 1.00, 0.26,
    0.22, 0.21, 0.26, 1.00
  ),
  nrow = n,
  ncol = n
)

# Vectors passed as `skewvec` / `kurtvec` to monte1(), one entry per variable.
skew_vec <- c(-0.254, -0.083, 0.443, -0.017)
kurt_vec <- c(6.133, 4.709, 6.619, 4.276)
# Run N replications and collect one statistic per replication.
#
# Each replication i:
#   1. draws a first sample with monte1() using correlation matrix R1,
#   2. draws a sample correlation matrix R2 from R1 with corSample(),
#   3. draws a second sample with monte1() using R2 and randomly perturbed
#      skewness / kurtosis vectors,
#   4. pools the two adfCor() matrices, takes the generalized inverse, and
#      evaluates a quadratic form in the difference between the
#      below-diagonal elements of R1 and R2.
#
# Arguments:
#   N       number of replications.
#   n       number of variables (passed as `nvar`; also the length of the
#           random perturbation vector).
#   n1, n2  sizes of the first and second sample (passed as `nsub`).
#   R1      correlation matrix used for the first sample.
#
# Reads `skew_vec` and `kurt_vec` from the enclosing environment.
#
# Returns a list with
#   statistic  numeric vector of length N,
#   iteration  number of replications performed (N).
#
# Any error raised inside a replication (for example the failing Cholesky
# step described in the accompanying text) is NOT caught here: it propagates
# to the caller and aborts the run.
dist_statistic <- function(N, n, n1, n2, R1) {
  # Loop invariants: the below-diagonal mask, the vectorised R1 and the
  # sample-size factor do not depend on the replication.
  below_diag <- lower.tri(R1)
  vR1 <- R1[below_diag]
  size_factor <- n1 * n2 / (n1 + n2)

  # Preallocate instead of growing the result with c() on every pass.
  Q <- numeric(N)

  # seq_len() yields an empty sequence for N = 0, whereas 1:N would
  # iterate over c(1, 0).
  for (i in seq_len(N)) {
    X1 <- monte1(
      seed = i + 123, nvar = n, nsub = n1, cormat = R1,
      skewvec = skew_vec,
      kurtvec = kurt_vec
    )$data
    R2 <- corSample(R1, n = 10000)$cor.sample
    rand_vec <- rnorm(n)
    X2 <- monte1(
      seed = i + 321, nvar = n, nsub = n2, cormat = R2,
      skewvec = skew_vec + rand_vec,
      kurtvec = kurt_vec + rand_vec
    )$data

    G1 <- adfCor(X1)
    G2 <- adfCor(X2)
    G <- ((n1 - 1) * G1 + (n2 - 1) * G2) / (n1 + n2 - 2)
    Ginv <- MASS::ginv(G)

    # Vectorisation: below-diagonal elements in column-major order.
    vdiff <- vR1 - R2[below_diag]
    # The quadratic form is a 1 x 1 matrix; drop() turns it into a scalar.
    Q[i] <- size_factor * drop((vdiff %*% Ginv) %*% vdiff)
    print(i)
  }

  list(statistic = Q, iteration = N)
}
# Run 100 replications with the settings defined above.
s <- dist_statistic(N = 100, n = n, n1 = n1, n2 = n2, R1 = R1)
Here's an approach. I first rewrite the contents of your loop as a function:
# Compute the statistic for a single replication.
#
# `i` offsets the seeds handed to monte1() (i + 123 for the first sample,
# i + 321 for the second). Everything else -- n, n1, n2, R1, skew_vec and
# kurt_vec -- is read from the enclosing environment.
#
# Returns the statistic as a 1 x 1 matrix. Errors raised by the helpers are
# not handled here and propagate to the caller.
my_function <- function(i) {
  # First sample, generated from R1.
  sample_a <- monte1(
    seed = i + 123, nvar = n, nsub = n1, cormat = R1,
    skewvec = skew_vec, kurtvec = kurt_vec
  )$data

  # Second sample, generated from a sampled correlation matrix and
  # perturbed skewness / kurtosis vectors.
  R_b <- corSample(R1, n = 10000)$cor.sample
  noise <- rnorm(n)
  sample_b <- monte1(
    seed = i + 321, nvar = n, nsub = n2, cormat = R_b,
    skewvec = skew_vec + noise, kurtvec = kurt_vec + noise
  )$data

  # Pool the two adfCor() results and take the generalized inverse.
  adf_a <- adfCor(sample_a)
  adf_b <- adfCor(sample_b)
  pooled <- ((n1 - 1) * adf_a + (n2 - 1) * adf_b) / (n1 + n2 - 2)
  pooled_inv <- MASS::ginv(pooled)

  # Vectorisation: keep the elements strictly below the diagonal.
  below_diag <- row(R1) > col(R_b)
  diff_vec <- R1[below_diag] - R_b[below_diag]

  n1 * n2 / (n1 + n2) * ((diff_vec %*% pooled_inv) %*% diff_vec)
}
Now we can use that function inside `tryCatch()`:
# Collect exactly N valid statistics, retrying replications that fail.
#
# my_function(i) is called for i = 1, 2, ...; a call that raises an error is
# skipped and the next i is tried, until N values have been collected.
# Progress is printed with cat(): "." for a success, "*" for a failure, and
# a line break after every 20 attempts.
#
# Arguments:
#   N             number of valid statistics to collect.
#   n, n1, n2, R1 kept for interface compatibility; they are not forwarded,
#                 because my_function() takes only `i` and reads its
#                 settings from the enclosing environment.
#   max_attempts  upper bound on the number of my_function() calls
#                 (default 50 * N). Guards against looping forever when
#                 every call fails, e.g. because a package is not loaded.
#
# Returns a list with
#   statistic  numeric vector of the N collected values,
#   iteration  total number of attempts made (successes + failures).
#
# Stops with an error, quoting the last failure message, if N valid values
# cannot be obtained within `max_attempts` attempts.
dist_statistic <- function(N, n, n1, n2, R1, max_attempts = 50 * N) {
  # Preallocate instead of growing the result with c() on every success.
  Q <- numeric(N)
  counter <- 1       # index of the next valid result to store
  i <- 1             # attempt number; also the seed offset for my_function()
  last_error <- NULL # message of the most recent failure, for diagnostics

  while (counter <= N) {
    if (i > max_attempts) {
      cat("\n")
      stop(
        "Only ", counter - 1, " of ", N, " replications succeeded after ",
        i - 1, " attempts; last error: ",
        if (is.null(last_error)) "none" else last_error,
        call. = FALSE
      )
    }

    # Return the condition object instead of unwinding, so the bookkeeping
    # below is ordinary straight-line code.
    value <- tryCatch(my_function(i), error = function(e) e)

    if (inherits(value, "error")) {
      last_error <- conditionMessage(value)
      cat("*")
    } else {
      # my_function() returns a 1 x 1 matrix; drop() makes it a scalar.
      Q[counter] <- drop(value)
      cat(".")
      counter <- counter + 1
    }

    if (i %% 20 == 0) cat("\n")
    i <- i + 1
  }
  cat("\n")

  list(statistic = Q, iteration = i - 1)
}
There are two counters: `i` controls the seed, while `counter` ensures you get exactly the number of valid outputs specified by `N`. The `cat()` calls are purely cosmetic: a `.` marks a successful iteration and a `*` marks one that raised an error. Hence:
# Ask for 110 valid statistics; the lines below are the recorded output.
s <- dist_statistic(N = 110, n = n, n1 = n1, n2 = n2, R1 = R1)
# ....................
# ....................
# ....................
# ....................
# ....................
# .*..*.......

# Inspect the structure of the returned list.
str(s)
# List of 2
# $ statistic: num [1:110] 5.91 2.59 5.49 5.01 1.65 ...
# $ iteration: num 112