I'm asked to solve the following problem:
But I have trouble generating the 10000 samples using 16 sets of parameters. Here's what I have done:
library(MASS)
library(data.table)
library(Matrix)
library(stats)
set.seed(2000)
options(scipen=100)
N <- c(100, 500)
K <- c(1, 10)
ETA <- c(0.05, 1)
RHO <- c(0, 0.5)
reps <- 10000
DGP<-function(n,k,eta,rho){
# generate D, then get \Lambda=DD', where z \sim N(0,\Lambda)
D <- matrix(0, nrow = k, ncol = k)
for (x in 1:k) {
for (y in 1:k) {
D[x, y] <- ifelse(x > y, 1, ifelse(x == y, 1, 0))
}
}
# generate Z
Z <- mvrnorm(n, mu = rep(0, k), Sigma = D%*%t(D))
# generate error terms \epsilon, u, where y=\epsilon (since \beta-9)
# and u is the error term for x=z'\pi+u
err <- mvrnorm(n, mu = rep(0, 2), Sigma = matrix(c(1, rho, rho, 1), ncol = 2))
Y <- as.matrix(err[,1])
u <- err[,2]
# then we can get X
X <- Z %*% rep(eta,k) + u
return(data.frame(Y,X,Z))
}
# test if the DGP function works
DGP(500,1,1,0.5)
# generate the 16 samples
data<-list()
# I know this is inefficient but I gave up
para<-matrix(data=c(100,1,0.05,0,500,1,0.05,0,
100,10,0.05,0,500,10,0.05,0,
100,1,1,0,500,1,1,0,
100,1,0.05,0.5,500,1,0.05,0.5,
100,10,1,0,500,10,1,0,
100,10,1,0.5,500,10,1,0.5,
100,1,1,0.5,500,1,1,0.5,
100,10,0.05,0.5,500,10,0.05,0.5),nrow=16,ncol=4,byrow=TRUE)
datalist<-function(){
for (i in 1:16){
df<-DGP(para[i,][1],para[i,][2],para[i,][3],para[i,][4])
data[[i]]<-df
}
return(data)
}
sample<-datalist() # at least I have generated one sample
Is there a way to generate and store 10000 samples for the 16 sets of parameters more efficiently?
Here is a way:
library(MASS)
params <- expand.grid(
n = c(100, 500),
K = c(1, 10),
eta = c(0.05, 1),
rho = c(0, 0.5)
)
epsilon <- rnorm(500, 0, 1)
u <- rnorm(500, 0, 1)
Lambda <- function(K) {
D <- matrix(0, nrow = K, ncol = K)
D[upper.tri(D, diag = TRUE)] <- 1
tcrossprod(D)
}
reps <- 10000
beta <- 2
sims <- function(n, K, eta, rho) {
z <- replicate(reps, mvrnorm(n, rep(0, K), Lambda(K)))
Omega <- diag(2)
Omega[1, 2] <- Omega[2, 1] <- rho
epsilon_u <- replicate(reps, mvrnorm(n, c(0, 0), Omega))
epsilon <- epsilon_u[, 1, ]
u <- epsilon_u[, 2, ]
x <- eta * apply(z, 3, rowSums) + u
y <- beta * x + epsilon
list(x = x, y = y, z = z)
}
simulations <- vector("list", length = 16)
for(i in 1:16) {
pars <- params[i, ]
simulations[[i]] <- sims(pars$n, pars$K, pars$eta, pars$rho)
}
For each combination of the parameters, x
is a n x 10000
-matrix, y
as well, and z
is a 3-dimensional array with size (n, K, 10000)
.
I didn't understand the interest of simulating the x_i
if we assume beta=0
, so I took another value of beta
.