Search code examples
ralpha

Compare theoretical and empirical alpha


I've written the following code to compare the theoretical alpha = 0.05 with the empirical one from the buit-in t.test in Rstudio:

set.seed(1)
N <- 1000
n <- 20
k <- 500

poblacion <- rnorm(N, 10, 10) #Sample
mu.pob <- mean(poblacion)
sd.pob <- sd(poblacion)
p <- vector(length=k)
for (i in 1:k) {
  muestra <- poblacion[sample(1:N, n)]
  p[i] <- t.test(muestra, mu=mu.pob)$p.value
}
a_teo <- 0.05
a_emp <- length(p[p < a_teo])/k
sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp)

And it works printing both theoretical and empirical values. Now I wanna make it more general, to different values of 'n', so I wrote this:

set.seed(1)
N <- 1000
n <- 20
k <- 500

z <-c()
for (i in n){
  poblacion <- rnorm(N, 10, 10)
  mu.pob <- mean(poblacion)
  sd.pob <- sd(poblacion)
  p <- vector(length=k)
  for (j in 1:k){
     muestra <- poblacion[sample(1:N, length(n))]
     p[j] <- t.test(muestra, mu = mu.pob)$p.value
  }
  a_teo = 0.05
  a_emp = length(p[p<a_teo])/k
  append(z, a_emp)
  print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))
}
plot(n, z)

Solution

  • The sprintf alone won't do in a for loop, you need wrap it in print.

    > for (i in n) {
    +   poblacion <- rnorm(N, 10, 10)
    +   mu.pob <- mean(poblacion)
    +   sd.pob <- sd(poblacion)
    +   p <- vector(length=k)
    +   for (j in 1:k) {
    +     muestra <- poblacion[sample(1:N, length(n))]
    +     p[j] <- t.test(muestra, mu=mu.pob)$p.value
    +   }
    +   a_teo <- 0.05
    +   a_emp <- length(p[p<a_teo])/k
    +   print(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))
    + }
    [1] "alpha_teo = 0.050 <-> alpha_emp = 0.056"
    [1] "alpha_teo = 0.050 <-> alpha_emp = 0.050"
    [1] "alpha_teo = 0.050 <-> alpha_emp = 0.064"
    [1] "alpha_teo = 0.050 <-> alpha_emp = 0.048"
    

    A more R-ish way to do this would be to wrap the logic in a function.

    > comp_fn <- \(N, n, k, alpha=.05, verbose=FALSE) {
    +   poblacion <- rnorm(N, 10, 10)
    +   mu.pob <- mean(poblacion)
    +   sd.pob <- sd(poblacion)
    +   p <- replicate(k, t.test(poblacion[sample(1:N, n)], mu=mu.pob)$p.value)
    +   a_emp <- length(p[p < alpha])/k
    +   if (verbose) {
    +     message(sprintf("alpha_teo = %.3f <-> alpha_emp = %.3f", a_teo, a_emp))
    +   }
    +   c(a_teo, a_emp)
    + }
    > 
    > set.seed(1)
    > comp_fn(1000, 20, 500)
    [1] 0.050 0.058
    > comp_fn(1000, 20, 500, verbose=TRUE)
    alpha_teo = 0.050 <-> alpha_emp = 0.042
    [1] 0.050 0.042
    

    To loop over different arguments, mapply is your friend.

    > set.seed(1)
    > mapply(comp_fn, 1000, c(2, 10, 15, 20), 500)
          [,1]  [,2]  [,3]  [,4]
    [1,] 0.050 0.050 0.050 0.050
    [2,] 0.058 0.054 0.048 0.046