Search code examples
rstatistics-bootstrap

R: bootstrap mean not inside 95% CI?


library(boot)
set.seed(3)
run2 <- structure(list(X = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
    beta1 = c(1.40597500087168, 1.41848914211916, 1.42962301367006, 
    1.42029917323066, 1.4289966153492, 1.42596562840805, 1.42680678397731, 
    1.41425063435813, 1.40927087463386, 1.41364428128251)), .Names = c("X", 
"beta1"), class = "data.frame", row.names = 11:20)

#my boostrap mean
> b11 = mean(sample(run2$beta1, 10000, replace = TRUE)); b11
[1] 1.419424


boot.beta1 <- function(d, i){
  d2 <- d[i, ]
  return(d2$beta1)
}

> bootbeta1 <- boot(run2, boot.beta1, R = 10000)
> boot.ci(bootbeta1, type  = c("norm"))
BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
Based on 10000 bootstrap replicates

CALL : 
boot.ci(boot.out = bootbeta1, type = c("norm"))

Intervals : 
Level      Normal        
95%   ( 1.377,  1.408 )  
Calculations and Intervals on Original Scale

My boostrapped mean is 1.419, but that's outside of the 95% CI? How can I fix this?


Solution

  • bootbeta1 is just returning the vector of values, not the mean, and your non-boot version is not actually bootstrapping as it's not creating replicates on which to calculate a statistic, but just sampling a lot.

    In base R, it would look like

    run2 <- data.frame(
        X = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
        beta1 = c(1.40597500087168, 1.41848914211916, 1.42962301367006, 1.42029917323066, 1.4289966153492, 1.42596562840805, 1.42680678397731, 1.41425063435813, 1.40927087463386, 1.41364428128251)
    )
    
    set.seed(3)
    straps <- replicate(10000, mean(sample(run2$beta1, nrow(run2), replace = TRUE)))
    
    mean(straps)
    #> [1] 1.419342
    
    # 95% confidence interval
    qnorm(c(.025, .975), mean(straps), sd(straps))
    #> [1] 1.414387 1.424298
    

    With boot, it would look like

    library(boot)
    
    boot_mean <- boot(run2, function(d, i){mean(d[i, 'beta1'])}, R = 10000)
    boot_mean
    #> 
    #> ORDINARY NONPARAMETRIC BOOTSTRAP
    #> 
    #> 
    #> Call:
    #> boot(data = run2, statistic = function(d, i) {
    #>     mean(d[i, "beta1"])
    #> }, R = 10000)
    #> 
    #> 
    #> Bootstrap Statistics :
    #>     original        bias    std. error
    #> t1* 1.419332 -1.286626e-05 0.002550844
    
    boot_ci <- boot.ci(boot_mean, type = 'norm')
    boot_ci
    #> BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
    #> Based on 10000 bootstrap replicates
    #> 
    #> CALL : 
    #> boot.ci(boot.out = boot_mean, type = "norm")
    #> 
    #> Intervals : 
    #> Level      Normal        
    #> 95%   ( 1.414,  1.424 )  
    #> Calculations and Intervals on Original Scale
    

    Both now contain the mean.