Search code examples
rplothypothesis-test

How to plot a figure of power and true value of unknown parameter in R?


Assume that iid random sample X_i\sim Bernoulli(\pi) for i=1,\dots, 100 and sample size is 100. We want to do a hypothesis test that H_0: \pi\ge 0.6,\, H_a: \pi<0.6 I want to get a plot of the relation between our true parameter pi and power. I have got the function power. But I can only input each true pi=0.50, 0.51, 0.52, 0.53, ..., 0.59. How to plot a similar figure?

enter image description here

My code is as follows.

n=100
pi0=0.60

##power function
power_fun=function(N,pi,alpha)
{
  pvalue=c()
  power=c()
  rej=c()
  
  for (i in 1:N) {
    set.seed(i)
    y=rbinom(n,size=1,prob=pi)
    pi_hat=mean(y)
    z=(pi_hat-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
    
    rej[i]=ifelse(z<qnorm(0.05,mean=0,sd=1),1,0)
    
    pvalue[i]=pnorm(q=z,mean=0,sd=1)
    
    c_value=qnorm(alpha,mean=0,sd=1)
    aug_term=(pi-pi0)/sqrt(1/n*pi_hat*(1-pi_hat))
    power[i]=pnorm(c_value-aug_term,mean=0,sd=1)
    
  }

  mean_pvalue=mean(pvalue)

  sd_pvalue=sd(pvalue)
  

  mean_power=ifelse(pi<pi0,mean(power),NA)

  sd_power=sd(power)
  
  rej_rate=mean(pvalue<alpha)
  
  type1_error=ifelse(pi>=pi0,rej_rate,NA)
  
  type2_error=ifelse(pi<pi0,1-mean_power,NA)
  
  type2_error_emp=ifelse(pi<pi0,1-rej_rate,NA)
  
  mc_se=sqrt(1/N*rej_rate*(1-rej_rate))
  
  df_out=data.frame("pvalue"=mean_pvalue,"pvalue2"=sd_pvalue,"power"=mean_power,"ESE_power"=sd_power, 
                    "rejection rate"=rej_rate,
                    "Type I error"=  type1_error,"type_II_error"=type2_error,"Type_II_error"=type2_error_emp,
                    "MC_SE"=mc_se)
  rownames(df_out)=paste0("N=",N,", pi=",pi, ", alpha=", alpha)
  
  df_out=round(df_out,3)
  
  return(df_out)
}

For each pi, we can get power.


power_fun(N=1000,pi=0.5,alpha=0.05)
#power=0.643
power_fun(N=1000,pi=0.51,alpha=0.05)
# power=0.566
power_fun(N=1000,pi=0.52,alpha=0.05)
power_fun(N=1000,pi=0.53,alpha=0.05)
power_fun(N=1000,pi=0.54,alpha=0.05)

But this is too complicated. Is there an easy way to get the power of these pi values and plot their graph of pi and power?


Solution

  • This is more like a code-review.

    I think that you thought that you could vectorise over N, pi, and alpha. I don't think you can that with your current implementation.

    Following @Limey's advice:

    #' Power function
    #' 
    #' 
    #' @param N Total number of replications
    #' @param pi 
    #' @param alpha Significance level
    #' @param pi0 
    #' @param n Size of the replication
    #'
    #' @return
    #' 
    power_fun <- function(N, pi, alpha, pi0, n) {
      pvalue <- vector(mode = "numeric", length = N)
      power <- vector(mode = "numeric", length = N)
      rej <- vector(mode = "numeric", length = N)
      
      for (i in seq_len(N)) {
        # why?
        # set.seed(i)
        y = rbinom(n, size = 1, prob = pi)
        pi_hat = mean(y)
        z = (pi_hat - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
        
        rej[i] = ifelse(z < qnorm(0.05, mean = 0, sd = 1), 1, 0)
        
        pvalue[i] = pnorm(q = z, mean = 0, sd = 1)
        
        c_value = qnorm(alpha, mean = 0, sd = 1)
        aug_term = (pi - pi0) / sqrt(1 / n * pi_hat * (1 - pi_hat))
        power[i] = pnorm(c_value - aug_term, mean = 0, sd = 1)
        
      }
      
      mean_pvalue = mean(pvalue, na.rm = TRUE)
      sd_pvalue = sd(pvalue, na.rm = TRUE)
      # mean_power = ifelse(pi < pi0, mean(power), NA)
      mean_power = mean(power, na.rm = TRUE)
      sd_power = sd(power, na.rm = TRUE)
      rej_rate = mean(pvalue < alpha, na.rm = TRUE)
      
      type1_error = ifelse(pi >= pi0, rej_rate, NA)
      type2_error = ifelse(pi < pi0, 1 - mean_power, NA)
      type2_error_emp = ifelse(pi < pi0, 1 - rej_rate, NA)
      
      mc_se = sqrt(1 / N * rej_rate * (1 - rej_rate))
      
      df_out <- data.frame(
        N = N, 
        pi = pi, 
        alpha = alpha,
        pvalue = mean_pvalue,
        pvalue2 = sd_pvalue,
        power = mean_power,
        ESE_power = sd_power,
        rejection_rate = rej_rate,
        Type_I_error =  type1_error,
        type_II_error = type2_error,
        Type_II_error = type2_error_emp,
        MC_SE = mc_se
      )
      # moved this to above
      # rownames(df_out) = paste0("N=", N, ", pi=", pi, ", alpha=", alpha)
      
      # why?
      # df_out = round(df_out, 3)
      
      return(df_out)
    }
    

    Please read the comments and the changes.

    library(tidyverse)
    n <- 100
    pi0 <- 0.6
    
    # testing the function
    power_fun(N=1000,pi=0.5,alpha=0.05, pi0 = pi0, n = n)
    
    #' Plot for all pi going from 0.5 to 1.
    seq.default(0, 0.65, length.out = 200) %>%
    # seq.default(0.5, 1., length.out = 2) %>% 
      map_df(function(pi) power_fun(N=1000,pi=pi,alpha=0.05, pi0 = pi0, n = n)) %>% 
      as_tibble() -> 
      power_df
    # power_df %>% View()
    power_df %>% {
      ggplot(., aes(pi, power)) + 
        geom_line() + 
        # geom_point() +
        geom_vline(xintercept = pi0, linetype = "dashed") +
        NULL
    }
    
    

    Power plot with pi going from 0.5 to 0.65