Search code examples
r dataframe plyr stat

How to get r.squared for each regression?


I'm working with a huge data frame with a structure similar to the following. I use output_reg to store the slope and intercept for each treatment, but I need to add r.squared for each lm(y ~ x) and store it in another column beside the other two. Any hints on that?

library(plyr)

# Example data: three fields (treatments) with three observations each.
field <- c("t1", "t1", "t1", "t2", "t2", "t2", "t3", "t3", "t3")
predictor <- c(4.2, 5.3, 5.4, 6, 7, 8.5, 9, 10.1, 11)
response <- c(5.1, 5.1, 2.4, 6.1, 7.7, 5.5, 1.99, 5.42, 2.5)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
my_df <- data.frame(field, predictor, response, stringsAsFactors = FALSE)

# Fit one simple linear regression (response ~ predictor) per field.
B <- unique(my_df$field)
# Preallocate the result list instead of growing it inside the loop.
output_reg <- vector("list", length(B))
# seq_along() yields an empty sequence when B is empty, whereas
# 1:length(B) would iterate over c(1, 0).
for (i in seq_along(B)) {
  index <- my_df[my_df$field == B[i], ]
  x <- index$predictor
  y <- index$response
  output_reg[[i]] <- lm(y ~ x) # fitted model for the i-th field
}

Thanks


Solution

  • r.squared can be accessed via the summary of the model. Try this:

    # Fit the model, then read R-squared from its summary object.
    m <- lm(y ~ x)
    rs <- summary(m)[["r.squared"]]
    

    The summary object of the linear regression result contains almost everything you need:

    # Fit lm(y ~ x) separately for each field and collect the intercept,
    # slope and R-squared of every fit.
    B <- unique(my_df$field)
    # Preallocate the result list rather than growing it in the loop.
    output_reg <- vector("list", length(B))
    # seq_along() gives an empty sequence when B is empty, whereas
    # 1:length(B) would iterate over c(1, 0).
    for (i in seq_along(B)) {
        index <- my_df[my_df$field == B[i], ]
        x <- index$predictor
        y <- index$response
        m <- lm(y ~ x)
        s <- summary(m)            # get the summary of the model
        # extract everything you need from the summary object
        output_reg[[i]] <- c(
            s$coefficients[, "Estimate"],
            r.squared = s$r.squared
        )
    }
    output_reg
    #[[1]]
    #(Intercept)           x   r.squared 
    # 10.7537594  -1.3195489   0.3176692 
    
    #[[2]]
    #(Intercept)           x   r.squared 
    #  8.8473684  -0.3368421   0.1389040 
    
    #[[3]]
    #(Intercept)           x   r.squared 
    #-0.30500000  0.35963455  0.03788593 
    

    To bind the result together:

    # Stack the named vectors in output_reg row-wise into a single matrix,
    # one row per list element.
    do.call(rbind, output_reg)
    #      (Intercept)          x  r.squared
    # [1,]   10.753759 -1.3195489 0.31766917
    # [2,]    8.847368 -0.3368421 0.13890396
    # [3,]   -0.305000  0.3596346 0.03788593