Search code examples
r regression p-value

Extract lists of p-values for each regression coefficient (1104 linear regressions) with R


I am trying to run 1104 linear regressions with the same model. My independent variable does not change, but my dependent variable does: I have 1104 dependent variables. So far I can only extract the coefficients (intercepts included), the t-statistics and the R-squared statistics. I would also like to extract the p-values of each coefficient for all 1104 linear regressions. Is there an easy way to do that?

Here are my codes:

run 1104 regressions for M1

# Coerce to a plain data frame so that both positional (`[, i]`) and named
# (`[, "rm"]`) column lookups return ordinary vectors.
bigtest <- as.data.frame(bigtest)

# Fit one simple regression per dependent variable: columns 135 to 1238
# (1104 models), each regressed on the single "rm" column.
test <- lapply(135:1238, function(i) lm(bigtest[, i] ~ bigtest[, "rm"]))

# Summarise every model exactly once; the summaries are reused for all of the
# statistics below instead of calling summary() again for each of them.
test_summaries <- lapply(test, summary)

# Extract one cell of the coefficient table from every model summary.
# Rows: 1 = intercept, 2 = slope on "rm".
# Columns: 1 = estimate, 3 = t value (column 4 of the same table holds the
# p-value).
# vapply() guarantees a numeric vector with one value per model.
coef_cell <- function(row, col) {
  vapply(test_summaries, function(s) s$coefficients[row, col], numeric(1))
}

# R-squared of each regression.
reg_sq <- vapply(test_summaries, function(s) s$r.squared, numeric(1))

# Estimated coefficients (betas).
coefrm <- coef_cell(2, 1)
intercept <- coef_cell(1, 1)

# t statistics of the coefficients.
tstatrm <- coef_cell(2, 3)
tstatint <- coef_cell(1, 3)

# One row per regression, one column per statistic.
m1 <- cbind(reg_sq, coefrm, intercept, tstatrm, tstatint)
resultsM1 <- as.data.frame(m1)

Solution

  • Here's a tidyverse solution in several parts, which is hopefully easier to read that way :-) I used mtcars as a toy dataset, with mpg as the fixed independent variable.

    library(dplyr)
    library(purrr)
    library(broom)
    library(tibble)
    
    # Let `broom::tidy` do the hard work: it turns each fitted model into a
    # coefficient table (term, estimate, std.error, statistic, p.value).

    # One regression per response column (columns 2 to 10), always against mpg.
    # Iterating over a self-named vector of column names yields a list that is
    # already named by response.
    test2 <- lapply(setNames(nm = names(mtcars)[2:10]),
                    function(v) broom::tidy(lm(mtcars[, v] ~ mtcars[, "mpg"])))

    # Stack the per-model tables; `.id` stores each list name in a leading
    # `which_dependent` column. Then give the two terms readable labels.
    basic_information <-
       bind_rows(test2, .id = "which_dependent") %>%
       mutate(term = ifelse(term == "(Intercept)", "Intercept", "mpg"))

    basic_information
    #> # A tibble: 18 x 6
    #>    which_dependent term      estimate std.error statistic  p.value
    #>    <chr>           <chr>        <dbl>     <dbl>     <dbl>    <dbl>
    #>  1 cyl             Intercept  11.3       0.593      19.0  2.87e-18
    #>  2 cyl             mpg        -0.253     0.0283     -8.92 6.11e-10
    #>  3 disp            Intercept 581.       41.7        13.9  1.26e-14
    #>  4 disp            mpg       -17.4       1.99       -8.75 9.38e-10
    #>  5 hp              Intercept 324.       27.4        11.8  8.25e-13
    #>  6 hp              mpg        -8.83      1.31       -6.74 1.79e- 7
    #>  7 drat            Intercept   2.38      0.248       9.59 1.20e-10
    #>  8 drat            mpg         0.0604    0.0119      5.10 1.78e- 5
    #>  9 wt              Intercept   6.05      0.309      19.6  1.20e-18
    #> 10 wt              mpg        -0.141     0.0147     -9.56 1.29e-10
    #> 11 qsec            Intercept  15.4       1.03       14.9  2.05e-15
    #> 12 qsec            mpg         0.124     0.0492      2.53 1.71e- 2
    #> 13 vs              Intercept  -0.678     0.239      -2.84 8.11e- 3
    #> 14 vs              mpg         0.0555    0.0114      4.86 3.42e- 5
    #> 15 am              Intercept  -0.591     0.253      -2.33 2.64e- 2
    #> 16 am              mpg         0.0497    0.0121      4.11 2.85e- 4
    #> 17 gear            Intercept   2.51      0.411       6.10 1.05e- 6
    #> 18 gear            mpg         0.0588    0.0196      3.00 5.40e- 3
    

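    Just to change things up a bit, this time we'll use `map` to build each model from a formula, then collect the model-level statistics (R-squared and residual standard error) and join them onto the coefficient table.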

    # Name of the single predictor shared by every model. It is called `y`
    # here, but it is the right-hand side of each formula built below.
    y <- 'mpg'
    # Names of the response columns; one regression is fitted per name.
    x <- names(mtcars[2:10])

    # Fit "<response>~mpg" for every response. Passing a self-named vector
    # makes the resulting list of models carry the response names.
    models <- map(setNames(x, x),
                  ~ lm(as.formula(paste(.x, y, sep="~")),
                       data=mtcars))

    # Summarise each model once and reuse the summary for both statistics.
    model_summaries <- map(models, summary)

    # NOTE: despite its name, `pvalues` holds model-level fit statistics
    # (R-squared and residual standard error), one row per response. The
    # coefficient p-values live in `basic_information$p.value`.
    # map_dbl() keeps the list names, which data.frame() uses as row names.
    pvalues <-
       data.frame(rsquared = map_dbl(model_summaries, ~ .x$r.squared),
                  RSE = map_dbl(model_summaries, ~ .x$sigma)) %>%
       rownames_to_column(var = "which_dependent")

    # Attach the model-level statistics to every coefficient row. The key is
    # spelled out so the join cannot silently pick up other shared columns.
    results <- full_join(basic_information, pvalues, by = "which_dependent")

    results
    # A tibble: 18 x 8
       which_dependent term      estimate std.error statistic  p.value rsquared    RSE
       <chr>           <chr>        <dbl>     <dbl>     <dbl>    <dbl>    <dbl>  <dbl>
     1 cyl             Intercept  11.3       0.593      19.0  2.87e-18    0.726  0.950
     2 cyl             mpg        -0.253     0.0283     -8.92 6.11e-10    0.726  0.950
     3 disp            Intercept 581.       41.7        13.9  1.26e-14    0.718 66.9  
     4 disp            mpg       -17.4       1.99       -8.75 9.38e-10    0.718 66.9  
     5 hp              Intercept 324.       27.4        11.8  8.25e-13    0.602 43.9  
     6 hp              mpg        -8.83      1.31       -6.74 1.79e- 7    0.602 43.9  
     7 drat            Intercept   2.38      0.248       9.59 1.20e-10    0.464  0.398
     8 drat            mpg         0.0604    0.0119      5.10 1.78e- 5    0.464  0.398
     9 wt              Intercept   6.05      0.309      19.6  1.20e-18    0.753  0.494
    10 wt              mpg        -0.141     0.0147     -9.56 1.29e-10    0.753  0.494
    11 qsec            Intercept  15.4       1.03       14.9  2.05e-15    0.175  1.65 
    12 qsec            mpg         0.124     0.0492      2.53 1.71e- 2    0.175  1.65 
    13 vs              Intercept  -0.678     0.239      -2.84 8.11e- 3    0.441  0.383
    14 vs              mpg         0.0555    0.0114      4.86 3.42e- 5    0.441  0.383
    15 am              Intercept  -0.591     0.253      -2.33 2.64e- 2    0.360  0.406
    16 am              mpg         0.0497    0.0121      4.11 2.85e- 4    0.360  0.406
    17 gear            Intercept   2.51      0.411       6.10 1.05e- 6    0.231  0.658
    18 gear            mpg         0.0588    0.0196      3.00 5.40e- 3    0.231  0.658