Search code examples
Tags: r, data.table, sapply, mapply

R Run Regressions In Sequence


 # Reproducible toy data set: one row per student, sixteen uniform(0, 1)
 # variables named after the NATO alphabet, and a binary GROUP indicator.
 set.seed(1)
 data <- data.frame(
   student = 1:5000,
   alfa = runif(5000),
   bravo = runif(5000),
   charlie = runif(5000),
   delta = runif(5000),
   echo = runif(5000),
   foxtrot = runif(5000),
   golf = runif(5000),
   hotel = runif(5000),
   india = runif(5000),
   juliett = runif(5000),
   kilo = runif(5000),
   lima = runif(5000),
   mike = runif(5000),
   november = runif(5000),
   oscar = runif(5000),
   papa = runif(5000),
   # Draw one independent 0/1 label per row. Without an explicit size,
   # sample(0:1, replace = TRUE) returns only two values, which data.frame()
   # silently recycles, so GROUP would be constant or strictly alternating.
   GROUP = sample(0:1, 5000, replace = TRUE)
 )

##########################

# Covariate sets, stored as column names (character vectors). Bare symbols
# such as c(golf, hotel, india) would be evaluated immediately and fail with
# "object 'golf' not found", because the columns only exist inside `data`.
COVS1 <- c("golf", "hotel", "india")
COVS2 <- c("juliett", "kilo")
COVS3 <- c("lima", "mike", "november", "oscar", "papa")


## OVERALL
# Every response (alfa, bravo, charlie) is regressed on one predictor of
# interest (delta, echo, foxtrot) plus a cumulative covariate set:
# COVS1, then COVS1 + COVS2, then COVS1 + COVS2 + COVS3.
# The response is spelled `alfa` to match the column in `data`; `alpha` does
# not exist there and makes lm() fail with "object 'alpha' not found".
lm(alfa ~ delta + golf + hotel + india, data = data)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo, data = data)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(alfa ~ echo + golf + hotel + india, data = data)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo, data = data)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(alfa ~ foxtrot + golf + hotel + india, data = data)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo, data = data)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(bravo ~ delta + golf + hotel + india, data = data)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo, data = data)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(bravo ~ echo + golf + hotel + india, data = data)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo, data = data)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(bravo ~ foxtrot + golf + hotel + india, data = data)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo, data = data)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(charlie ~ delta + golf + hotel + india, data = data)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo, data = data)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(charlie ~ echo + golf + hotel + india, data = data)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo, data = data)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(charlie ~ foxtrot + golf + hotel + india, data = data)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo, data = data)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)

## GROUP == 0
# Same 27 models as the overall fit, restricted to rows with GROUP == 0.
# The subset is passed with `data = ...`; writing `data ~ subset(...)` would
# hand lm() a second formula instead of a data frame. Each call is also
# closed with its own parenthesis, and the response is spelled `alfa` to
# match the column in `data`.
lm(alfa ~ delta + golf + hotel + india, data = subset(data, GROUP == 0))
lm(alfa ~ delta + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(alfa ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(alfa ~ echo + golf + hotel + india, data = subset(data, GROUP == 0))
lm(alfa ~ echo + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(alfa ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(alfa ~ foxtrot + golf + hotel + india, data = subset(data, GROUP == 0))
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(bravo ~ delta + golf + hotel + india, data = subset(data, GROUP == 0))
lm(bravo ~ delta + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(bravo ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(bravo ~ echo + golf + hotel + india, data = subset(data, GROUP == 0))
lm(bravo ~ echo + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(bravo ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(bravo ~ foxtrot + golf + hotel + india, data = subset(data, GROUP == 0))
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(charlie ~ delta + golf + hotel + india, data = subset(data, GROUP == 0))
lm(charlie ~ delta + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(charlie ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(charlie ~ echo + golf + hotel + india, data = subset(data, GROUP == 0))
lm(charlie ~ echo + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(charlie ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))
lm(charlie ~ foxtrot + golf + hotel + india, data = subset(data, GROUP == 0))
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 0))
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 0))

## GROUP == 1
# Same 27 models as the overall fit, restricted to rows with GROUP == 1.
# The subset is passed with `data = ...`; writing `data ~ subset(...)` would
# hand lm() a second formula instead of a data frame. Each call is also
# closed with its own parenthesis, and the response is spelled `alfa` to
# match the column in `data`.
lm(alfa ~ delta + golf + hotel + india, data = subset(data, GROUP == 1))
lm(alfa ~ delta + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(alfa ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(alfa ~ echo + golf + hotel + india, data = subset(data, GROUP == 1))
lm(alfa ~ echo + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(alfa ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(alfa ~ foxtrot + golf + hotel + india, data = subset(data, GROUP == 1))
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(bravo ~ delta + golf + hotel + india, data = subset(data, GROUP == 1))
lm(bravo ~ delta + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(bravo ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(bravo ~ echo + golf + hotel + india, data = subset(data, GROUP == 1))
lm(bravo ~ echo + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(bravo ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(bravo ~ foxtrot + golf + hotel + india, data = subset(data, GROUP == 1))
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(charlie ~ delta + golf + hotel + india, data = subset(data, GROUP == 1))
lm(charlie ~ delta + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(charlie ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(charlie ~ echo + golf + hotel + india, data = subset(data, GROUP == 1))
lm(charlie ~ echo + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(charlie ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))
lm(charlie ~ foxtrot + golf + hotel + india, data = subset(data, GROUP == 1))
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo, data = subset(data, GROUP == 1))
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = subset(data, GROUP == 1))

I am wondering how I can run all of these regressions using some kind of function. Each regression regresses a response (e.g. 'alfa') on one predictor of interest ('delta', 'echo', or 'foxtrot') plus COVS1, then plus COVS1 and COVS2, and then plus COVS1, COVS2, and COVS3. This is done for the whole data and then separately for GROUP == 0 and GROUP == 1. The same process is repeated with 'bravo' and then 'charlie' in place of 'alfa'. I would like to run all of these models and store, for each one, the model together with its coefficient estimates and standard errors.


Solution

  • We could create a loop to do this

    # Covariate blocks as column-name vectors, gathered into a list so that
    # the first j blocks can be selected with COVS[seq_len(j)].
    COVS1 <- c("golf", "hotel", "india")
    COVS2 <- c("juliett", "kilo")
    COVS3 <- c("lima", "mike", "november", "oscar", "papa")
    COVS <- list(COVS1, COVS2, COVS3)

    # All response/predictor pairings (3 x 3 = 9 rows), kept as character
    # columns and then sorted so rows sharing a response are adjacent.
    df1 <- expand.grid(
      resp = c("alfa", "bravo", "charlie"),
      pred = c("delta", "echo", "foxtrot"),
      stringsAsFactors = FALSE
    )
    df1 <- df1[with(df1, order(resp)), ]
    
    
    
    library(broom)
    # Fit one model per (response, predictor) row of `df1` and per cumulative
    # covariate set. The result is a list with one element per row of `df1`,
    # each holding one tidy coefficient table (estimate, std.error, ...) for
    # COVS1, COVS1 + COVS2, and COVS1 + COVS2 + COVS3.
    # The outer index must run over the rows of `df1`; iterating over
    # seq_along(COVS) would only reach the first three rows (the `alfa` models).
    lst_mod <- lapply(seq_len(nrow(df1)), function(i) {
      lapply(seq_along(COVS), function(j) {
        # Right-hand side: predictor of interest plus covariate sets 1..j.
        fmla <- reformulate(
          c(df1$pred[i], unlist(COVS[seq_len(j)])),
          response = df1$resp[i]
        )
        tidy(lm(fmla, data = data))
      })
    })
    

    Or if we need a single dataset

    library(dplyr)
    library(purrr)
    # Same fits as the nested-list version, but stacked into a single data
    # frame: one row per coefficient, tagged with the response variable and
    # the formula that produced it.
    # The outer index runs over every row of `df1` so that all
    # response/predictor pairs are fitted, not just the first three.
    out_dat <- lapply(seq_len(nrow(df1)), function(i) {
      map_dfr(seq_along(COVS), ~ {
        # Right-hand side: predictor of interest plus covariate sets 1..(.x).
        fmla <- reformulate(
          c(df1$pred[i], unlist(COVS[seq_len(.x)])),
          response = df1$resp[i]
        )
        tidy(lm(fmla, data = data)) %>%
          # list(fmla) is recycled, giving a list-column of formulas.
          mutate(response_variable = df1$resp[i], formula = list(fmla))
      })
    }) %>%
      bind_rows()
    

    If we need to apply the lm on the subset of the data, subset the data

    # Per-group copies of the data: rows where GROUP is 0 and rows where it
    # is 1. which() drops rows whose comparison is NA, as subset() does.
    dat0 <- data[which(data$GROUP == 0), , drop = FALSE]
    dat1 <- data[which(data$GROUP == 1), , drop = FALSE]
    

    and then pass 'dat0' or 'dat1' (instead of 'data') as the `data` argument of `lm()`.