Search code examples
r lapply mapply

Referencing column names in lapply, mapply


I have a list of dataframes dat that each contain the following sets of variables that I have saved into vectors of strings:

> vars_2
 [1] "natgas_res_bau"  "fueloil_res_bau" "propane_res_bau" "natgas_res_bau"  "fueloil_res_bau" "propane_res_bau" "natgas_res_bau" 
 [8] "fueloil_res_bau" "propane_res_bau" "natgas_res_bau"  "fueloil_res_bau" "propane_res_bau" "natgas_res_bau"  "fueloil_res_bau"
[15] "propane_res_bau" "natgas_com_bau"  "fueloil_com_bau" "propane_com_bau" "natgas_com_bau"  "fueloil_com_bau" "propane_com_bau"
[22] "natgas_com_bau"  "fueloil_com_bau" "propane_com_bau" "natgas_com_bau"  "fueloil_com_bau" "propane_com_bau" "natgas_com_bau" 
[29] "fueloil_com_bau" "propane_com_bau" "natgas_res_pol"  "fueloil_res_pol" "propane_res_pol" "natgas_res_pol"  "fueloil_res_pol"
[36] "propane_res_pol" "natgas_res_pol"  "fueloil_res_pol" "propane_res_pol" "natgas_res_pol"  "fueloil_res_pol" "propane_res_pol"
[43] "natgas_res_pol"  "fueloil_res_pol" "propane_res_pol" "natgas_com_pol"  "fueloil_com_pol" "propane_com_pol" "natgas_com_pol" 
[50] "fueloil_com_pol" "propane_com_pol" "natgas_com_pol"  "fueloil_com_pol" "propane_com_pol" "natgas_com_pol"  "fueloil_com_pol"
[57] "propane_com_pol" "natgas_com_pol"  "fueloil_com_pol" "propane_com_pol"

> vars_3
 [1] "NH3_res_natgas"        "NH3_res_fueloil"       "NH3_res_propane"       "NOX_res_natgas"        "NOX_res_fueloil"      
 [6] "NOX_res_propane"       "PM25.PRI_res_natgas"   "PM25.PRI_res_fueloil"  "PM25.PRI_res_propane"  "SO2_res_natgas"       
[11] "SO2_res_fueloil"       "SO2_res_propane"       "VOC_res_natgas"        "VOC_res_fueloil"       "VOC_res_propane"      
[16] "NH3_comm_natgas"       "NH3_comm_fueloil"      "NH3_comm_propane"      "NOX_comm_natgas"       "NOX_comm_fueloil"     
[21] "NOX_comm_propane"      "PM25.PRI_comm_natgas"  "PM25.PRI_comm_fueloil" "PM25.PRI_comm_propane" "SO2_comm_natgas"      
[26] "SO2_comm_fueloil"      "SO2_comm_propane"      "VOC_comm_natgas"       "VOC_comm_fueloil"      "VOC_comm_propane"  

I would like to perform a series of arithmetic functions using these two sets of variables and create a new set of variables, the names of which I have also saved into a vector of strings:

> vars_1
 [1] "nh3_natgas_res_bau"    "nh3_fueloil_res_bau"   "nh3_propane_res_bau"   "nox_natgas_res_bau"    "nox_fueloil_res_bau"  
 [6] "nox_propane_res_bau"   "pm25_natgas_res_bau"   "pm25_fueloil_res_bau"  "pm25_propane_res_bau"  "so2_natgas_res_bau"   
[11] "so2_fueloil_res_bau"   "so2_propane_res_bau"   "voc_natgas_res_bau"    "voc_fueloil_res_bau"   "voc_propane_res_bau"  
[16] "nh3_natgas_comm_bau"   "nh3_fueloil_comm_bau"  "nh3_propane_comm_bau"  "nox_natgas_comm_bau"   "nox_fueloil_comm_bau" 
[21] "nox_propane_comm_bau"  "pm25_natgas_comm_bau"  "pm25_fueloil_comm_bau" "pm25_propane_comm_bau" "so2_natgas_comm_bau"  
[26] "so2_fueloil_comm_bau"  "so2_propane_comm_bau"  "voc_natgas_comm_bau"   "voc_fueloil_comm_bau"  "voc_propane_comm_bau" 
[31] "nh3_natgas_res_pol"    "nh3_fueloil_res_pol"   "nh3_propane_res_pol"   "nox_natgas_res_pol"    "nox_fueloil_res_pol"  
[36] "nox_propane_res_pol"   "pm25_natgas_res_pol"   "pm25_fueloil_res_pol"  "pm25_propane_res_pol"  "so2_natgas_res_pol"   
[41] "so2_fueloil_res_pol"   "so2_propane_res_pol"   "voc_natgas_res_pol"    "voc_fueloil_res_pol"   "voc_propane_res_pol"  
[46] "nh3_natgas_comm_pol"   "nh3_fueloil_comm_pol"  "nh3_propane_comm_pol"  "nox_natgas_comm_pol"   "nox_fueloil_comm_pol" 
[51] "nox_propane_comm_pol"  "pm25_natgas_comm_pol"  "pm25_fueloil_comm_pol" "pm25_propane_comm_pol" "so2_natgas_comm_pol"  
[56] "so2_fueloil_comm_pol"  "so2_propane_comm_pol"  "voc_natgas_comm_pol"   "voc_fueloil_comm_pol"  "voc_propane_comm_pol"

The arithmetic functions I would like to perform are these:

[1] "dat$nh3_natgas_res_bau <- dat$natgas_res_bau * dat$NH3_res_natgas"           
 [2] "dat$nh3_fueloil_res_bau <- dat$fueloil_res_bau * dat$NH3_res_fueloil"        
 [3] "dat$nh3_propane_res_bau <- dat$propane_res_bau * dat$NH3_res_propane"        
 [4] "dat$nox_natgas_res_bau <- dat$natgas_res_bau * dat$NOX_res_natgas"           
 [5] "dat$nox_fueloil_res_bau <- dat$fueloil_res_bau * dat$NOX_res_fueloil"        
 [6] "dat$nox_propane_res_bau <- dat$propane_res_bau * dat$NOX_res_propane"        
 [7] "dat$pm25_natgas_res_bau <- dat$natgas_res_bau * dat$PM25.PRI_res_natgas"     
 [8] "dat$pm25_fueloil_res_bau <- dat$fueloil_res_bau * dat$PM25.PRI_res_fueloil"  
 [9] "dat$pm25_propane_res_bau <- dat$propane_res_bau * dat$PM25.PRI_res_propane"  
[10] "dat$so2_natgas_res_bau <- dat$natgas_res_bau * dat$SO2_res_natgas"           
[11] "dat$so2_fueloil_res_bau <- dat$fueloil_res_bau * dat$SO2_res_fueloil"        
[12] "dat$so2_propane_res_bau <- dat$propane_res_bau * dat$SO2_res_propane"        
[13] "dat$voc_natgas_res_bau <- dat$natgas_res_bau * dat$VOC_res_natgas"           
[14] "dat$voc_fueloil_res_bau <- dat$fueloil_res_bau * dat$VOC_res_fueloil"        
[15] "dat$voc_propane_res_bau <- dat$propane_res_bau * dat$VOC_res_propane"        
[16] "dat$nh3_natgas_comm_bau <- dat$natgas_com_bau * dat$NH3_comm_natgas"         
[17] "dat$nh3_fueloil_comm_bau <- dat$fueloil_com_bau * dat$NH3_comm_fueloil"      
[18] "dat$nh3_propane_comm_bau <- dat$propane_com_bau * dat$NH3_comm_propane"      
[19] "dat$nox_natgas_comm_bau <- dat$natgas_com_bau * dat$NOX_comm_natgas"         
[20] "dat$nox_fueloil_comm_bau <- dat$fueloil_com_bau * dat$NOX_comm_fueloil"      
[21] "dat$nox_propane_comm_bau <- dat$propane_com_bau * dat$NOX_comm_propane"      
[22] "dat$pm25_natgas_comm_bau <- dat$natgas_com_bau * dat$PM25.PRI_comm_natgas"   
[23] "dat$pm25_fueloil_comm_bau <- dat$fueloil_com_bau * dat$PM25.PRI_comm_fueloil"
[24] "dat$pm25_propane_comm_bau <- dat$propane_com_bau * dat$PM25.PRI_comm_propane"
[25] "dat$so2_natgas_comm_bau <- dat$natgas_com_bau * dat$SO2_comm_natgas"         
[26] "dat$so2_fueloil_comm_bau <- dat$fueloil_com_bau * dat$SO2_comm_fueloil"      
[27] "dat$so2_propane_comm_bau <- dat$propane_com_bau * dat$SO2_comm_propane"      
[28] "dat$voc_natgas_comm_bau <- dat$natgas_com_bau * dat$VOC_comm_natgas"         
[29] "dat$voc_fueloil_comm_bau <- dat$fueloil_com_bau * dat$VOC_comm_fueloil"      
[30] "dat$voc_propane_comm_bau <- dat$propane_com_bau * dat$VOC_comm_propane"      
[31] "dat$nh3_natgas_res_pol <- dat$natgas_res_pol * dat$NH3_res_natgas"           
[32] "dat$nh3_fueloil_res_pol <- dat$fueloil_res_pol * dat$NH3_res_fueloil"        
[33] "dat$nh3_propane_res_pol <- dat$propane_res_pol * dat$NH3_res_propane"        
[34] "dat$nox_natgas_res_pol <- dat$natgas_res_pol * dat$NOX_res_natgas"           
[35] "dat$nox_fueloil_res_pol <- dat$fueloil_res_pol * dat$NOX_res_fueloil"        
[36] "dat$nox_propane_res_pol <- dat$propane_res_pol * dat$NOX_res_propane"        
[37] "dat$pm25_natgas_res_pol <- dat$natgas_res_pol * dat$PM25.PRI_res_natgas"     
[38] "dat$pm25_fueloil_res_pol <- dat$fueloil_res_pol * dat$PM25.PRI_res_fueloil"  
[39] "dat$pm25_propane_res_pol <- dat$propane_res_pol * dat$PM25.PRI_res_propane"  
[40] "dat$so2_natgas_res_pol <- dat$natgas_res_pol * dat$SO2_res_natgas"           
[41] "dat$so2_fueloil_res_pol <- dat$fueloil_res_pol * dat$SO2_res_fueloil"        
[42] "dat$so2_propane_res_pol <- dat$propane_res_pol * dat$SO2_res_propane"        
[43] "dat$voc_natgas_res_pol <- dat$natgas_res_pol * dat$VOC_res_natgas"           
[44] "dat$voc_fueloil_res_pol <- dat$fueloil_res_pol * dat$VOC_res_fueloil"        
[45] "dat$voc_propane_res_pol <- dat$propane_res_pol * dat$VOC_res_propane"        
[46] "dat$nh3_natgas_comm_pol <- dat$natgas_com_pol * dat$NH3_comm_natgas"         
[47] "dat$nh3_fueloil_comm_pol <- dat$fueloil_com_pol * dat$NH3_comm_fueloil"      
[48] "dat$nh3_propane_comm_pol <- dat$propane_com_pol * dat$NH3_comm_propane"      
[49] "dat$nox_natgas_comm_pol <- dat$natgas_com_pol * dat$NOX_comm_natgas"         
[50] "dat$nox_fueloil_comm_pol <- dat$fueloil_com_pol * dat$NOX_comm_fueloil"      
[51] "dat$nox_propane_comm_pol <- dat$propane_com_pol * dat$NOX_comm_propane"      
[52] "dat$pm25_natgas_comm_pol <- dat$natgas_com_pol * dat$PM25.PRI_comm_natgas"   
[53] "dat$pm25_fueloil_comm_pol <- dat$fueloil_com_pol * dat$PM25.PRI_comm_fueloil" 
[54] "dat$pm25_propane_comm_pol <- dat$propane_com_pol * dat$PM25.PRI_comm_propane"
[55] "dat$so2_natgas_comm_pol <- dat$natgas_com_pol * dat$SO2_comm_natgas"         
[56] "dat$so2_fueloil_comm_pol <- dat$fueloil_com_pol * dat$SO2_comm_fueloil"      
[57] "dat$so2_propane_comm_pol <- dat$propane_com_pol * dat$SO2_comm_propane"      
[58] "dat$voc_natgas_comm_pol <- dat$natgas_com_pol * dat$VOC_comm_natgas"         
[59] "dat$voc_fueloil_comm_pol <- dat$fueloil_com_pol * dat$VOC_comm_fueloil"      
[60] "dat$voc_propane_comm_pol <- dat$propane_com_pol * dat$VOC_comm_propane" 

I would like to perform these functions using a combination of lapply to iterate over each dataframe in the list and mapply to iterate over the sets of variable names. I would think this would work:

# Attempt 1: for every data frame `a` in the list, walk the three name vectors
# in parallel and multiply column `y` by column `z`, storing it as column `x`.
dat <- lapply(dat, function(a){
          mapply(function(x,y,z){
            # paste0() on a single string returns it unchanged, so these are
            # plain lookups by column name.
            # The assignment creates a modified copy of `a` local to this inner
            # function; the value returned to mapply() is the product vector.
            a[,paste0(x)] <- a[,paste0(y)]*a[,paste0(z)]
          }, x = vars_1, y = vars_2, z = vars_3) 
          # mapply() is the last expression, so each list element becomes its
          # result rather than the data frame `a`.
        })

I've also tried:

# Attempt 2: same structure as before, indexing columns directly with the
# name strings via `[, name]`.
dat <- lapply(dat, function(a){
          mapply(function(x,y,z){
            # Assigns into a copy of `a` local to this inner function; the
            # outer `a` is not changed.
            a[,x] <- a[,y]*a[,z]
          }, x = vars_1, y = vars_2, z = vars_3) 
        })
# Attempt 3: same structure again, using `[[name]]` to extract and assign
# single columns.
dat <- lapply(dat, function(a){
          mapply(function(x,y,z){
            # Assigns into a copy of `a` local to this inner function; the
            # outer `a` is not changed.
            a[[x]] <- a[[y]]*a[[z]]
          }, x = vars_1, y = vars_2, z = vars_3) 
        })

I am getting the following error, however:

Error in `[.data.frame`(a, , paste0(y)) : undefined columns selected

Any thoughts on how to solve the issue?


Solution

  • We could use mutate with across (but it would be lengthy and possibly not efficient) i.e. loop across the 'vars_2' columns, match the corresponding 'vars_3' column by removing the substring from the current column name (cur_column()), get the value of the 'vars_3' column, multiply (*) and modify the column name in .names

    library(dplyr)
    library(stringr)
    # across() cannot express this mapping: every 'vars_2' column feeds several
    # output columns, and `.names` cannot see variables created inside the
    # lambda. Instead, build one `y * z` expression per output column, named
    # after the matching 'vars_1' entry, and splice them all into mutate().
    # Assumes the three vectors are in matching order; 'vars_3' is recycled to
    # the length of 'vars_2'.
    products <- setNames(
      Map(
        function(y, z) expr(.data[[!!y]] * .data[[!!z]]),
        vars_2,
        rep_len(vars_3, length(vars_2))
      ),
      vars_1
    )
    # 'dat' is a list of data frames, so mutate each element in turn and keep
    # the result.
    dat <- lapply(dat, function(a) mutate(a, !!!products))
    

    Or using Map, subset the columns of interest (assuming the vectors are in the same order), pass that as arguments to Map, do the *, and assign it to new columns based on the 'vars_1' vector values (assuming these are in the same order)

    # 'dat' is a list of data frames, so apply the column-wise product to each
    # element and return the modified data frame.
    dat <- lapply(dat, function(a) {
      # Map() pairs the selected columns positionally and recycles the shorter
      # 'vars_3' selection along 'vars_2'; the products are stored under the
      # 'vars_1' names (all three vectors must be in matching order).
      a[vars_1] <- Map(`*`, a[vars_2], a[vars_3])
      a
    })
    

    Also, we don't need a Map loop, as `*` can be applied directly to two data frames of equal dimensions

    # 'dat' is a list of data frames, so apply the product to each element.
    dat <- lapply(dat, function(a) {
      # `*` on two data frames requires equal dimensions, so repeat 'vars_3'
      # to the length of 'vars_2' before selecting; columns are multiplied
      # positionally and stored under the 'vars_1' names.
      a[vars_1] <- a[vars_2] * a[rep_len(vars_3, length(vars_2))]
      a
    })