I have a list of data frames, `dat`, each of which contains the following sets of variables, whose names I have saved into vectors of strings:
> vars_2
[1] "natgas_res_bau" "fueloil_res_bau" "propane_res_bau" "natgas_res_bau" "fueloil_res_bau" "propane_res_bau" "natgas_res_bau"
[8] "fueloil_res_bau" "propane_res_bau" "natgas_res_bau" "fueloil_res_bau" "propane_res_bau" "natgas_res_bau" "fueloil_res_bau"
[15] "propane_res_bau" "natgas_com_bau" "fueloil_com_bau" "propane_com_bau" "natgas_com_bau" "fueloil_com_bau" "propane_com_bau"
[22] "natgas_com_bau" "fueloil_com_bau" "propane_com_bau" "natgas_com_bau" "fueloil_com_bau" "propane_com_bau" "natgas_com_bau"
[29] "fueloil_com_bau" "propane_com_bau" "natgas_res_pol" "fueloil_res_pol" "propane_res_pol" "natgas_res_pol" "fueloil_res_pol"
[36] "propane_res_pol" "natgas_res_pol" "fueloil_res_pol" "propane_res_pol" "natgas_res_pol" "fueloil_res_pol" "propane_res_pol"
[43] "natgas_res_pol" "fueloil_res_pol" "propane_res_pol" "natgas_com_pol" "fueloil_com_pol" "propane_com_pol" "natgas_com_pol"
[50] "fueloil_com_pol" "propane_com_pol" "natgas_com_pol" "fueloil_com_pol" "propane_com_pol" "natgas_com_pol" "fueloil_com_pol"
[57] "propane_com_pol" "natgas_com_pol" "fueloil_com_pol" "propane_com_pol"
> vars_3
[1] "NH3_res_natgas" "NH3_res_fueloil" "NH3_res_propane" "NOX_res_natgas" "NOX_res_fueloil"
[6] "NOX_res_propane" "PM25.PRI_res_natgas" "PM25.PRI_res_fueloil" "PM25.PRI_res_propane" "SO2_res_natgas"
[11] "SO2_res_fueloil" "SO2_res_propane" "VOC_res_natgas" "VOC_res_fueloil" "VOC_res_propane"
[16] "NH3_comm_natgas" "NH3_comm_fueloil" "NH3_comm_propane" "NOX_comm_natgas" "NOX_comm_fueloil"
[21] "NOX_comm_propane" "PM25.PRI_comm_natgas" "PM25.PRI_comm_fueloil" "PM25.PRI_comm_propane" "SO2_comm_natgas"
[26] "SO2_comm_fueloil" "SO2_comm_propane" "VOC_comm_natgas" "VOC_comm_fueloil" "VOC_comm_propane"
I would like to perform a series of arithmetic functions using these two sets of variables and create a new set of variables, the names of which I have also saved into a vector of strings:
> vars_1
[1] "nh3_natgas_res_bau" "nh3_fueloil_res_bau" "nh3_propane_res_bau" "nox_natgas_res_bau" "nox_fueloil_res_bau"
[6] "nox_propane_res_bau" "pm25_natgas_res_bau" "pm25_fueloil_res_bau" "pm25_propane_res_bau" "so2_natgas_res_bau"
[11] "so2_fueloil_res_bau" "so2_propane_res_bau" "voc_natgas_res_bau" "voc_fueloil_res_bau" "voc_propane_res_bau"
[16] "nh3_natgas_comm_bau" "nh3_fueloil_comm_bau" "nh3_propane_comm_bau" "nox_natgas_comm_bau" "nox_fueloil_comm_bau"
[21] "nox_propane_comm_bau" "pm25_natgas_comm_bau" "pm25_fueloil_comm_bau" "pm25_propane_comm_bau" "so2_natgas_comm_bau"
[26] "so2_fueloil_comm_bau" "so2_propane_comm_bau" "voc_natgas_comm_bau" "voc_fueloil_comm_bau" "voc_propane_comm_bau"
[31] "nh3_natgas_res_pol" "nh3_fueloil_res_pol" "nh3_propane_res_pol" "nox_natgas_res_pol" "nox_fueloil_res_pol"
[36] "nox_propane_res_pol" "pm25_natgas_res_pol" "pm25_fueloil_res_pol" "pm25_propane_res_pol" "so2_natgas_res_pol"
[41] "so2_fueloil_res_pol" "so2_propane_res_pol" "voc_natgas_res_pol" "voc_fueloil_res_pol" "voc_propane_res_pol"
[46] "nh3_natgas_comm_pol" "nh3_fueloil_comm_pol" "nh3_propane_comm_pol" "nox_natgas_comm_pol" "nox_fueloil_comm_pol"
[51] "nox_propane_comm_pol" "pm25_natgas_comm_pol" "pm25_fueloil_comm_pol" "pm25_propane_comm_pol" "so2_natgas_comm_pol"
[56] "so2_fueloil_comm_pol" "so2_propane_comm_pol" "voc_natgas_comm_pol" "voc_fueloil_comm_pol" "voc_propane_comm_pol"
The arithmetic functions I would like to perform are these:
[1] "dat$nh3_natgas_res_bau <- dat$natgas_res_bau * dat$NH3_res_natgas"
[2] "dat$nh3_fueloil_res_bau <- dat$fueloil_res_bau * dat$NH3_res_fueloil"
[3] "dat$nh3_propane_res_bau <- dat$propane_res_bau * dat$NH3_res_propane"
[4] "dat$nox_natgas_res_bau <- dat$natgas_res_bau * dat$NOX_res_natgas"
[5] "dat$nox_fueloil_res_bau <- dat$fueloil_res_bau * dat$NOX_res_fueloil"
[6] "dat$nox_propane_res_bau <- dat$propane_res_bau * dat$NOX_res_propane"
[7] "dat$pm25_natgas_res_bau <- dat$natgas_res_bau * dat$PM25.PRI_res_natgas"
[8] "dat$pm25_fueloil_res_bau <- dat$fueloil_res_bau * dat$PM25.PRI_res_fueloil"
[9] "dat$pm25_propane_res_bau <- dat$propane_res_bau * dat$PM25.PRI_res_propane"
[10] "dat$so2_natgas_res_bau <- dat$natgas_res_bau * dat$SO2_res_natgas"
[11] "dat$so2_fueloil_res_bau <- dat$fueloil_res_bau * dat$SO2_res_fueloil"
[12] "dat$so2_propane_res_bau <- dat$propane_res_bau * dat$SO2_res_propane"
[13] "dat$voc_natgas_res_bau <- dat$natgas_res_bau * dat$VOC_res_natgas"
[14] "dat$voc_fueloil_res_bau <- dat$fueloil_res_bau * dat$VOC_res_fueloil"
[15] "dat$voc_propane_res_bau <- dat$propane_res_bau * dat$VOC_res_propane"
[16] "dat$nh3_natgas_comm_bau <- dat$natgas_com_bau * dat$NH3_comm_natgas"
[17] "dat$nh3_fueloil_comm_bau <- dat$fueloil_com_bau * dat$NH3_comm_fueloil"
[18] "dat$nh3_propane_comm_bau <- dat$propane_com_bau * dat$NH3_comm_propane"
[19] "dat$nox_natgas_comm_bau <- dat$natgas_com_bau * dat$NOX_comm_natgas"
[20] "dat$nox_fueloil_comm_bau <- dat$fueloil_com_bau * dat$NOX_comm_fueloil"
[21] "dat$nox_propane_comm_bau <- dat$propane_com_bau * dat$NOX_comm_propane"
[22] "dat$pm25_natgas_comm_bau <- dat$natgas_com_bau * dat$PM25.PRI_comm_natgas"
[23] "dat$pm25_fueloil_comm_bau <- dat$fueloil_com_bau * dat$PM25.PRI_comm_fueloil"
[24] "dat$pm25_propane_comm_bau <- dat$propane_com_bau * dat$PM25.PRI_comm_propane"
[25] "dat$so2_natgas_comm_bau <- dat$natgas_com_bau * dat$SO2_comm_natgas"
[26] "dat$so2_fueloil_comm_bau <- dat$fueloil_com_bau * dat$SO2_comm_fueloil"
[27] "dat$so2_propane_comm_bau <- dat$propane_com_bau * dat$SO2_comm_propane"
[28] "dat$voc_natgas_comm_bau <- dat$natgas_com_bau * dat$VOC_comm_natgas"
[29] "dat$voc_fueloil_comm_bau <- dat$fueloil_com_bau * dat$VOC_comm_fueloil"
[30] "dat$voc_propane_comm_bau <- dat$propane_com_bau * dat$VOC_comm_propane"
[31] "dat$nh3_natgas_res_pol <- dat$natgas_res_pol * dat$NH3_res_natgas"
[32] "dat$nh3_fueloil_res_pol <- dat$fueloil_res_pol * dat$NH3_res_fueloil"
[33] "dat$nh3_propane_res_pol <- dat$propane_res_pol * dat$NH3_res_propane"
[34] "dat$nox_natgas_res_pol <- dat$natgas_res_pol * dat$NOX_res_natgas"
[35] "dat$nox_fueloil_res_pol <- dat$fueloil_res_pol * dat$NOX_res_fueloil"
[36] "dat$nox_propane_res_pol <- dat$propane_res_pol * dat$NOX_res_propane"
[37] "dat$pm25_natgas_res_pol <- dat$natgas_res_pol * dat$PM25.PRI_res_natgas"
[38] "dat$pm25_fueloil_res_pol <- dat$fueloil_res_pol * dat$PM25.PRI_res_fueloil"
[39] "dat$pm25_propane_res_pol <- dat$propane_res_pol * dat$PM25.PRI_res_propane"
[40] "dat$so2_natgas_res_pol <- dat$natgas_res_pol * dat$SO2_res_natgas"
[41] "dat$so2_fueloil_res_pol <- dat$fueloil_res_pol * dat$SO2_res_fueloil"
[42] "dat$so2_propane_res_pol <- dat$propane_res_pol * dat$SO2_res_propane"
[43] "dat$voc_natgas_res_pol <- dat$natgas_res_pol * dat$VOC_res_natgas"
[44] "dat$voc_fueloil_res_pol <- dat$fueloil_res_pol * dat$VOC_res_fueloil"
[45] "dat$voc_propane_res_pol <- dat$propane_res_pol * dat$VOC_res_propane"
[46] "dat$nh3_natgas_comm_pol <- dat$natgas_com_pol * dat$NH3_comm_natgas"
[47] "dat$nh3_fueloil_comm_pol <- dat$fueloil_com_pol * dat$NH3_comm_fueloil"
[48] "dat$nh3_propane_comm_pol <- dat$propane_com_pol * dat$NH3_comm_propane"
[49] "dat$nox_natgas_comm_pol <- dat$natgas_com_pol * dat$NOX_comm_natgas"
[50] "dat$nox_fueloil_comm_pol <- dat$fueloil_com_pol * dat$NOX_comm_fueloil"
[51] "dat$nox_propane_comm_pol <- dat$propane_com_pol * dat$NOX_comm_propane"
[52] "dat$pm25_natgas_comm_pol <- dat$natgas_com_pol * dat$PM25.PRI_comm_natgas"
[53] "dat$pm25_fueloil_comm_pol <- dat$fueloil_com_pol * dat$PM25.PRI_comm_fueloil"
[54] "dat$pm25_propane_comm_pol <- dat$propane_com_pol * dat$PM25.PRI_comm_propane"
[55] "dat$so2_natgas_comm_pol <- dat$natgas_com_pol * dat$SO2_comm_natgas"
[56] "dat$so2_fueloil_comm_pol <- dat$fueloil_com_pol * dat$SO2_comm_fueloil"
[57] "dat$so2_propane_comm_pol <- dat$propane_com_pol * dat$SO2_comm_propane"
[58] "dat$voc_natgas_comm_pol <- dat$natgas_com_pol * dat$VOC_comm_natgas"
[59] "dat$voc_fueloil_comm_pol <- dat$fueloil_com_pol * dat$VOC_comm_fueloil"
[60] "dat$voc_propane_comm_pol <- dat$propane_com_pol * dat$VOC_comm_propane"
I would like to perform these functions using a combination of lapply to iterate over each dataframe in the list and mapply to iterate over the sets of variable names. I would think this would work:
# Attempt 1: for every data frame `a` in the list `dat`, multiply the column
# named by `y` (vars_2) with the column named by `z` (vars_3) and store the
# product under the name `x` (vars_1).
# NOTE(review): the assignment below only changes the copy of `a` that is local
# to the inner anonymous function, so the data frame is never updated; each
# list element becomes whatever mapply() returns instead of a data frame.
# NOTE(review): the reported "undefined columns selected" error comes from
# a[, paste0(y)], so presumably at least one name in vars_2 is not a column of
# `a` -- verify with setdiff(vars_2, names(a)).
dat <- lapply(dat, function(a){
# vars_3 is shorter than vars_1 / vars_2; mapply() recycles it.
mapply(function(x,y,z){
# paste0() on a single string returns it unchanged, so this is equivalent to
# a[, x] <- a[, y] * a[, z]
a[,paste0(x)] <- a[,paste0(y)]*a[,paste0(z)]
}, x = vars_1, y = vars_2, z = vars_3)
})
I've also tried:
# Attempt 2: same idea, indexing the columns directly with the name strings.
# NOTE(review): the assignment still happens on a copy of `a` local to the
# inner function, so the result of lapply() is the mapply() output, not the
# modified data frames.
dat <- lapply(dat, function(a){
mapply(function(x,y,z){
# a[, y] raises "undefined columns selected" when `y` is not a column of `a`.
a[,x] <- a[,y]*a[,z]
}, x = vars_1, y = vars_2, z = vars_3)
})
# Attempt 3: same idea using `[[` to extract and assign single columns.
# NOTE(review): as in the other attempts, `a` is modified only inside the
# inner function, so the new columns never reach the data frames in `dat`.
dat <- lapply(dat, function(a){
mapply(function(x,y,z){
# `[[` returns NULL for a column name that does not exist instead of raising
# "undefined columns selected".
a[[x]] <- a[[y]]*a[[z]]
}, x = vars_1, y = vars_2, z = vars_3)
})
I am getting the following error, however:
Error in `[.data.frame`(a, , paste0(y)) : undefined columns selected
Any thoughts on how to solve the issue?
We could use `mutate` with `across` (although it is lengthy and possibly not efficient): loop across the 'vars_2' columns, match the corresponding 'vars_3' column by removing the substring from the current column name (`cur_column()`), `get` the value of that 'vars_3' column, multiply (`*`), and modify the column name in `.names`.
library(dplyr)
library(stringr)
# Add the product columns named in `vars_1` to every data frame in `dat`.
#
# The inputs are paired by position (vars_1[i] <- vars_2[i] * vars_3[i]) rather
# than by pattern-matching on the column names: the fuel prefix of a 'vars_2'
# name (e.g. "natgas") matches several 'vars_3' columns, so a name-based lookup
# inside across() is ambiguous. 'vars_3' is shorter than 'vars_2', so it is
# recycled explicitly to the same length first.
factor_cols <- rep_len(vars_3, length(vars_2))

dat <- lapply(dat, function(df) {
  # One product vector per requested output column.
  products <- Map(function(y, z) df[[y]] * df[[z]], vars_2, factor_cols)

  # Splice the named list into mutate() so each element becomes a new column.
  df %>%
    mutate(!!!setNames(products, vars_1))
})
Or, using `Map`: subset the columns of interest (assuming the vectors are in the same order), pass them as arguments to `Map`, do the `*`, and assign the result to new columns named by the 'vars_1' vector (again assuming these are in the same order).
# `dat` is a list of data frames, so apply the column-wise product to each
# element and return the modified data frame.
dat <- lapply(dat, function(df) {
  # Fail early with the offending names instead of the generic
  # "undefined columns selected" error.
  missing_cols <- setdiff(c(vars_2, vars_3), names(df))
  if (length(missing_cols) > 0) {
    stop("Columns not found: ", toString(missing_cols), call. = FALSE)
  }

  # vars_3 is shorter than vars_2; recycle it explicitly so the columns are
  # paired by position: vars_1[i] <- vars_2[i] * vars_3[i].
  factor_cols <- rep_len(vars_3, length(vars_2))
  df[vars_1] <- Map(`*`, df[vars_2], df[factor_cols])
  df
})
Also, we don't need a `Map` loop, as `*` can be applied directly to data frames of equal dimensions (so both column subsets must have the same number of columns).
# `dat` is a list of data frames, so do the multiplication per element.
dat <- lapply(dat, function(df) {
  # Arithmetic on two data frames requires identical dimensions, so recycle
  # the shorter vars_3 to the length of vars_2 before subsetting.
  factor_cols <- rep_len(vars_3, length(vars_2))
  df[vars_1] <- df[vars_2] * df[factor_cols]
  df
})