Search code examples
rdataframepredict

Coalescing columns more efficiently in R


I have 2 dataframes - the first dataframe (df1) has columns with values at different times of the year; these columns are ones that don't have stationary or Air in the column name. I used a linear model to predict the rest of the values for the year - which I created into a second data frame (df2).

df1 = df = structure(list(Date_Time_GMT_3 = 
                      structure(c(1622552400, 1622553300,1622554200, 1622555100, 1622556000, 1622556900), 
                                class = c("POSIXct","POSIXt"), 
                                tzone = "EST"),
                    X20819830_R1AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 16.808, 16.713, 17.753), 
                    X20819742_R1AR_S_Stationary = c(16.903, 16.828, 16.808, NA_real_, NA_real_, NA_real_), 
                    X20822215_R3AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 13.942, 13.942, 13.846), 
                    X20822215_R3AR_S_Stationary = c(13.942, 13.972, 13.842, NA_real_, NA_real_, NA_real_), 
                    X20874235_R4AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 14.134, 14.534, 14.404), 
                    X20874235_R4AR_S_Stationary = c(14.23, 14.23, 14.134, NA_real_, NA_real_, NA_real_), 
                    X20874311_F1AR_U_Stationary = c(NA_real_, NA_real_, NA_real_, 15.187, 15.327, 15.567), 
                    X20874311_F1AR_S_Stationary = c(15.282, 15.387, 15.587, NA_real_, NA_real_, NA_real_), 
                    X20817727_F8AR_U = c(15.421, 14.441, 14.631, 14.781, 15.521, 15.821), 
                    X20819742_X1AR_U = c(14.996, 15.996, 14.776, 14.920, 14.870, 14.235), 
                    X20819742_R2AR_U = c(14.781, 15.521, 15.821, NA_real_, NA_real_, NA_real_), 
                    X20817727_R5AR_U = c(NA_real_, NA_real_, NA_real_, 13.942, 13.942, 13.846), 
                    X20817727_R7AR = c(14.23, 14.23, 14.134, NA_real_, NA_real_, NA_real_)), 
               row.names = c(NA, 6L), class = "data.frame")
df2 = structure(list(Date_Time_GMT_3 = 
                      structure(c(1622552400, 1622553300,1622554200, 1622555100, 1622556000, 1622556900), 
                                class = c("POSIXct","POSIXt"), 
                                tzone = "EST"),
                    Predicted_X20817727_F8AR_U = c(17.421, 15.441, 17.631, 15.781, 15.001, 16.821), 
                    Predicted_X20819742_X1AR_U = c(15.596, 17.996, 13.676, 13.620, 12.860, 13.245), 
                    Predicted_X20819742_R2AR_U = c(14.781, 15.521, 15.821, 17.421, 15.441, 17.631), 
                    Predicted_X20817727_R5AR_U = c(15.596, 17.996, 13.676, 13.620, 12.860, 13.245), 
                    Predicted_X20817727_R7AR = c(13.942, 13.942, 13.846, 17.421, 15.441, 17.631)), 
               row.names = c(NA, 6L), class = "data.frame")

I am trying to add the column values from df2 to matching column names and matching Date_Time into df1 where there are no values (i.e. NA), and maintain the original values that are already recorded in the df1. The code I'm using now works, but I was wondering if there was a more efficient way of doing it, without having to type out each column name. Here is the code I've been using

###cOMBINE the predicted columns to the mobile loggers so that values in mobile loggers are preserved
df1$Predicted_F8AR = df2$Predicted_X20817727_F8AR_U
df1$Predicted_R2AR = df2$Predicted_X20819742_R2AR_U
df1$Predicted_R5AR = df2$Predicted_X20817727_R5AR_U
df1$Predicted_X1AR = df2$Predicted_X20819742_X1AR_U









###cOMBINE the predicted columns to the mobile loggers so that values in mobile loggers are preserved
F8AR_U = df1 %>% 
  mutate(F8AR_U = coalesce(X20817727_F8AR_U,Predicted_F8AR)) %>%
  select(X20817727_F8AR_U, F8AR_U)
df1$X20817727_F8AR_U = F8AR_U$F8AR_U


R2AR_U = df1 %>% 
  mutate(R2AR_U = coalesce(X20819742_R2AR_U,Predicted_R2AR)) %>%
  select(X20819742_R2AR_U, R2AR_U)
df1$X20819742_R2AR_U = R2AR_U$R2AR_U


R5AR_U = df1 %>% 
  mutate(R5AR_U = coalesce(X20817727_R5AR_U,Predicted_R5AR)) %>%
  select(X20817727_R5AR_U, R5AR_U)
df1$X20817727_R5AR_U = R5AR_U$R5AR_U


X1AR_U = df1 %>% 
  mutate(X1AR_U = coalesce(X20819742_X1AR_U,Predicted_X1AR)) %>%
  select(X20819742_X1AR_U, X1AR_U)
df1$`X20819742_X1AR_U` = X1AR_U$X1AR_U


#gET RID OF PREDICTED COLUMNS FOR FINAL TABLE Export
df1 = df1[,c(1:13)]


any ideas?


Solution

  • consider:

    df1 %>%
      pivot_longer(-Date_Time_GMT_3, names_to = c('name1', 'name', 'name2'),
                   names_pattern = '(.*?)_([^_]+)_(.*)') %>%
      left_join(df2 %>%
       pivot_longer(-Date_Time_GMT_3,names_pattern = 'Predicted_[^_]+_([^_]+)') , 
       by = c('Date_Time_GMT_3', 'name'))%>%
      mutate(value=coalesce(value.x, value.y)) %>%
      pivot_wider(Date_Time_GMT_3, names_from = c(name1, name, name2))