I am trying to add a column to a dataframe based on criteria from two columns in this original dataframe and three columns of a second dataframe. The new column would simply have an 'a' added to the end of the values in one column.
Reproducible example: actual df1 has > 80k rows & 16 columns; df2 7 rows & 6 columns
df1 <- data.frame(Date = as.Date(c("2024-01-01", "2024-02-01", "2024-03-01", "2024-04-01", "2024-05-01", "2024-06-01", "2024-07-01")),
RT = c("150.5-40", "150.5-40", "150.5-40", "150.5-40", "150.5-140", "150.5-80", "150.5-80"),
RTa = c("150.5-40a", "150.5-40a", "150.5-40a", "150.5-40a", "150.5- 140a", "150.5-80a", "150.5-80a"))
df1
Date RT RTa
2024-01-01 150.5-40 150.5-40a
2024-02-01 150.5-40 150.5-40a
2024-03-01 150.5-40 150.5-40a
2024-04-01 150.5-40 150.5-40a
2024-05-01 150.5-140 150.5-140a
2024-06-01 150.5-80 150.5-80a
2024-07-01 150.5-80 150.5-80a
df2 <- data.frame(Date = as.Date(c("2024-03-01", "2024-04-01")),
RT = c("150.5-40", "150.5-10"),
MRT = c("150.5-140", "150.5-110"))
df2
Date RT MRT
2024-03-01 150.5-40 150.5-140
2024-04-01 150.5-10 150.5-110
Desired results:
Date RT RTa RT_F
2024-01-01 150.5-40 150.5-40a 150.5-40
2024-02-01 150.5-40 150.5-40a 150.5-40
2024-03-01 150.5-40 150.5-40a 150.5-40a
2024-04-01 150.5-40 150.5-40a 150.5-40a
2024-05-01 150.5-140 150.5-140a 150.5-140a
2024-06-01 150.5-80 150.5-80a 150.5-80
2024-07-01 150.5-80 150.5-80a 150.5-80
I have tried using dplyr::mutate as follows:
df1 %>%
dplyr::mutate(RT_F = if_else(RT == df2$RT | RT == df2$MRT & Date >= df2$Date, RTa, RT))
which produces the following inaccurate results:
Date RT RTa RT_F
2024-01-01 150.5-40 150.5-40a 150.5-40a
2024-02-01 150.5-40 150.5-40a 150.5-40
2024-03-01 150.5-40 150.5-40a 150.5-40a
2024-04-01 150.5-40 150.5-40a 150.5-40
2024-05-01 150.5-140 150.5-140a 150.5-140a
2024-06-01 150.5-80 150.5-80a 150.5-80
2024-07-01 150.5-80 150.5-80a 150.5-80
I have also tried:
for (i in 1:nrow(df1)) { # For every row in df1
# # Select if df1 RT = df2 RT
if (df1[i, "RT"] %in% df2$"RT") { # If RT is in df2
df[i, "RTF"] <- df2$RTa # Add RTa to the RTF column
}
}
which produces the following error:
Error in df[i, "RTF"] <- df2$RTa : object of type 'closure' is not subsettable
Also tried the following with no success
df1 %>%
dplyr::mutate(RTF = case_when(
RT == df2$RT | RT == df2$MRT & Date >= df2$Date ~ RT,
TRUE ~ RTa
))
Based on the logic in your dplyr
code, one way to achieve your desired output is to use dplyr::left_join()
to combine df1 and df2, then apply the date condition.
Converting your df2 object to long form using tidyr::pivot_longer()
prior to joining will simplify the process.
First, load required packages and data (modified as there are errors in your example data):
library(dplyr)
library(tidyr)
df1 <- data.frame(
Date = as.Date(c("2024-01-01", "2024-02-01", "2024-03-01",
"2024-04-01", "2024-05-01", "2024-06-01", "2024-07-01")),
RT = c("150.5-40", "150.5-40", "150.5-40",
"150.5-40", "150.5-140", "150.5-80", "150.5-80"),
RTa = c("150.5-40a", "150.5-40a", "150.5-40a",
"150.5-40a", "150.5-140a", "150.5-80a", "150.5-80a")
)
df2 <- data.frame(
Date = as.Date(c("2024-03-01", "2024-04-01")),
RT = c("150.5-40", "150.5-10"),
MRT = c("150.5-140", "150.5-110")
)
then pivot df2 to long form:
df3 <- df2 |>
pivot_longer(-Date,
names_to = "Type",
values_to = "Code")
df3
# # A tibble: 4 × 3
# Date Type Code
# <date> <chr> <chr>
# 1 2024-03-01 RT 150.5-40
# 2 2024-03-01 MRT 150.5-140
# 3 2024-04-01 RT 150.5-10
# 4 2024-04-01 MRT 150.5-110
and finally, join df1 and df3 and create a new column where your date condition is applied. Based on your comment, you can use case_when(.default = RT)
. This will ensure all other values in the RTF column are from RT, in this case accounting for Date.x < Date.y and any NA values introduced from the left join:
df4 <- df1 |>
left_join(df3, by = join_by(RT == Code)) |>
mutate(RTF = case_when(Date.x >= Date.y ~ RTa,
.default = RT))|>
select(Date = Date.x, RT, RTa, RTF)
df4
# Date RT RTa RTF
# 1 2024-01-01 150.5-40 150.5-40a 150.5-40
# 2 2024-02-01 150.5-40 150.5-40a 150.5-40
# 3 2024-03-01 150.5-40 150.5-40a 150.5-40a
# 4 2024-04-01 150.5-40 150.5-40a 150.5-40a
# 5 2024-05-01 150.5-140 150.5-140a 150.5-140a
# 6 2024-06-01 150.5-80 150.5-80a 150.5-80
# 7 2024-07-01 150.5-80 150.5-80a 150.5-80