I'm trying to change the variable "exposure" according to multiple conditions.
For example: if stimulus_content is "neg", condition is "neg", and set is "A", then the variable "exposure" should be set to "long" for the rows in which stimulus_no is one of X1, X2, ..., X5. The variable "exposure" should be set to "short" for the rows in which stimulus_no is one of X6, X7, ..., X10. And so on...
I hope the code below makes the problem more clear.
First, this is the approximate dataset:
# Example data: n participants with 40 rows each (240 rows in total).
# Columns shorter than 240 values are recycled by data.frame().
n <- 6
dataset <- data.frame(
  participant      = rep(seq_len(n), each = 40),
  condition        = rep(c("pos", "neg"), times = c(40, 40)),
  set              = rep(c("A", "B"), times = c(40, 40)),
  stimulus_content = rep(c("pos", "neg"), times = c(2, 2)),
  # labels in alphabetical order: X1, X10, ..., X19, X2, X20, X3, ..., X9
  stimulus_no      = rep(paste0("X", c(1, 10:19, 2, 20, 3:9)), each = 2),
  # placeholder column, filled in by the recoding steps
  exposure         = NA
)
The first thing we tried is via a loop. For the sake of simplicity, only one part of the loop is included. It doesn't return an error but it also doesn't do anything.
# Attempted row-by-row recoding of `exposure` in `longdat`; only the branch for
# stimulus_content == "pos" and condition == "pos" is shown.
# NOTE(review): as written, this loop never modifies `longdat`:
#   1. `longdat[i,"exposure"] == "long"` is a comparison (`==`), not an
#      assignment (`<-`); its result is computed and discarded.
#   2. Each inner `for(stimulus_no in c(...))` merely iterates over the literal
#      labels; the stimulus_no value of row i is never inspected.
for (i in 1:length(longdat[,1])){
if(longdat[i,"stimulus_content"] == "pos") {
if(longdat[i,"condition"] == "pos") {
if(longdat[i,"set"] == "A") {
# intended mapping for set A: X1-X5 long, X6-X10 short, X11-X20 none
for(stimulus_no in c("X1","X2","X3","X4","X5")){longdat[i,"exposure"] == "long"}
for(stimulus_no in c("X6","X7","X8","X9","X10")){longdat[i,"exposure"] == "short"}
for(stimulus_no in c("X11","X12","X13","X14","X15","X16","X17","X18","X19","X20")){longdat[i,"exposure"] == "none"}
} else { #for condition = pos and set != A
# intended mapping otherwise: X11-X15 long, X16-X20 short, X1-X10 none
for(stimulus_no in c("X11","X12","X13","X14","X15")){longdat[i,"exposure"] == "long"}
for(stimulus_no in c("X16","X17","X18","X19","X20")){longdat[i,"exposure"] == "short"}
for(stimulus_no in c("X1","X2","X3","X4","X5","X6","X7","X8","X9","X10")){longdat[i,"exposure"] == "none"}
}
}
}
}
Next, we tried via mutate and case_when. This code does exactly what it's supposed to but it's almost 100 lines long! Please find an excerpt below.
# Long-hand recoding (excerpt): one case_when() clause per combination of
# stimulus_no, stimulus_content, condition and set, each mapped to an
# `exposure` label. Clauses omitted from the excerpt are marked with `# ...`.
# NOTE(review): `%>%`, mutate() and case_when() are not base R; presumably
# dplyr is attached earlier in the script - confirm.
longdat2 <- longdat %>%
mutate(exposure = case_when(
# Condition pos, set A
stimulus_no=="X1" & stimulus_content=="pos" & condition=="pos" & set=="A" ~ "long",
stimulus_no=="X2" & stimulus_content=="pos" & condition=="pos" & set=="A" ~ "long",
# ...
stimulus_no=="X9" & stimulus_content=="pos" & condition=="pos" & set=="A" ~ "short",
stimulus_no=="X10" & stimulus_content=="pos" & condition=="pos" & set=="A" ~ "short",
stimulus_no=="X11" & stimulus_content=="pos" & condition=="pos" & set=="A" ~ "none",
# ... accordingly for condition pos and set B, and for condition neg and set A
# and eventually for condition neg and set B
stimulus_no=="X18" & stimulus_content=="neg" & condition=="neg" & set=="B" ~ "short",
stimulus_no=="X19" & stimulus_content=="neg" & condition=="neg" & set=="B" ~ "short",
stimulus_no=="X20" & stimulus_content=="neg" & condition=="neg" & set=="B" ~ "short",
)
)
If someone manages to spot the error in the loop or could tell me a more succinct version of the second (or first) option, I'd be very grateful!
Thanks a lot in advance!
You can simplify your second solution by using the %in% operator and the negated condition for the "else" part:
# Recode `exposure` from the stimulus number.
# Rows with stimulus_content == "pos", condition == "pos" and set == "A" use
# the mapping X1-X5 -> "long", X6-X10 -> "short", X11-X20 -> "none"; every
# other row uses the mirrored mapping X11-X15 -> "long", X16-X20 -> "short",
# X1-X10 -> "none".
dataset2 <- dataset %>%
  mutate(
    # temporary flag for the pos/pos/A rows; dropped again at the end
    is_pos_a = stimulus_content == "pos" & condition == "pos" & set == "A",
    exposure = case_when(
      (is_pos_a & stimulus_no %in% paste0("X", 1:5)) |
        (!is_pos_a & stimulus_no %in% paste0("X", 11:15)) ~ "long",
      (is_pos_a & stimulus_no %in% paste0("X", 6:10)) |
        (!is_pos_a & stimulus_no %in% paste0("X", 16:20)) ~ "short",
      (is_pos_a & stimulus_no %in% paste0("X", 11:20)) |
        (!is_pos_a & stimulus_no %in% paste0("X", 1:10)) ~ "none"
    ),
    # assigning NULL removes the helper column from the result
    is_pos_a = NULL
  )
EDIT
For the solution with the loop :
# Row-by-row version of the same recoding, working on a copy of `dataset`.
# Rows with stimulus_content == "pos", condition == "pos" and set == "A" get
# X1-X5 -> "long", X6-X10 -> "short", X11-X20 -> "none"; all other rows get
# X11-X15 -> "long", X16-X20 -> "short", X1-X10 -> "none".
dataset3 <- dataset
# seq_len(nrow(.)) is empty for a zero-row data frame, whereas
# 1:length(...) would yield c(1, 0) and index non-existent rows.
for (i in seq_len(nrow(dataset3))) {
  # `if` needs a single TRUE/FALSE, so combine the scalar tests with `&&`
  if (dataset3[i, "stimulus_content"] == "pos" &&
      dataset3[i, "condition"] == "pos" &&
      dataset3[i, "set"] == "A") {
    if (dataset3[i, "stimulus_no"] %in% c("X1", "X2", "X3", "X4", "X5")) {
      dataset3[i, "exposure"] <- "long"
    }
    if (dataset3[i, "stimulus_no"] %in% c("X6", "X7", "X8", "X9", "X10")) {
      dataset3[i, "exposure"] <- "short"
    }
    if (dataset3[i, "stimulus_no"] %in% c("X11", "X12", "X13", "X14", "X15", "X16", "X17", "X18", "X19", "X20")) {
      dataset3[i, "exposure"] <- "none"
    }
  } else {
    if (dataset3[i, "stimulus_no"] %in% c("X11", "X12", "X13", "X14", "X15")) {
      dataset3[i, "exposure"] <- "long"
    }
    if (dataset3[i, "stimulus_no"] %in% c("X16", "X17", "X18", "X19", "X20")) {
      dataset3[i, "exposure"] <- "short"
    }
    if (dataset3[i, "stimulus_no"] %in% c("X1", "X2", "X3", "X4", "X5", "X6", "X7", "X8", "X9", "X10")) {
      dataset3[i, "exposure"] <- "none"
    }
  }
}
# Check against the case_when() result; the error shown below is how
# compare_df() reported that it found no differences.
compareDF::compare_df(dataset3, dataset2, rownames)
#> Error in stop_or_warn("The two data frames are the same!", stop_on_error): The two data frames are the same!
and to avoid the loop, like @g-grothendieck but closer to your code :
# Vectorised recoding with indexed assignment inside within().
# pos/pos/A rows: X1-X5 -> "long", X6-X10 -> "short", X11-X20 -> "none";
# all other rows: X11-X15 -> "long", X16-X20 -> "short", X1-X10 -> "none".
dataset4 <- within(dataset, {
  is_pos_a <- stimulus_content == "pos" & condition == "pos" & set == "A"
  exposure[(is_pos_a & stimulus_no %in% paste0("X", 1:5)) |
             (!is_pos_a & stimulus_no %in% paste0("X", 11:15))] <- "long"
  exposure[(is_pos_a & stimulus_no %in% paste0("X", 6:10)) |
             (!is_pos_a & stimulus_no %in% paste0("X", 16:20))] <- "short"
  exposure[(is_pos_a & stimulus_no %in% paste0("X", 11:20)) |
             (!is_pos_a & stimulus_no %in% paste0("X", 1:10))] <- "none"
  # within() turns every remaining variable into a column, so drop the helper
  rm(is_pos_a)
})
# Check against the case_when() result; the error shown below is how
# compare_df() reported that it found no differences.
compareDF::compare_df(dataset4, dataset2, rownames)
#> Error in stop_or_warn("The two data frames are the same!", stop_on_error): The two data frames are the same!
or
# Vectorised recoding with successive ifelse() calls inside within(); each
# call fills in one label and leaves the other rows of `exposure` as they are.
# pos/pos/A rows: X1-X5 -> "long", X6-X10 -> "short", X11-X20 -> "none";
# all other rows: X11-X15 -> "long", X16-X20 -> "short", X1-X10 -> "none".
dataset5 <- within(dataset, {
  is_pos_a <- stimulus_content == "pos" & condition == "pos" & set == "A"
  exposure <- ifelse(
    (is_pos_a & stimulus_no %in% paste0("X", 1:5)) |
      (!is_pos_a & stimulus_no %in% paste0("X", 11:15)),
    "long", exposure
  )
  exposure <- ifelse(
    (is_pos_a & stimulus_no %in% paste0("X", 6:10)) |
      (!is_pos_a & stimulus_no %in% paste0("X", 16:20)),
    "short", exposure
  )
  exposure <- ifelse(
    (is_pos_a & stimulus_no %in% paste0("X", 11:20)) |
      (!is_pos_a & stimulus_no %in% paste0("X", 1:10)),
    "none", exposure
  )
  # within() turns every remaining variable into a column, so drop the helper
  rm(is_pos_a)
})
# Check against the case_when() result; the error shown below is how
# compare_df() reported that it found no differences.
compareDF::compare_df(dataset5, dataset2, rownames)
#> Error in stop_or_warn("The two data frames are the same!", stop_on_error): The two data frames are the same!
Regards,