Little background, I am conducting a food choice experiment that includes different types of food such as steak, pizza, burgers, etc. I would like to find out which trial used which type of food so that I can conduct a GLM to investigate if the type of food had an impact of food choice.
The problem is that I have spelt these types of food differently in the data, either with a capitalised first letter or not (n00b error i know). Also, i'm trying to extract 2 possible words from the column "crossCheckExperiment" that contains many words.
Here is my dataset
structure(list(session_id = c(53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047), project_id = c(495,
495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495, 495,
495, 495, 495, 495, 495, 495), exp_name = c("Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice", "Vegan label and food choice",
"Vegan label and food choice"), exp_id = c(569, 569, 569, 569,
569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569, 569,
569, 569, 569), user_id = c(46946, 46946, 46946, 46946, 46946,
46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946, 46946,
46946, 46946, 46946, 46946, 46946, 46946), user_sex = c("male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male", "male", "male", "male", "male", "male",
"male", "male", "male"), user_status = c("guest", "guest", "guest",
"guest", "guest", "guest", "guest", "guest", "guest", "guest",
"guest", "guest", "guest", "guest", "guest", "guest", "guest",
"guest", "guest", "guest"), user_age = c(21, 21, 21, 21, 21,
21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21),
trial_name = c("Steak_V_L_03_NV_NL_04", "Steak_V_L_01_NV_NL_02",
"Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02", "Curry_V_NL_06_NV_L_05",
"Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01", "Pizza_V_NL_04_NV_L_03",
"Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01", "Burger_V_NL_04_NV_L_03",
"Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05", "Steak_V_L_05_NV_NL_06",
"Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01", "Steak_V_NL_02_NV_L_01",
"Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05", "Pizza_NV_L_02_NV_NL_01"
), trial_n = c(29, 25, 50, 1, 46, 32, 38, 22, 69, 28, 14,
48, 67, 33, 37, 11, 26, 5, 72, 20), order = c(1, 2, 3, 4,
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
), dv = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0,
0, 0, 1, 1), rt = c(2054, 2417, 2007, 3003, 2391, 3238, 3316,
2371, 4109, 3052, 3274, 3658, 2824, 1888, 4198, 5815, 2651,
6591, 5766, 3682), side = c(1, 2, 2, 1, 1, 1, 1, 2, 2, 1,
2, 1, 2, 1, 1, 2, 2, 1, 2, 2), dt = structure(c(1607441861,
1607441863, 1607441865, 1607441868, 1607441871, 1607441874,
1607441877, 1607441880, 1607441884, 1607441887, 1607441890,
1607441894, 1607441896, 1607441898, 1607441903, 1607441909,
1607441911, 1607441918, 1607441924, 1607441927), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), nreps = c(53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L, 53047 = 144L, 53047 = 144L,
53047 = 144L, 53047 = 144L), subjBelief_left = c("vegan",
"vegan", "vegan", "vegan", "vegan", "no vegan", "vegan",
"vegan", "vegan", "no vegan", "vegan", "no vegan", "vegan",
"vegan", "vegan", "vegan", "vegan", "vegan", "no vegan",
"no vegan"), subjBelief_right = c("no vegan", "no vegan",
"no vegan", "vegan", "no vegan", "no vegan", "no vegan",
"vegan", "no vegan", "no vegan", "no vegan", "no vegan",
"vegan", "no vegan", "vegan", "vegan", "no vegan", "no vegan",
"no vegan", "no vegan"), X1 = c("Vegan_steak_nolabel_3",
"Vegan_steak_nolabel_1", "Vegan_chicken_nolabel_2", "Vegan_sausage_nolabel_1",
"Vegan_curry_nolabel_6", "Nonvegan_steak_nolabel_4", "Vegan_curry_nolabel_2",
"Vegan_pizza_nolabel_4", "Vegan_pizza_nolabel_5", "Nonvegan_steak_nolabel_2",
"Vegan_burger_nolabel_4", "Nonvegan_curry_nolabel_6", "Vegan_sausage_nolabel_6",
"Vegan_steak_nolabel_5", "Vegan_curry_nolabel_1", "Vegan_burger_nolabel_2",
"Vegan_steak_nolabel_2", "Vegan_sausage_nolabel_3", "Nonvegan_pizza_nolabel_6",
"Nonvegan_pizza_nolabel_2"), X2 = c("Nonvegan_steak_nolabel_4",
"Nonvegan_steak_nolabel_2", "Nonvegan_chicken_nolabel_1",
"Nonvegan_sausage_nolabel_2", "Nonvegan_curry_nolabel_5",
"Nonvegan_steak_nolabel_3", "Nonvegan_curry_nolabel_1", "Nonvegan_pizza_nolabel_3",
"Nonvegan_pizza_nolabel_6", "Nonvegan_steak_nolabel_1", "Nonvegan_burger_nolabel_3",
"Nonvegan_curry_nolabel_5", "Vegan_sausage_nolabel_5", "Nonvegan_steak_nolabel_6",
"Nonvegan_curry_nolabel_2", "Vegan_burger_nolabel_1", "Nonvegan_steak_nolabel_1",
"Nonvegan_sausage_nolabel_4", "Nonvegan_pizza_nolabel_5",
"Nonvegan_pizza_nolabel_1"), crossCheckExperiment = c("Steak_V_L_03_NV_NL_04",
"Steak_V_L_01_NV_NL_02", "Chicken_V_NL_02_NV_L_01", "Sausage_V_L_01_NV_NL_02",
"Curry_V_NL_06_NV_L_05", "Steak_NV_L_04_NV_NL_03", "Curry_V_NL_02_NV_L_01",
"Pizza_V_NL_04_NV_L_03", "Pizza_V_L_05_NV_NL_06", "Steak_NV_L_02_NV_NL_01",
"Burger_V_NL_04_NV_L_03", "Curry_NV_L_06_NV_NL_05", "Sausage_V_L_06_V_NL_05",
"Steak_V_L_05_NV_NL_06", "Curry_V_L_01_NV_NL_02", "Burger_V_L_02_V_NL_01",
"Steak_V_NL_02_NV_L_01", "Sausage_V_L_03_NV_NL_04", "Pizza_NV_L_06_NV_NL_05",
"Pizza_NV_L_02_NV_NL_01"), checkSubjId = c(53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047,
53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047, 53047
), subjectVeganQuantification = c(5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), left_food = c("V", "V",
"V", "V", "V", "NV", "V", "V", "V", "NV", "V", "NV", "V",
"V", "V", "V", "V", "V", "NV", "NV"), left_label = c("L",
"L", "NL", "L", "NL", "L", "NL", "NL", "L", "L", "NL", "L",
"L", "L", "L", "L", "NL", "L", "L", "L"), right_food = c("NV",
"NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV", "NV",
"NV", "V", "NV", "NV", "V", "NV", "NV", "NV", "NV"), right_label = c("NL",
"NL", "L", "NL", "L", "NL", "L", "L", "NL", "NL", "L", "NL",
"NL", "NL", "NL", "NL", "L", "NL", "NL", "NL"), dv_inv = c(1,
1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0),
appear_selected = c("no vegan", "no vegan", "no vegan", "vegan",
"no vegan", "no vegan", "vegan", "vegan", "vegan", "no vegan",
"no vegan", "no vegan", "vegan", "no vegan", "vegan", "vegan",
"no vegan", "no vegan", "no vegan", "no vegan"), label_selected = c("NL",
"NL", "L", "L", "L", "NL", "NL", "NL", "L", "NL", "L", "NL",
"L", "NL", "NL", "NL", "L", "NL", "L", "L"), counter = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
dv_recoded = c(0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,
1, 1, 0, 0, 0, 0), cong = c(1, 1, 0, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0, 0, 0, 1, 0, 0), veg_cong = c(0, 0, 0, 1, 0,
0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0), incong = c(0,
0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0),
noveg_cong = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 1, 1), control_cong = c(1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), congVar = c("cong",
"cong", "incong", "veg_cong", "incong", "noveg_cong", "incong",
"veg_cong", "cong", "noveg_cong", "incong", "noveg_cong",
"veg_cong", "cong", "veg_cong", "veg_cong", "incong", "cong",
"noveg_cong", "noveg_cong"), cong2 = c(TRUE, TRUE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE), veg_cong2 = c(FALSE,
FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, FALSE,
FALSE), incong2 = c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE,
TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE,
FALSE, TRUE, FALSE, FALSE, FALSE), noveg_cong2 = c(FALSE,
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, TRUE,
FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
TRUE), fac = structure(c(1L, 1L, 3L, 2L, 3L, 4L, 3L, 2L,
1L, 4L, 3L, 4L, 2L, 1L, 2L, 2L, 3L, 1L, 4L, 4L), .Label = c("cong",
"veg_cong", "incong", "noveg_cong"), class = "factor"), trialType = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA)), row.names = c(NA, 20L), class = "data.frame")
This is the code I currently have
my_data_filt <- mutate(Group =
case_when(if (my_data_filt) crossCheckExperiment == 'Steak', 'steak' ~ "steak",
if (my_data_filt) crossCheckExperiment == 'Burger', 'burger' ~ "burger",
if (my_data_filt) crossCheckExperiment == 'Chicken', 'chicken' ~ "chicken",
if (my_data_filt) crossCheckExperiment == 'Pizza', 'pizza' ~ "pizza",
if (my_data_filt) crossCheckExperiment == 'Sausage', 'sausage' ~ "sausage",
if (my_data_filt) crossCheckExperiment == 'Curry', 'curry' ~ "sausage")
)
however, I keep getting this error
Error in if (my_data_filt) crossCheckExperiment == "Steak" :
argument is not interpretable as logical
There are several issues in your code. The first is that you didn't include the data frame in the first argument for mutate()
. The second is that case_when()
is not being used properly. The third is that ==
only works when you are looking for something to be equal to one value. If you want it to be equal to one of several values, use the %in%
operator.
mutate(my_data_filt,
Group = case_when(crossCheckExperiment %in% c('Steak', 'steak') ~ "steak",
crossCheckExperiment %in% c('Burger', 'burger') ~ "burger",
etc....))
If the variable has the word of interest in it along with other stuff, the str_detect()
function from stringr
is helpful:
mutate(my_data_filt,
Group = case_when(str_detect(crossCheckExperiment, '(S|s)teak') ~ "steak",
str_detect(crossCheckExperiment, '(B|b)urger') ~ "burger",
etc....))