Goal: Find all dates where the only colors present are blue and/or yellow. If a date has two colors, they must be exactly blue and yellow. If a date has one color, it can be either one.
In the example dataset:
2024-01-01 is ruled out because it has purple - and because it has 3 unique colors, it cannot consist of only yellow and blue
2024-03-27 is OK since it only has blue
2024-05-15 is OK since it has blue AND yellow
2024-06-08 is ruled out because it has orange (this is the case I don't know how to handle, hence the ?????? placeholder)
library(tidyverse)
# Example data: one row per (date, color) observation.
# Each date is repeated once for every color recorded on it.
dates <- rep(
  c("2024-01-01", "2024-03-27", "2024-05-15", "2024-06-08"),
  times = c(3, 1, 2, 2)
)
color <- c(
  "blue", "purple", "yellow", # 2024-01-01
  "blue",                     # 2024-03-27
  "blue", "yellow",           # 2024-05-15
  "yellow", "orange"          # 2024-06-08
)
df <- tibble(dates = dates, color = color)
df
# Keep only the rows of dates whose colors are exclusively blue and/or yellow,
# and annotate each row with the number of distinct colors seen on its date.
#
# The check has to run while the data is still grouped by `dates`: a date
# qualifies only if *every* one of its colors is in the allowed set, which is a
# per-group property and cannot be tested row by row after `ungroup()`.
# `all(color %in% c("blue", "yellow"))` covers both cases at once (a single
# allowed color, or blue together with yellow), so it replaces the unfinished
# `??????` condition and no separate branch per color count is needed.
df %>%
  group_by(dates) %>%
  mutate(unique_colors_per_day = n_distinct(color)) %>%
  filter(all(color %in% c("blue", "yellow"))) %>%
  ungroup()
library(dplyr)
# Per-date filter: keep all rows of a date only when every color recorded on
# that date is blue or yellow. This admits dates with a single allowed color as
# well as dates with both blue and yellow, and rejects any date that has some
# other color.
df |>
  filter(
    all(color %in% c("blue", "yellow")),
    .by = dates
  )