R filter out rows based on value in different row

Here's an example df:

df <- data.frame(
  id= c(1,1,1,1,2,2,2,2),
  admit_dt = c("2022-01-01", "" ,"2022-05-05","", "", "2022-09-03", "", ""),
  discharge_dt = c("2022-01-04","", "2022-05-06","","", "2022-09-04", "", ""),
  first_dt = c("2022-01-01", "2022-01-02", "2022-05-05", "2022-06-09", "2022-01-03", "2022-09-03", "2022-10-10", "2022-09-04")
)

I need to filter out rows where for each unique id, there's a date value in first_dt that is between (inclusive) the admit_dt and discharge_dt in any other row for the same id. So I want to keep row 1 in the above df, but get rid of row 2, since the value in first_dt is between the value in admit_dt and discharge_dt in row 1. I'd want to end up with this df:

id	admit_dt	discharge_dt	first_dt
1	2022-01-01	2022-01-04	2022-01-01
1	2022-05-05	2022-05-06	2022-05-05
1			2022-06-09
2			2022-01-03
2	2022-09-03	2022-09-04	2022-09-03
2			2022-10-10

Thank you for any suggestions!

Solution

library(dplyr)

df <- tibble(
  id= c(1,1,1,1,2,2,2,2),
  admit_dt = c("2022-01-01", "" ,"2022-05-05","", "", "2022-09-03", "", ""),
  discharge_dt = c("2022-01-04","", "2022-05-06","","", "2022-09-04", "", ""),
  first_dt = c("2022-01-01", "2022-01-02", "2022-05-05", "2022-06-09", "2022-01-03", "2022-09-03", "2022-10-10", "2022-09-04")
)

goal_df <- tibble::tribble(
  ~id,    ~admit_dt, ~discharge_dt,    ~first_dt,
  1, "2022-01-01",  "2022-01-04", "2022-01-01",
  1, "2022-05-05",  "2022-05-06", "2022-05-05",
  1,           "",             "", "2022-06-09",
  2,            "",             "", "2022-01-03",
  2, "2022-09-03",  "2022-09-04", "2022-09-03",
  2,            "",             "", "2022-10-10"
)




# filter out rows where for each unique id, 
# there's a date value in first_dt 
# that  in any other row for the same id is between (inclusive) the admit_dt and discharge_dt 

# add rownumbers
dfr <- df |> mutate(rn=row_number())

# get a set of data which is all the original rows, joined to their same id, but different rownumber counterparts

df2 <- left_join(dfr,
  dfr |> rename_all(\(x)paste0(x, "_2")),
  by = join_by(id == id_2, between(
    first_dt,
    admit_dt_2,
    discharge_dt_2
  ))
) |> mutate(dropme= rn_2 != rn)

df3 <- df2 |> filter(!dropme | is.na(dropme))

cleanedup <- select(df3,
                    id,admit_dt,
                    discharge_dt,
                    first_dt)

identical(cleanedup,goal_df)