I have a dataframe that I want to check if any column starting with 'Time' contains a certain value (for example '1'). If 1 or more columns contain that value then I want to get 1 column containing TRUE/FALSE. This is my example dataframe:
structure(list(Nummer = c("449002", "449003", "449005", "449006",
"449010"), `Sample ID.1` = c("4490021", "4490031", "4490051",
"4490061", "4490101"), Time.1 = c("1", "1", "1", "1", "1"), `Sample ID.2` = c("4490022",
"4490032", "4490052", "4490062", "4490102"), Time.2 = c("2",
"2", "2", "2", "2"), `Sample ID.3` = c("4490022", "4490032",
"4490052", "4490062", "4490102"), Time.3 = c("2", "2", "2", "2",
"2"), `Sample ID.4` = c("4490023", "4490033", "4490053", "4490063",
"4490103"), Time.4 = c("3", "3", "3", "3", "3"), `Sample ID.5` = c("4490023",
"4490031", "4490053", "4490063", "4490103"), Time.5 = c("3",
"1", "3", "3", "3"), `Sample ID.6` = c("4490021", "4490031",
"44900551", "4490061", "44901051"), Time.6 = c("1", "1", "51",
"1", "51"), `Sample ID.7` = c("4490021", "4490031", "4490051",
"4490061", "4490101"), Time.7 = c("1", "1", "1", "1", "1"), `Sample ID.8` = c("4490022",
"4490032", "4490051", "4490062", "4490101"), Time.8 = c("2",
"2", "1", "2", "1"), `Sample ID.9` = c("4490022", "4490032",
"4490052", "4490062", "4490102"), Time.9 = c("2", "2", "2", "2",
"2"), `Sample ID.10` = c("4490023", "4490032", "4490052", "4490063",
"4490102"), Time.10 = c("3", "2", "2", "3", "2"), `Sample ID.11` = c("4490023",
"4490033", "4490053", "4490063", "4490103"), Time.11 = c("3",
"3", "3", "3", "3"), `Sample ID.12` = c(NA, "4490033", "4490053",
NA, "4490103"), Time.12 = c(NA, "3", "3", NA, "3"), `Sample ID.13` = c(NA,
"4490033", "44900551", NA, "44901051"), Time.13 = c(NA, "3",
"51", NA, "51"), `Sample ID.14` = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Time.14 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
`Sample ID.15` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), Time.15 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), `Sample ID.16` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), Time.16 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), groups = structure(list(Nummer = c("449002",
"449003", "449005", "449006", "449010"), .rows = structure(list(
1L, 2L, 3L, 4L, 5L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
Thanks in advance! I would prefer a dplyr solution if possible
In base R this is a matter of comparing the "Time"
columns with the target string "1"
and adding the logical values. If the sums are greater than zero, return TRUE
.
rowSums(df1[grep("Time", names(df1))] == "1", na.rm = TRUE) > 0L
# [1] TRUE TRUE TRUE TRUE TRUE
dplyr
solutionWith package dplyr
, use if_any
.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
df1 %>%
mutate(flag = if_any(starts_with("Time"), ~. == "1")) %>%
select(Nummer, flag)
#> # A tibble: 5 × 2
#> # Groups: Nummer [5]
#> Nummer flag
#> <chr> <lgl>
#> 1 449002 TRUE
#> 2 449003 TRUE
#> 3 449005 TRUE
#> 4 449006 TRUE
#> 5 449010 TRUE
Created on 2024-09-12 with reprex v2.1.0