I have a dataset recording the susceptibility of different bacteria to various drugs.
I would like to get the susceptibility frequencies by organism. Is there a way to streamline this, instead of copy/pasting for each drug?
I'm thinking of using `apply`,
or maybe writing a function, but I'm not sure where to start.
pacman::p_load(tidyverse,
janitor)
# Demo data: one row per isolate. `organism_name` identifies the organism;
# each remaining column holds that isolate's result for one drug, coded as
# "S", "I" or "R".
demo_dat <- data.frame(
  organism_name = c(
    "Klebsiella pneumonia", "Klebsiella pneumonia", "Escherichia coli",
    "Klebsiella pneumonia", "Enterobacter cloacae", "Escherichia coli",
    "Klebsiella pneumonia", "Escherichia coli", "Escherichia coli",
    "Escherichia coli", "Klebsiella pneumonia", "Klebsiella pneumonia",
    "Escherichia coli", "Klebsiella pneumonia", "Escherichia coli",
    "Serratia marcenscens", "Klebsiella oxytoca", "Escherichia coli",
    "Proteus mirabilis", "Escherichia coli"
  ),
  amox_clav_po = c(
    "S", "S", "S", "I", "R", "I", "S", "I", "R", "I",
    "S", "S", "S", "S", "I", "R", "S", "S", "S", "R"
  ),
  amp_sul_iv = c(
    "S", "I", "S", "S", "R", "R", "S", "S", "R", "I",
    "S", "I", "S", "I", "R", "R", "S", "S", "S", "R"
  ),
  cefaclor_po = c(
    "S", "S", "S", "S", "R", "S", "S", "S", "S", "S",
    "S", "S", "S", "S", "R", "R", "S", "S", "S", "S"
  ),
  ceftriaxone_iv = c(
    "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
    "S", "S", "S", "S", "R", "S", "S", "S", "S", "S"
  ),
  # Keep every column as character rather than factor.
  stringsAsFactors = FALSE
)
# Per-organism share of isolates whose amox_clav_po result is "S":
# each row contributes 1 / group size when susceptible, 0 otherwise.
summarise(
  group_by(demo_dat, organism_name),
  susceptibility = sum((amox_clav_po == "S") / n())
)
#> # A tibble: 6 × 2
#> organism_name susceptibility
#> <chr> <dbl>
#> 1 Enterobacter cloacae 0
#> 2 Escherichia coli 0.333
#> 3 Klebsiella oxytoca 1
#> 4 Klebsiella pneumonia 0.857
#> 5 Proteus mirabilis 1
#> 6 Serratia marcenscens 0
# Per-organism share of isolates whose amp_sul_iv result is "S":
# each row contributes 1 / group size when susceptible, 0 otherwise.
summarise(
  group_by(demo_dat, organism_name),
  susceptibility = sum((amp_sul_iv == "S") / n())
)
#> # A tibble: 6 × 2
#> organism_name susceptibility
#> <chr> <dbl>
#> 1 Enterobacter cloacae 0
#> 2 Escherichia coli 0.444
#> 3 Klebsiella oxytoca 1
#> 4 Klebsiella pneumonia 0.571
#> 5 Proteus mirabilis 1
#> 6 Serratia marcenscens 0
Created on 2024-01-29 with reprex v2.0.2
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.2 (2022-10-31)
#> os macOS Big Sur ... 10.16
#> system x86_64, darwin17.0
#> ui X11
#> language (EN)
#> collate en_US.UTF-8
#> ctype en_US.UTF-8
#> tz America/Phoenix
#> date 2024-01-29
#> pandoc 3.1.1 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.0)
#> backports 1.4.1 2021-12-13 [1] CRAN (R 4.2.0)
#> broom 1.0.1 2022-08-29 [1] CRAN (R 4.2.0)
#> cellranger 1.1.0 2016-07-27 [1] CRAN (R 4.2.0)
#> cli 3.6.1 2023-03-23 [1] CRAN (R 4.2.0)
#> colorspace 2.0-3 2022-02-21 [1] CRAN (R 4.2.0)
#> crayon 1.5.2 2022-09-29 [1] CRAN (R 4.2.0)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.0)
#> dbplyr 2.2.1 2022-06-27 [1] CRAN (R 4.2.0)
#> digest 0.6.30 2022-10-18 [1] CRAN (R 4.2.0)
#> dplyr * 1.1.2 2023-04-20 [1] CRAN (R 4.2.0)
#> ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.2.0)
#> evaluate 0.17 2022-10-07 [1] CRAN (R 4.2.0)
#> fansi 1.0.3 2022-03-24 [1] CRAN (R 4.2.0)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.0)
#> forcats * 0.5.2 2022-08-19 [1] CRAN (R 4.2.0)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.2.0)
#> gargle 1.2.1 2022-09-08 [1] CRAN (R 4.2.0)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.0)
#> ggplot2 * 3.3.6 2022-05-03 [1] CRAN (R 4.2.0)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.0)
#> googledrive 2.0.0 2021-07-08 [1] CRAN (R 4.2.0)
#> googlesheets4 1.0.1 2022-08-13 [1] CRAN (R 4.2.0)
#> gtable 0.3.1 2022-09-01 [1] CRAN (R 4.2.0)
#> haven 2.5.1 2022-08-22 [1] CRAN (R 4.2.0)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.2.0)
#> hms 1.1.2 2022-08-19 [1] CRAN (R 4.2.0)
#> htmltools 0.5.3 2022-07-18 [1] CRAN (R 4.2.0)
#> httr 1.4.4 2022-08-17 [1] CRAN (R 4.2.0)
#> janitor * 2.1.0 2021-01-05 [1] CRAN (R 4.2.0)
#> jsonlite 1.8.3 2022-10-21 [1] CRAN (R 4.2.0)
#> knitr 1.40 2022-08-24 [1] CRAN (R 4.2.0)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.0)
#> lubridate 1.9.0 2022-11-06 [1] CRAN (R 4.2.0)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.0)
#> modelr 0.1.9 2022-08-19 [1] CRAN (R 4.2.0)
#> munsell 0.5.0 2018-06-12 [1] CRAN (R 4.2.0)
#> pacman 0.5.1 2019-03-11 [1] CRAN (R 4.2.0)
#> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.2.0)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.0)
#> purrr * 1.0.1 2023-01-10 [1] CRAN (R 4.2.0)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.0)
#> readr * 2.1.3 2022-10-01 [1] CRAN (R 4.2.0)
#> readxl 1.4.1 2022-08-17 [1] CRAN (R 4.2.0)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.0)
#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.2.0)
#> rmarkdown 2.17 2022-10-07 [1] CRAN (R 4.2.0)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.0)
#> rvest 1.0.3 2022-08-19 [1] CRAN (R 4.2.0)
#> scales 1.2.1 2022-08-20 [1] CRAN (R 4.2.0)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.0)
#> snakecase 0.11.0 2019-05-25 [1] CRAN (R 4.2.0)
#> stringi 1.7.8 2022-07-11 [1] CRAN (R 4.2.0)
#> stringr * 1.4.1 2022-08-20 [1] CRAN (R 4.2.0)
#> tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.2.0)
#> tidyr * 1.2.1 2022-09-08 [1] CRAN (R 4.2.0)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.0)
#> tidyverse * 1.3.2 2022-07-18 [1] CRAN (R 4.2.0)
#> timechange 0.1.1 2022-11-04 [1] CRAN (R 4.2.0)
#> tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.2.0)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.0)
#> vctrs 0.6.2 2023-04-19 [1] CRAN (R 4.2.0)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.0)
#> xfun 0.34 2022-10-18 [1] CRAN (R 4.2.0)
#> xml2 1.3.3 2021-11-30 [1] CRAN (R 4.2.0)
#> yaml 2.3.6 2022-10-18 [1] CRAN (R 4.2.0)
#>
#> [1] /Library/Frameworks/R.framework/Versions/4.2/Resources/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
You can pivot and summarize:
library(dplyr)
library(tidyr)
# Reshape to long form (one row per organism/drug result; drug column names
# land in `name`, results in `value`), then compute the fraction of "S"
# results for every organism/drug combination in a single summarize() call.
out <- summarize(
  pivot_longer(demo_dat, cols = -organism_name),
  susceptibility = sum(value == "S") / n(),
  .by = c(organism_name, name)
)
out
# # A tibble: 24 × 3
# organism_name name susceptibility
# <chr> <chr> <dbl>
# 1 Klebsiella pneumonia amox_clav_po 0.857
# 2 Klebsiella pneumonia amp_sul_iv 0.571
# 3 Klebsiella pneumonia cefaclor_po 1
# 4 Klebsiella pneumonia ceftriaxone_iv 1
# 5 Escherichia coli amox_clav_po 0.333
# 6 Escherichia coli amp_sul_iv 0.444
# 7 Escherichia coli cefaclor_po 0.889
# 8 Escherichia coli ceftriaxone_iv 0.889
# 9 Enterobacter cloacae amox_clav_po 0
# 10 Enterobacter cloacae amp_sul_iv 0
# # ℹ 14 more rows
# # ℹ Use `print(n = ...)` to see more rows
Just to make sure we're seeing the same thing as your output,
# Keep only the amox_clav_po rows to compare against the per-drug result.
out |>
  filter(name == "amox_clav_po")
# # A tibble: 6 × 3
# organism_name name susceptibility
# <chr> <chr> <dbl>
# 1 Klebsiella pneumonia amox_clav_po 0.857
# 2 Escherichia coli amox_clav_po 0.333
# 3 Enterobacter cloacae amox_clav_po 0
# 4 Serratia marcenscens amox_clav_po 0
# 5 Klebsiella oxytoca amox_clav_po 1
# 6 Proteus mirabilis amox_clav_po 1