Here is the program I am using to summarise the ADSL data by the groups TRT01PN and SITEID. However, I get an error, and it comes from `filter()`: when more than one grouping variable is used and I try to filter on them, it does not work, and I am not sure why. Here is the code I tried, together with sample data.
filter(if_all(all_of(group), ~ !is.na(.)))
Error in `map2()`:
ℹ In index: 1.
Caused by error in `filter()`:
ℹ In argument: `if_all(all_of(valid_group), ~!is.na(.))`.
Caused by error in `if_all()`:
ℹ In argument: `all_of(valid_group)`.
Caused by error in `all_of()`:
! Can't subset elements that don't exist.
✖ Elements `TRT01PN` and `SITEID` don't exist.
The sample data is the `adsl` dataset from the tidyCDISC package:
dput(head(adsl))
structure(list(STUDYID = structure(c("CDISCPILOT01", "CDISCPILOT01",
"CDISCPILOT01", "CDISCPILOT01", "CDISCPILOT01", "CDISCPILOT01"
), label = "Study Identifier"), USUBJID = structure(c("01-701-1015",
"01-701-1023", "01-701-1028", "01-701-1033", "01-701-1034", "01-701-1047"
), label = "Unique Subject Identifier"), SUBJID = structure(c("1015",
"1023", "1028", "1033", "1034", "1047"), label = "Subject Identifier for the Study"),
SITEID = structure(c("701", "701", "701", "701", "701", "701"
), label = "Study Site Identifier"), SITEGR1 = structure(c("701",
"701", "701", "701", "701", "701"), label = "Pooled Site Group 1"),
ARM = structure(c("Placebo", "Placebo", "Xanomeline High Dose",
"Xanomeline Low Dose", "Xanomeline High Dose", "Placebo"), label = "Description of Planned Arm"),
TRT01P = structure(c("Placebo", "Placebo", "Xanomeline High Dose",
"Xanomeline Low Dose", "Xanomeline High Dose", "Placebo"), label = "Planned Treatment for Period 01"),
TRT01PN = structure(c(0, 0, 81, 54, 81, 0), label = "Planned Treatment for Period 01 (N)"),
TRT01A = structure(c("Placebo", "Placebo", "Xanomeline High Dose",
"Xanomeline Low Dose", "Xanomeline High Dose", "Placebo"), label = "Actual Treatment for Period 01"),
TRT01AN = structure(c(0, 0, 81, 54, 81, 0), label = "Actual Treatment for Period 01 (N)"),
TRTSDT = structure(c(16072, 15557, 15905, 16147, 16252, 15748
), label = "Date of First Exposure to Treatment", format.sas = "DATE9", class = "Date"),
TRTEDT = structure(c(16253, 15584, 16084, 16160, 16434, 15773
), label = "Date of Last Exposure to Treatment", format.sas = "DATE9", class = "Date"),
TRTDURD = structure(c(182, 28, 180, 14, 183, 26), label = "Total Treatment Duration (Days)"),
AVGDD = structure(c(0, 0, 77.7, 54, 76.9, 0), label = "Avg Daily Dose (as planned)"),
CUMDOSE = structure(c(0, 0, 13986, 756, 14067, 0), label = "Cumulative Dose (as planned)"),
AGE = structure(c(63, 64, 71, 74, 77, 85), label = "Age"),
AGEGR1 = structure(c("<65", "<65", "65-80", "65-80", "65-80",
">80"), label = "Pooled Age Group 1"), AGEGR1N = structure(c(1,
1, 2, 2, 2, 3), label = "Pooled Age Group 1 (N)"), AGEU = structure(c("YEARS",
"YEARS", "YEARS", "YEARS", "YEARS", "YEARS"), label = "Age Units"),
RACE = structure(c("WHITE", "WHITE", "WHITE", "WHITE", "WHITE",
"WHITE"), label = "Race"), RACEN = structure(c(1, 1, 1, 1,
1, 1), label = "Race (N)"), SEX = structure(c("F", "M", "M",
"M", "F", "F"), label = "Sex"), ETHNIC = structure(c("HISPANIC OR LATINO",
"HISPANIC OR LATINO", "NOT HISPANIC OR LATINO", "NOT HISPANIC OR LATINO",
"NOT HISPANIC OR LATINO", "NOT HISPANIC OR LATINO"), label = "Ethnicity"),
SAFFL = structure(c("Y", "Y", "Y", "Y", "Y", "Y"), label = "Safety Population Flag"),
ITTFL = structure(c("Y", "Y", "Y", "Y", "Y", "Y"), label = "Intent-To-Treat Population Flag"),
EFFFL = structure(c("Y", "Y", "Y", "Y", "Y", "Y"), label = "Efficacy Population Flag"),
COMP8FL = structure(c("Y", "N", "Y", "N", "Y", "N"), label = "Completers of Week 8 Population Flag"),
COMP16FL = structure(c("Y", "N", "Y", "N", "Y", "N"), label = "Completers of Week 16 Population Flag"),
COMP24FL = structure(c("Y", "N", "Y", "N", "Y", "N"), label = "Completers of Week 24 Population Flag"),
DISCONFL = structure(c(NA, "Y", NA, "Y", NA, "Y"), label = "Did the Subject Discontinue the Study?"),
DSRAEFL = structure(c(NA, "Y", NA, NA, NA, "Y"), label = "Discontinued due to AE?"),
DTHFL = structure(c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), label = "Subject Died?"),
BMIBL = structure(c(25.1, 30.4, 31.4, 28.8, 26.1, 30.4), label = "Baseline BMI (kg/m^2)"),
BMIBLGR1 = structure(c("25-<30", ">=30", ">=30", "25-<30",
"25-<30", ">=30"), label = "Pooled Baseline BMI Group 1"),
HEIGHTBL = structure(c(147.3, 162.6, 177.8, 175.3, 154.9,
148.6), label = "Baseline Height (cm)"), WEIGHTBL = structure(c(54.4,
80.3, 99.3, 88.5, 62.6, 67.1), label = "Baseline Weight (kg)"),
EDUCLVL = structure(c(16, 14, 16, 12, 9, 8), label = "Years of Education"),
DISONSDT = structure(c(14729, 13218, 14594, 14458, 15246,
14451), label = "Date of Onset of Disease", format.sas = "DATE9", class = "Date"),
DURDIS = structure(c(43.9, 76.4, 42.8, 55.3, 32.9, 42), label = "Duration of Disease (Months)"),
DURDSGR1 = structure(c(">=12", ">=12", ">=12", ">=12", ">=12",
">=12"), label = "Pooled Disease Duration Group 1"), VISIT1DT = structure(c(16065,
15543, 15897, 16139, 16245, 15727), label = "Date of Visit 1", format.sas = "DATE9", class = "Date"),
RFSTDTC = structure(c("2014-01-02", "2012-08-05", "2013-07-19",
"2014-03-18", "2014-07-01", "2013-02-12"), label = "Subject Reference Start Date/Time"),
RFENDTC = structure(c("2014-07-02", "2012-09-02", "2014-01-14",
"2014-04-14", "2014-12-30", "2013-03-29"), label = "Subject Reference End Date/Time"),
VISNUMEN = structure(c(12, 5, 12, 5, 12, 6), label = "End of Trt Visit (Vis 12 or Early Term.)"),
RFENDT = structure(c(16253, 15585, 16084, 16174, 16434, 15793
), label = "Date of Discontinuation/Completion", format.sas = "DATE9", class = "Date"),
DCDECOD = structure(c("COMPLETED", "ADVERSE EVENT", "COMPLETED",
"STUDY TERMINATED BY SPONSOR", "COMPLETED", "ADVERSE EVENT"
), label = "Standardized Disposition Term"), EOSSTT = structure(c("COMPLETED",
"DISCONTINUED", "COMPLETED", "DISCONTINUED", "COMPLETED",
"DISCONTINUED"), label = "End of Study Status"), DCSREAS = structure(c(NA,
"Adverse Event", NA, "Sponsor Decision", NA, "Adverse Event"
), label = "Reason for Discontinuation from Study"), MMSETOT = structure(c(23,
23, 23, 23, 21, 23), label = "MMSE Total"), FASFL = structure(c("Y",
"Y", "Y", "Y", "Y", "Y"), label = "Full Analysis Set Population Flag"),
RANDFL = structure(c("Y", "Y", "Y", "Y", "Y", "Y"), label = "Randomized Population Flag"),
EOTSTT = structure(c("COMPLETED", "DISCONTINUED", "COMPLETED",
"DISCONTINUED", "COMPLETED", "DISCONTINUED"), label = "End of Treatment Status"),
DCTREAS = structure(c(NA, "Adverse Event", NA, "Sponsor Decision",
NA, "Adverse Event"), label = "Reason for Discontinuation of Treatment"),
DTHDT = structure(c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_), label = "Date of Death", class = "Date")), row.names = c(NA,
-6L), class = c("tbl_df", "tbl", "data.frame"))
This is what I tried:
library(tidyverse)
library(tidyCDISC)
# Data: keep the two grouping columns and the three numeric columns
# that are summarised below.
adsl <- select(tidyCDISC::adsl, SITEID, TRT01PN, AGE, TRTDURD, AVGDD)
#' Summarise numeric columns within groups.
#'
#' For every column named in `vars`, computes the mean, standard deviation,
#' median, minimum and maximum within each combination of the `group`
#' columns, then stacks the per-variable results into a single tibble.
#'
#' @param .data A data frame.
#' @param group Character vector of grouping column names, or `NULL` for
#'   no grouping.
#' @param vars Character vector of names of the columns to summarise.
#' @return A tibble holding the grouping columns, the columns `mean`, `sd`,
#'   `median`, `min` and `max`, and a `name` column identifying the
#'   summarised variable. Rows with a missing value in any grouping column
#'   are excluded.
adsl_summ <- function(.data, group = NULL, vars = NULL) {
  adsl <- .data |> select(all_of(c(group, vars)))

  # Drop rows with a missing grouping value BEFORE grouping. In a grouped
  # data frame the grouping columns are hidden from tidy-selection, so
  # calling `if_all(all_of(group), ...)` after `group_by()` fails with
  # "Can't subset elements that don't exist".
  if (length(group) > 0) {
    adsl <- adsl |> filter(if_all(all_of(group), \(col) !is.na(col)))
  }

  map_dfr(vars, \(y) {
    adsl |>
      group_by(across(all_of(group))) |>
      summarise(
        # Inside summarise() `.data` is dplyr's data pronoun, not this
        # function's `.data` argument.
        mean = mean(.data[[y]]),
        sd = sd(.data[[y]]),
        median = median(.data[[y]]),
        min = min(.data[[y]]),
        max = max(.data[[y]]),
        .groups = "drop"
      ) |>
      mutate(name = y)
  })
}
adsl_summ(
  adsl,
  group = c("TRT01PN", "SITEID"),
  vars = c("AGE", "TRTDURD", "AVGDD")
)
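The error occurs because `filter()` is called after `group_by()`: in a grouped data frame the grouping columns are not available to tidy-selection inside `if_all()`/`across()`, so `all_of(group)` cannot find them. Filter before grouping, or use per-operation grouping with `.by`. Overall, there is no need for `!!`/`!!!` or `sym()`/`syms()`, and you can simplify your code considerably like so: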
library(tidyverse)
adsl <- select(adsl, SITEID, TRT01PN, AGE, TRTDURD, AVGDD)
#' Summarise numeric columns within groups.
#'
#' Computes mean, sd, median, min and max of each column in `vars` for every
#' combination of the `group` columns, after dropping rows where any
#' grouping column is missing, and stacks the results row-wise.
#'
#' @param .data A data frame.
#' @param group Character vector of grouping column names.
#' @param vars Character vector of names of the columns to summarise.
#' @return A tibble with the grouping columns, the five summary statistics
#'   and a `name` column holding the summarised variable's name.
adsl_summ <- function(.data, group = NULL, vars = NULL) {
  needed <- select(.data, all_of(c(group, vars)))

  # Summary table for a single variable, identified by its column name.
  summarise_one <- function(col) {
    complete_groups <- filter(needed, if_all(all_of(group), ~ !is.na(.)))
    stats <- summarise(
      complete_groups,
      # `.data` here is dplyr's data pronoun for the rows being summarised.
      mean = mean(.data[[col]]),
      sd = sd(.data[[col]]),
      median = median(.data[[col]]),
      min = min(.data[[col]]),
      max = max(.data[[col]]),
      .by = all_of(group)
    )
    mutate(stats, name = col)
  }

  bind_rows(map(vars, summarise_one))
}
adsl_summ(
  adsl,
  group = c("TRT01PN", "SITEID"),
  vars = c("AGE", "TRTDURD", "AVGDD")
)
#> # A tibble: 9 × 8
#> TRT01PN SITEID mean sd median min max name
#> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
#> 1 0 701 70.7 12.4 64 63 85 AGE
#> 2 81 701 74 4.24 74 71 77 AGE
#> 3 54 701 74 NA 74 74 74 AGE
#> 4 0 701 78.7 89.5 28 26 182 TRTDURD
#> 5 81 701 182. 2.12 182. 180 183 TRTDURD
#> 6 54 701 14 NA 14 14 14 TRTDURD
#> 7 0 701 0 0 0 0 0 AVGDD
#> 8 81 701 77.3 0.566 77.3 76.9 77.7 AVGDD
#> 9 54 701 54 NA 54 54 54 AVGDD