Search code examples
rlapplysurveymapply

How to iterate object in svytable while dynamically subsetting survey design


I have survey data I'm creating contingency tables for. I have 3 specific variables I want to both loop over and subset my survey object with the factor levels of those 3 specific variables to get the proportions I am seeking.

I have read several posts on SO, but the two posts which get me extremely close to my intended goal are here (rafa pereira's reply) and here.

My problem is in the combining of both solutions for my purposes, which encounters various errors I haven't been able to resolve.

here's a reprex with my initial solution:

library(dplyr)
library(survey)
df<-structure(list(USOC_Wave = c(2, 6, 9, 5, 11, 11, 2, 2, 4, 3, 
10, 8, 11, 11, 11, 5, 6, 10, 6, 1, 9, 4, 9, 4, 11, 12, 5, 2, 
10, 11, 7, 5, 4, 11, 6, 10, 9, 13, 3, 7, 5, 10, 8, 7, 6, 12, 
12, 1, 12, 5), ethnicity = c("White", "White", "White", "Asian", 
"Asian", "White", "White", "White", "White", "White", "White", 
"Asian", "White", "White", "White", "White", "White", "Asian", 
"White", "White", NA, "Asian", "Asian", "White", "White", "White", 
"White", "White", "White", "White", "White", "White", "Asian", 
"White", "White", "Asian", "White", "White", "White", "White", 
"White", "White", "White", "White", "White", "White", "Asian", 
"White", "White", "Mixed"), sex = c("Men", "Men", "Men", "Men", 
"Men", "Women", "Men", "Men", "Women", "Men", "Women", "Women", 
"Women", "Women", "Women", "Women", "Women", "Men", "Women", 
"Women", "Men", "Men", "Men", "Men", "Women", "Women", "Women", 
"Men", "Women", "Women", "Women", "Men", "Women", "Women", "Women", 
"Men", "Women", "Women", "Women", "Men", "Women", "Women", "Women", 
"Women", "Women", "Men", "Men", "Men", "Women", "Women"), age = c("16-29", 
"30-64", "30-64", "30-64", "30-64", "30-64", "16-29", "65+", 
"30-64", "30-64", "30-64", "30-64", "30-64", "65+", "30-64", 
"30-64", "30-64", "30-64", "65+", "30-64", "16-29", "65+", "16-29", 
"16-29", "16-29", "30-64", "30-64", "30-64", "65+", "16-29", 
"30-64", "30-64", "65+", "30-64", "30-64", "30-64", "16-29", 
"16-29", "30-64", "30-64", "30-64", "30-64", "30-64", "30-64", 
"16-29", "30-64", "65+", "65+", "30-64", "30-64"), strata = c(2902, 
3165, 3069, 2108, 3943, 2683, 2521, 3175, 3232, 3256, 42, 3401, 
2326, 2108, 701, 2074, 1, 5122, 12, 2721, 5122, 3991, 3717, 3157, 
2311, 101, 2717, 118, 2425, 2584, 2523, 2222, 2400, 2729, 2199, 
3361, 10, 2427, 2151, 2584, 2327, 2, 2750, 3297, 2363, 114, 2750, 
2574, 2843, 4121), psu = c(3804, 4330, 4138, 2215, 38089, 3365, 
3041, 4350, 4464, 4512, 156, 11187, 2651, 2216, 1672, 2147, 3, 
52063, 47, 3441, 52086, 40537, 26666, 4314, 2621, 403, 3433, 
458, 2849, 3168, 3045, 2443, 2800, 3458, 2397, 9013, 31, 2854, 
2302, 3168, 2653, 6, 3500, 4593, 2725, 447, 3499, 3148, 3686, 
46785), weight_cs = c(2.80231904983521, 0, 0.950280964374542, 
0.28423735499382, 0.300251632928848, 0.766829490661621, 2.18452429771423, 
0.680638015270233, 0.224062830209732, 2.74595475196838, 0.718028843402863, 
0.340109616518021, 2.88688373565674, 1.17885708808899, 0.620745718479156, 
1.20946884155273, 0.57785838842392, 0.305908054113388, 0.727640688419342, 
1.17930126190186, 0, 0.623862087726593, 0.372526079416275, 0, 
1.3677384853363, 2.87374138832092, 1.31425619125366, 0.462548196315765, 
1.18157768249512, 0.814507722854614, 1.21053576469421, 2.14700984954834, 
0.449016481637955, 1.151535987854, 0.790829658508301, 0.359708696603775, 
3.43058443069458, 0.309507787227631, 1.17791354656219, 1.70297181606293, 
0.741691768169403, 1.52170836925507, 0, 0.989463746547699, 1.34024882316589, 
0.842447340488434, 0.869455099105835, 0.846965670585632, 3.40495872497559, 
0.730816066265106), unpaid = c(2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 
1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1)), row.names = c(NA, 
-50L), class = c("tbl_df", "tbl", "data.frame"))

uos_design<-survey::svydesign(id= ~psu, strata= ~strata, survey.lonely.psu="adjust",
                              weights= ~weight_cs, data=df)

Here is the an initial solution that allows me to subset dynamically and then bind into a dataframe, let's call it #1:

groups <- unique(df$ethnicity) #get unique levels

tablefun <- function(i){svytable(~USOC_Wave+unpaid+ethnicity,
                                 design = subset(uos_design, ethnicity == i)) %>%
    prop.table(margin = 1)  
}

results <- do.call(rbind, lapply(groups, tablefun)) #into df

and here is #2 which allows me to iterate object x in the formula of svytable:

lapply(names(df[c("ethnicity","sex","age")]), function(x){
  svytable(bquote(~.(as.name(x)) +USOC_Wave + unpaid), 
           design = uos_design) %>%
    prop.table(margin = 1)
} )

I am aware of possible dplyr and srvyr resolutions to my challenge but srvyr::summarise is taking forever to run (possibly due to this issue) whereas survey is painlessly quick - ergo tidyverse solutions are unlikely to work for my use case, which pains me.

#1 is my perfect solution, it just lacks automation. I can just manually type #1 for my other variables (sex and age), but surely there must be a way to combine both?

I have tried adapting the two solutions by replacing ethnicity with y and calling y in both a function and a for loop that preceded the existing function(i), and tried using mapply instead of lapply too but am getting stuck.

Help is gratefully received!


Solution

  • Try the following, which makes use of nesting your functions. The results are not ideally suited for rbinding into a data frame, so I left it as a named list.

    vars <- c('ethnicity', 'sex', 'age')
    
    f <- function(var) {
      form <- as.formula(paste0("~USOC_Wave+unpaid"))
      groups <- na.omit(unique(df[,var, drop=TRUE]))   # get unique levels
    
      tablefun <- function(grp, var){
        design <- subset(uos_design, get(var)==grp)
        svytable(form, design) |> prop.table(margin=1)
      }
    
      setNames(lapply(groups, tablefun, var=var), groups)
    }
    
    result <- setNames(lapply(vars, f), vars); result
    

    $ethnicity
    $ethnicity$White
             unpaid
    USOC_Wave          1          2
           1  0.41799314 0.58200686
           2  0.07545611 0.92454389
           3  0.00000000 1.00000000
           4  0.00000000 1.00000000
           5  0.39668156 0.60331844
           6  0.00000000 1.00000000
           7  0.56367299 0.43632701
           8                       
           9  0.00000000 1.00000000
           10 0.00000000 1.00000000
           11 0.00000000 1.00000000
           12 0.00000000 1.00000000
           13 0.00000000 1.00000000
    
    $ethnicity$Asian
             unpaid
    USOC_Wave 2
           4  1
           5  1
           8  1
           9  1
           10 1
           11 1
           12 1
    
    $ethnicity$Mixed
             unpaid
    USOC_Wave 1
            5 1
    

    $sex
    $sex$Men
             unpaid
    USOC_Wave          1          2
           1  1.00000000 0.00000000
           2  0.07545611 0.92454389
           3  0.00000000 1.00000000
           4  0.00000000 1.00000000
           5  0.88308990 0.11691010
           6                       
           7  0.00000000 1.00000000
           9  0.00000000 1.00000000
           10 0.00000000 1.00000000
           11 0.00000000 1.00000000
           12 0.00000000 1.00000000
    
    $sex$Women
             unpaid
    USOC_Wave         1         2
           1  0.0000000 1.0000000
           3  0.0000000 1.0000000
           4  0.0000000 1.0000000
           5  0.1828762 0.8171238
           6  0.0000000 1.0000000
           7  1.0000000 0.0000000
           8  0.0000000 1.0000000
           9  0.0000000 1.0000000
           10 0.0000000 1.0000000
           11 0.0000000 1.0000000
           12 0.0000000 1.0000000
           13 0.0000000 1.0000000
    

    $age
    $age$`16-29`
             unpaid
    USOC_Wave   2
           2    1
           4     
           6    1
           9    1
           11   1
           13   1
    
    $age$`30-64`
             unpaid
    USOC_Wave         1         2
           1  0.0000000 1.0000000
           2  1.0000000 0.0000000
           3  0.0000000 1.0000000
           4  0.0000000 1.0000000
           5  0.4477378 0.5522622
           6  0.0000000 1.0000000
           7  0.5636730 0.4363270
           8  0.0000000 1.0000000
           9  0.0000000 1.0000000
           10 0.0000000 1.0000000
           11 0.0000000 1.0000000
           12 0.0000000 1.0000000
    
    $age$`65+`
             unpaid
    USOC_Wave 1 2
           1  1 0
           2  0 1
           4  0 1
           6  0 1
           10 0 1
           11 0 1
           12 0 1