Search code examples
r expss

Make a table in expss that shows both freq and cpct but runs the significance test only on the cpct columns


Using this data set with a multiple dichotomy set and a group:

  # Reproducible example data: 200 respondents, a three-level `group`
  # variable and eight 0/1 items (q1a-q1h) that together form the multiple
  # dichotomy set. The order of the sample() calls below is kept fixed so
  # the seeded random stream yields the same data on every run.
  set.seed(14)
  n_resp <- 200
  draw_binary <- function() sample(c(0, 1), size = n_resp, replace = TRUE)
  checkall <- data.frame(
    ID = seq_len(n_resp),
    group = sample(c("A", "B", "C"), size = n_resp, replace = TRUE),
    q1a = draw_binary(),
    q1b = draw_binary(),
    q1c = draw_binary(),
    q1d = draw_binary(),
    q1e = draw_binary(),
    q1f = draw_binary(),
    q1g = draw_binary(),
    q1h = draw_binary()
  )

  # Doctor two items so that they are related to group: within group "A",
  # q1c is redrawn with mostly 1s and q1e with mostly 0s.
  in_group_a <- checkall$group == "A"
  n_group_a <- sum(in_group_a)
  checkall$q1c[in_group_a] <- sample(c(0, 1, 1, 1), size = n_group_a, replace = TRUE)
  checkall$q1e[in_group_a] <- sample(c(0, 0, 0, 1), size = n_group_a, replace = TRUE)

I would like to make a table that shows frequencies and column percents like this:

library(dplyr)
# Install expss only when it is missing, then attach it. requireNamespace()
# checks availability without attaching anything; the unconditional
# library() call then stops with an error if the package still cannot be
# loaded, instead of require()'s silent FALSE.
if (!requireNamespace("expss", quietly = TRUE)) {
  install.packages("expss", dependencies = TRUE)
}
library(expss)

  # Frequencies ("freq") and column percentages ("col %") side by side for
  # the multiple dichotomy set q1a-q1h, with a total column and one column
  # pair per level of `group`. No significance testing yet.
  checkall %>%
    tab_cells(mdset(q1a %to% q1h)) %>%
    tab_cols(total(), group) %>%
    tab_stat_cases(label = "freq") %>%
    tab_stat_cpct(label = "col %") %>%
    tab_pivot(stat_position = "inside_columns")

 |              | #Total |       | group |       |      |       |      |       |
 |              |   freq | col % |     A |       |    B |       |    C |       |
 |              |        |       |  freq | col % | freq | col % | freq | col % |
 | ------------ | ------ | ----- | ----- | ----- | ---- | ----- | ---- | ----- |
 |          q1a |    101 |  50.8 |    33 |  47.8 |   36 |  51.4 |   32 |  53.3 |
 |          q1b |     92 |  46.2 |    34 |  49.3 |   29 |  41.4 |   29 |  48.3 |
 |          q1c |    111 |  55.8 |    53 |  76.8 |   30 |  42.9 |   28 |  46.7 |
 |          q1d |     89 |  44.7 |    35 |  50.7 |   30 |  42.9 |   24 |  40.0 |
 |          q1e |    100 |  50.3 |    19 |  27.5 |   43 |  61.4 |   38 |  63.3 |
 |          q1f |     89 |  44.7 |    34 |  49.3 |   36 |  51.4 |   19 |  31.7 |
 |          q1g |     97 |  48.7 |    29 |  42.0 |   33 |  47.1 |   35 |  58.3 |
 |          q1h |    113 |  56.8 |    40 |  58.0 |   36 |  51.4 |   37 |  61.7 |
 | #Total cases |    199 | 199.0 |    69 |  69.0 |   70 |  70.0 |   60 |  60.0 |

But I would like to add the notations that compare the cpct values to those in the first column. I can get that on a table with just cpct values like this:

  # Column percentages only; after pivoting, every column is compared with
  # the first column of the table (compare_type = "first_column").
  checkall %>%
    tab_cells(mdset(q1a %to% q1h)) %>%
    tab_cols(total(), group) %>%
    tab_stat_cpct(label = "col %") %>%
    tab_pivot(stat_position = "inside_columns") %>%
    significance_cpct(compare_type = "first_column")

 |              | #Total |  group |       |       |
 |              |  col % |      A |     B |     C |
 |              |        |  col % | col % | col % |
 | ------------ | ------ | ------ | ----- | ----- |
 |          q1a |   50.8 | 47.8   |  51.4 |  53.3 |
 |          q1b |   46.2 | 49.3   |  41.4 |  48.3 |
 |          q1c |   55.8 | 76.8 + |  42.9 |  46.7 |
 |          q1d |   44.7 | 50.7   |  42.9 |  40.0 |
 |          q1e |   50.3 | 27.5 - |  61.4 |  63.3 |
 |          q1f |   44.7 | 49.3   |  51.4 |  31.7 |
 |          q1g |   48.7 | 42.0   |  47.1 |  58.3 |
 |          q1h |   56.8 | 58.0   |  51.4 |  61.7 |
 | #Total cases |    199 |   69   |    70 |    60 |

Is there a way to get the + and - notations onto the first table, in just the cpct columns? If I combine tab_stat_cases(label="freq") with significance_cpct(compare_type = "first_column"), I get a weird table that tries to compare both the freq and the cpct columns to the first column:

  # Problem case: freq and col % are both computed, and significance_cpct()
  # is then run on the finished (pivoted) table. As the output below shows,
  # the freq columns get compared with the first column as well, which is
  # not what is wanted.
  checkall %>%
    tab_cells(mdset(q1a %to% q1h)) %>%
    tab_cols(total(), group) %>%
    tab_stat_cases(label = "freq") %>%
    tab_stat_cpct(label = "col %") %>%
    tab_pivot(stat_position = "inside_columns") %>%
    significance_cpct(compare_type = "first_column")

 |              | #Total |        |  group |        |        |        |        |        |
 |              |   freq |  col % |      A |        |      B |        |      C |        |
 |              |        |        |   freq |  col % |   freq |  col % |   freq |  col % |
 | ------------ | ------ | ------ | ------ | ------ | ------ | ------ | ------ | ------ |
 |          q1a |  101.0 | 50.8 - | 33.0 - | 47.8 - | 36.0 - | 51.4 - | 32.0 - | 53.3 - |
 |          q1b |   92.0 | 46.2 - | 34.0 - | 49.3 - | 29.0 - | 41.4 - | 29.0 - | 48.3 - |
 |          q1c |  111.0 | 55.8 - | 53.0 - | 76.8   | 30.0 - | 42.9 - | 28.0 - | 46.7 - |
 |          q1d |   89.0 | 44.7 - | 35.0 - | 50.7 - | 30.0 - | 42.9 - | 24.0 - | 40.0 - |
 |          q1e |  100.0 | 50.3 - | 19.0 - | 27.5 - | 43.0 - | 61.4 - | 38.0 - | 63.3 - |
 |          q1f |   89.0 | 44.7 - | 34.0 - | 49.3 - | 36.0 - | 51.4 - | 19.0 - | 31.7 - |
 |          q1g |   97.0 | 48.7 - | 29.0 - | 42.0 - | 33.0 - | 47.1 - | 35.0 - | 58.3 - |
 |          q1h |  113.0 | 56.8 - | 40.0 - | 58.0 - | 36.0 - | 51.4 - | 37.0 - | 61.7   |
 | #Total cases |    199 |  199   |   69   |   69   |   70   |   70   |   60   |   60   |

I'm looking for the top table with the + and - notation as below:

 |              | #Total |       | group |        |      |       |      |       |
 |              |   freq | col % |     A |        |    B |       |    C |       |
 |              |        |       |  freq | col %  | freq | col % | freq | col % |
 | ------------ | ------ | ----- | ----- | -----  | ---- | ----- | ---- | ----- |
 |          q1a |    101 |  50.8 |    33 |  47.8  |   36 |  51.4 |   32 |  53.3 |
 |          q1b |     92 |  46.2 |    34 |  49.3  |   29 |  41.4 |   29 |  48.3 |
 |          q1c |    111 |  55.8 |    53 |  76.8 +|   30 |  42.9 |   28 |  46.7 |
 |          q1d |     89 |  44.7 |    35 |  50.7  |   30 |  42.9 |   24 |  40.0 |
 |          q1e |    100 |  50.3 |    19 |  27.5 -|   43 |  61.4 |   38 |  63.3 |
 |          q1f |     89 |  44.7 |    34 |  49.3  |   36 |  51.4 |   19 |  31.7 |
 |          q1g |     97 |  48.7 |    29 |  42.0  |   33 |  47.1 |   35 |  58.3 |
 |          q1h |    113 |  56.8 |    40 |  58.0  |   36 |  51.4 |   37 |  61.7 |
 | #Total cases |    199 | 199.0 |    69 |  69.0  |   70 |  70.0 |   60 |  60.0 |

Solution

  • There is a special function for such a case - tab_last_sig_cpct - which is applied only to the last calculation:

    # tab_last_sig_cpct() is placed directly after tab_stat_cpct() and before
    # tab_pivot(), so the test is attached to the "col %" statistic only and
    # the "freq" columns stay untouched.
    checkall %>%
      tab_cells(mdset(q1a %to% q1h)) %>%
      tab_cols(total(), group) %>%
      tab_stat_cases(label = "freq") %>%
      tab_stat_cpct(label = "col %") %>%
      tab_last_sig_cpct(compare_type = "first_column") %>%
      tab_pivot(stat_position = "inside_columns")