Search code examples
Tags: r, proportions, crosstable

Calculating conditional proportions from R data.frame


There is a multiway crosstable saved as a frequency table like this:

# Long-format frequency table: one row per Correct x Type x Subtype x Level
# cell. The grouping columns follow a regular pattern (Correct varies fastest,
# Level slowest), so they are generated with rep() instead of typed out.
df <- data.frame(
  Correct = rep(c("yes", "no"), times = 8),
  Type = rep(c("t1", "t2"), each = 2, times = 4),
  Subtype = rep(c("st1", "st2"), each = 4, times = 2),
  Level = rep(c("a", "b"), each = 8),
  Freq = c(
    115, 99, 140, 81, 104, 100, 156, 52,
    61, 160, 59, 164, 41, 160, 48, 159
  )
)

How can `Freq` be converted into proportions conditioned on all variables except `Correct` (that is, the share of `yes` and `no` within each combination of `Type`, `Subtype`, and `Level`), giving this new variable:

# Desired result, written out by hand (two decimals): the proportion of each
# row's Freq within its Type x Subtype x Level cell.
df[["Prop"]] <- c(
  0.54, 0.46, 0.63, 0.37, 0.51, 0.49, 0.75, 0.25,
  0.28, 0.72, 0.26, 0.74, 0.20, 0.80, 0.23, 0.77
)
df
#    Correct Type Subtype Level Freq Prop
# 1      yes   t1     st1     a  115 0.54
# 2       no   t1     st1     a   99 0.46
# 3      yes   t2     st1     a  140 0.63
# 4       no   t2     st1     a   81 0.37
# 5      yes   t1     st2     a  104 0.51
# 6       no   t1     st2     a  100 0.49
# 7      yes   t2     st2     a  156 0.75
# 8       no   t2     st2     a   52 0.25
# 9      yes   t1     st1     b   61 0.28
# 10      no   t1     st1     b  160 0.72
# 11     yes   t2     st1     b   59 0.26
# 12      no   t2     st1     b  164 0.74
# 13     yes   t1     st2     b   41 0.20
# 14      no   t1     st2     b  160 0.80
# 15     yes   t2     st2     b   48 0.23
# 16      no   t2     st2     b  159 0.77

Solution

  • In base R:

    # Share of each row's Freq within its Type x Level x Subtype cell:
    # ave() applies FUN per group and returns a vector aligned with the rows.
    df$Prop <- ave(df$Freq, df$Type, df$Level, df$Subtype, FUN = prop.table)
    # or: divide by the per-cell total, which ave() repeats on every row
    df$Prop <- df$Freq / ave(df$Freq, df$Type, df$Level, df$Subtype, FUN = sum)
     
    #    Correct Type Subtype Level Freq      Prop
    # 1      yes   t1     st1     a  115 0.5373832
    # 2       no   t1     st1     a   99 0.4626168
    # 3      yes   t2     st1     a  140 0.6334842
    # 4       no   t2     st1     a   81 0.3665158
    # 5      yes   t1     st2     a  104 0.5098039
    # 6       no   t1     st2     a  100 0.4901961
    # 7      yes   t2     st2     a  156 0.7500000
    # 8       no   t2     st2     a   52 0.2500000
    # 9      yes   t1     st1     b   61 0.2760181
    # 10      no   t1     st1     b  160 0.7239819
    # 11     yes   t2     st1     b   59 0.2645740
    # 12      no   t2     st1     b  164 0.7354260
    # 13     yes   t1     st2     b   41 0.2039801
    # 14      no   t1     st2     b  160 0.7960199
    # 15     yes   t2     st2     b   48 0.2318841
    # 16      no   t2     st2     b  159 0.7681159