Search code examples
r data-analysis

How to apply prop.test to my dataset in R?


I tried to analyze the Aids2 dataset, and I want to apply "prop.test" to compare the proportion of people infected by the "hs" method in men vs. women. How can I do that?

This is a part of my dataset:

  state sex  diag death status T.categ age
1      NSW   M 10905 11081      D      hs  35
2      NSW   M 11029 11096      D      hs  53
3      NSW   M  9551  9983      D      hs  42
4      NSW   M  9577  9654      D    haem  44
5      NSW   M 10015 10290      D      hs  39
6      NSW   M  9971 10344      D      hs  36
7      NSW   M 10746 11135      D   other  36
8      NSW   M 10042 11069      D      hs  31
9      NSW   M 10464 10956      D      hs  26
10     NSW   M 10439 10873      D    hsid  27
11     NSW   M 10416 10432      D      hs  45
12     NSW   M 10216 10524      D      hs  36
13     NSW   M 10385 10477      D      hs  27
14     NSW   M 10366 10631      D      hs  35
15     NSW   M 10452 11504      A      hs  30
16     NSW   M 10552 10684      D      hs  39
17     NSW   M 10673 11200      D      hs  30
18     NSW   M 10923 11504      A    haem  21
19     NSW   M 10993 11504      A      hs  56
20     NSW   M 11020 11171      D      hs  41
21     NSW   M 10805 10877      D      hs  28
22     NSW   M 10996 11504      A      hs  38
23     NSW   M 10738 11504      A     het  26
24     NSW   M 11063 11504      A      id  39
25     NSW   M 10885 11196      D      hs  46
26     NSW   M 11056 11504      A    haem  13
27     NSW   M 11283 11504      A      hs  34
28     NSW   M 11195 11504      A     het  39
29     NSW   M 10848 11504      A      hs  31
30     NSW   M 11289 11504      A  mother   1
31     NSW   F 10961 11504      A      id  30
32     NSW   M 11311 11312      D   blood  37
33     NSW   M 11337 11504      A      hs  38
34     NSW   M 11458 11463      D      hs  33
35     NSW   M 11480 11504      A      hs  30
36     NSW   M 11462 11504      A      hs  40
37     NSW   M  8302  8469      D      hs  51
38     NSW   M  8711  8850      D      hs  29
39     NSW   M  8726  9254      D    hsid  29
40     NSW   M  8760  8959      D    hsid  37
41     NSW   M  8802  8879      D      hs  46
42     NSW   M  8877  9180      D      hs  37
43     NSW   M  9011  9696      D   blood  54
44     NSW   M  8990  9175      D      hs  30
45     NSW   M  9063  9172      D   blood  25
46     NSW   M  9003  9109      D    hsid  26
47     NSW   M  9022  9218      D      hs  41
48     NSW   M  8985  9254      D      hs  41
49     NSW   M  9030  9781      D      hs  27
50     NSW   M  9086  9314      D      hs  35
51     NSW   M  9015  9943      D      hs  35
52     NSW   M  9009  9350      D      hs  25
53     NSW   M  8970  9240      D      hs  34
54     NSW   M  9171  9309      D      hs  35
55     NSW   M  9087  9598      D      hs  33
56     NSW   M  9115  9686      D      hs  31
57     NSW   M  9065  9262      D      hs  43
58     NSW   M  9104  9126      D      hs  59
59     NSW   M  9028  9532      D      hs  31
60     NSW   M  9101  9268      D      hs  41
61     NSW   M  9096  9226      D      hs  34
62     NSW   M  9128  9660      D      hs  37
63     NSW   M  9125  9207      D      hs  31
64     NSW   M  9083  9682      D      hs  37
65     NSW   M  9150  9285      D      hs  38
66     NSW   F  9014  9152      D   blood  44
67     NSW   M  9157  9962      D      hs  41
68     NSW   M  9098  9418      D      hs  41
69     NSW   M  8913  9082      D      hs  32
70     NSW   M  9141  9222      D      hs  40
71     NSW   M  9158  9920      D      hs  23
72     NSW   M  9167 10461      D      hs  42
73     NSW   M  9244  9379      D      hs  33
74     NSW   M  9138  9565      D      hs  47
75     NSW   M  9222  9536      D      hs  52
76     NSW   M  9272  9290      D      hs  35
77     NSW   M  9131  9392      D      hs  38
78     NSW   M  9236 10013      D      hs  23
79     NSW   M  9145  9250      D      hs  45
80     NSW   M  8964  9300      D    haem  48
81     NSW   M  9207  9768      D      hs  32
82     NSW   M  9240  9447      D      hs  38
83     NSW   M  9281  9723      D      hs  25
84     NSW   M  9300  9736      D      hs  36
85     NSW   M  9294 10070      D      hs  39
86     NSW   F  9258  9259      D   blood  25
87     NSW   M  9145  9436      D      hs  33
88     NSW   M  9310  9533      D      hs  35
89     NSW   M  9344 11320      D      hs  49
90     NSW   M  9185  9214      D      hs  38
91     NSW   M  9247  9549      D      hs  30
92     NSW   M  9201  9315      D      hs  44
93     NSW   F  9349  9392      D   blood  55
94     NSW   M  9246  9956      D      hs  31
95     NSW   M  9273 10018      D      hs  32
96     NSW   M  9241  9576      D      hs  29
97     NSW   M  9264  9451      D      hs  42
98     NSW   M  9310  9730      D      hs  28

Can you help me? I'm a beginner in data analysis and I don't know how to apply this type of test ("prop.test").

Thank you in advance for your help!


Solution

  • I'm using your example dataset:

    # Rebuild the 98-row excerpt posted in the question as a data frame.
    # The header line names 7 columns while every data line has 8 fields, so
    # read.table() treats the leading number on each line as the row name.
    # TRUE/FALSE are spelled out because T and F are ordinary variables that
    # can be reassigned.
    df <- read.table(text = "
    state sex  diag death status T.categ age
    1      NSW   M 10905 11081      D      hs  35
    2      NSW   M 11029 11096      D      hs  53
    3      NSW   M  9551  9983      D      hs  42
    4      NSW   M  9577  9654      D    haem  44
    5      NSW   M 10015 10290      D      hs  39
    6      NSW   M  9971 10344      D      hs  36
    7      NSW   M 10746 11135      D   other  36
    8      NSW   M 10042 11069      D      hs  31
    9      NSW   M 10464 10956      D      hs  26
    10     NSW   M 10439 10873      D    hsid  27
    11     NSW   M 10416 10432      D      hs  45
    12     NSW   M 10216 10524      D      hs  36
    13     NSW   M 10385 10477      D      hs  27
    14     NSW   M 10366 10631      D      hs  35
    15     NSW   M 10452 11504      A      hs  30
    16     NSW   M 10552 10684      D      hs  39
    17     NSW   M 10673 11200      D      hs  30
    18     NSW   M 10923 11504      A    haem  21
    19     NSW   M 10993 11504      A      hs  56
    20     NSW   M 11020 11171      D      hs  41
    21     NSW   M 10805 10877      D      hs  28
    22     NSW   M 10996 11504      A      hs  38
    23     NSW   M 10738 11504      A     het  26
    24     NSW   M 11063 11504      A      id  39
    25     NSW   M 10885 11196      D      hs  46
    26     NSW   M 11056 11504      A    haem  13
    27     NSW   M 11283 11504      A      hs  34
    28     NSW   M 11195 11504      A     het  39
    29     NSW   M 10848 11504      A      hs  31
    30     NSW   M 11289 11504      A  mother   1
    31     NSW   F 10961 11504      A      id  30
    32     NSW   M 11311 11312      D   blood  37
    33     NSW   M 11337 11504      A      hs  38
    34     NSW   M 11458 11463      D      hs  33
    35     NSW   M 11480 11504      A      hs  30
    36     NSW   M 11462 11504      A      hs  40
    37     NSW   M  8302  8469      D      hs  51
    38     NSW   M  8711  8850      D      hs  29
    39     NSW   M  8726  9254      D    hsid  29
    40     NSW   M  8760  8959      D    hsid  37
    41     NSW   M  8802  8879      D      hs  46
    42     NSW   M  8877  9180      D      hs  37
    43     NSW   M  9011  9696      D   blood  54
    44     NSW   M  8990  9175      D      hs  30
    45     NSW   M  9063  9172      D   blood  25
    46     NSW   M  9003  9109      D    hsid  26
    47     NSW   M  9022  9218      D      hs  41
    48     NSW   M  8985  9254      D      hs  41
    49     NSW   M  9030  9781      D      hs  27
    50     NSW   M  9086  9314      D      hs  35
    51     NSW   M  9015  9943      D      hs  35
    52     NSW   M  9009  9350      D      hs  25
    53     NSW   M  8970  9240      D      hs  34
    54     NSW   M  9171  9309      D      hs  35
    55     NSW   M  9087  9598      D      hs  33
    56     NSW   M  9115  9686      D      hs  31
    57     NSW   M  9065  9262      D      hs  43
    58     NSW   M  9104  9126      D      hs  59
    59     NSW   M  9028  9532      D      hs  31
    60     NSW   M  9101  9268      D      hs  41
    61     NSW   M  9096  9226      D      hs  34
    62     NSW   M  9128  9660      D      hs  37
    63     NSW   M  9125  9207      D      hs  31
    64     NSW   M  9083  9682      D      hs  37
    65     NSW   M  9150  9285      D      hs  38
    66     NSW   F  9014  9152      D   blood  44
    67     NSW   M  9157  9962      D      hs  41
    68     NSW   M  9098  9418      D      hs  41
    69     NSW   M  8913  9082      D      hs  32
    70     NSW   M  9141  9222      D      hs  40
    71     NSW   M  9158  9920      D      hs  23
    72     NSW   M  9167 10461      D      hs  42
    73     NSW   M  9244  9379      D      hs  33
    74     NSW   M  9138  9565      D      hs  47
    75     NSW   M  9222  9536      D      hs  52
    76     NSW   M  9272  9290      D      hs  35
    77     NSW   M  9131  9392      D      hs  38
    78     NSW   M  9236 10013      D      hs  23
    79     NSW   M  9145  9250      D      hs  45
    80     NSW   M  8964  9300      D    haem  48
    81     NSW   M  9207  9768      D      hs  32
    82     NSW   M  9240  9447      D      hs  38
    83     NSW   M  9281  9723      D      hs  25
    84     NSW   M  9300  9736      D      hs  36
    85     NSW   M  9294 10070      D      hs  39
    86     NSW   F  9258  9259      D   blood  25
    87     NSW   M  9145  9436      D      hs  33
    88     NSW   M  9310  9533      D      hs  35
    89     NSW   M  9344 11320      D      hs  49
    90     NSW   M  9185  9214      D      hs  38
    91     NSW   M  9247  9549      D      hs  30
    92     NSW   M  9201  9315      D      hs  44
    93     NSW   F  9349  9392      D   blood  55
    94     NSW   M  9246  9956      D      hs  31
    95     NSW   M  9273 10018      D      hs  32
    96     NSW   M  9241  9576      D      hs  29
    97     NSW   M  9264  9451      D      hs  42
    98     NSW   M  9310  9730      D      hs  28
    ", header = TRUE, stringsAsFactors = FALSE)
    

    You can build a table with the counts of hs / other infections by sex and then apply the prop.test function to it:

    library(dplyr)
    library(tidyr)
    
    # Build one row per sex holding the number of "hs" infections, the number
    # of infections in any other category, and their sum.
    df_counts <- df %>%
      # flag each case as "hs" or "other"; the literal "hs" is returned rather
      # than T.categ itself so the label stays a string even for a factor column
      mutate(T.categ2 = ifelse(T.categ == "hs", "hs", "other")) %>%
      # count how many times each sex falls in each of the two categories
      count(sex, T.categ2) %>%
      # reshape to one column per category (pivot_wider() replaces the
      # superseded spread()); sex/category pairs that never occur become 0,
      # and names_sort keeps the columns in the order hs, other
      pivot_wider(
        names_from = T.categ2,
        values_from = n,
        values_fill = 0L,
        names_sort = TRUE
      ) %>%
      # count totals per sex
      mutate(Total = hs + other)
    
    # see what the aggregated dataset with counts looks like
    df_counts
    
    # # A tibble: 2 x 4
    #   sex      hs other Total
    #   <chr> <int> <int> <int>
    # 1 F         0     4     4
    # 2 M        78    16    94
    
    # apply the two-sample proportion test: x = number of "hs" cases per sex,
    # n = total number of cases per sex
    # NOTE(review): with only 4 women in this excerpt, prop.test() is likely to
    # warn that the chi-squared approximation may be incorrect; consider
    # fisher.test() on the 2 x 2 table of counts for such small groups.
    prop.test(x = df_counts$hs, n = df_counts$Total)