Search code examples
r, data.table, chi-squared, proportions

Chisq.test/prop.test on vectors of different lengths


I am trying to run a chisq.test or a prop.test on data where some patients are surgical and others are non-surgical. I want to see whether the proportions of the two sexes are equal in both groups. What is the most appropriate test in this instance, given that my surgical and non-surgical groups are not the same size?

I am using the following data.table code, but it fails with the error shown below:

> chisq.test(lateral[Surgery == "No", "sex",with=F], lateral[Surgery == "Yes", "sex",with=F])

Error in chisq.test(lateral[Surgery == "No", "sex", with = F], lateral[Surgery ==  : 
  'x' and 'y' must have the same length

For prop.test():
Error in complete.cases(x, n) : not all arguments have the same length

Data

# Reproducible example data: one row per patient, 239 rows in total.
#   sex     - integer code, 1 or 2
#   Surgery - "No" for the first 154 rows, "Yes" for the remaining 85
# The result is bound to `lateral`, the name the failing calls above read
# from, so the example can be run as posted. setDT() comes from data.table,
# which must be attached first (library(data.table)).
lateral <- setDT(structure(
  list(
    sex = c(
      2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
      1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
      2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
      1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
      1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
      2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
      1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
      2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
      1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
      2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
      2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
      1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
      2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
      2L, 2L, 2L, 1L, 2L
    ),
    # Same values as spelling out every element, written compactly.
    Surgery = rep(c("No", "Yes"), times = c(154L, 85L))
  ),
  row.names = c(NA, -239L),
  class = c("data.table", "data.frame")
))

Solution

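  • You can pass a contingency table directly to chisq.test(), which is simpler than passing two vectors. The unequal group sizes are not a problem: the test works on the cross-tabulated counts, not on vectors of matching length. For example: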

    # Patient-level example data, 239 rows:
    #   sex     - integer code, 1 or 2
    #   Surgery - "No" for rows 1-154, "Yes" for rows 155-239
    # The class vector c("data.table", "data.frame") is kept unchanged.
    df <- structure(
      list(
        sex = c(
          2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L,
          1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L,
          2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
          1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L,
          1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
          2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
          1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
          2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L,
          1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
          2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
          1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L,
          2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
          1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L,
          1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L,
          2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L,
          2L, 2L, 2L, 1L, 2L
        ),
        # 154 "No" followed by 85 "Yes", written compactly.
        Surgery = rep(c("No", "Yes"), times = c(154L, 85L))
      ),
      row.names = c(NA, -239L),
      class = c("data.table", "data.frame")
    )
    
    # Convert both columns to factors before cross-tabulating them.
    df[["sex"]] <- as.factor(df[["sex"]])
    df[["Surgery"]] <- as.factor(df[["Surgery"]])

    # Contingency table of sex (rows) by Surgery (columns); print the counts.
    prop_table <- table(df)
    print(prop_table)
    #    Surgery
    # sex No Yes
    #   1 97  66
    #   2 57  19

    # Chi-squared test on the 2x2 table. The printed method line shows that
    # Yates' continuity correction was applied.
    mod1 <- chisq.test(prop_table)
    print(mod1)
    # Pearson's Chi-squared test with Yates' continuity correction
    #
    # data:  prop_table
    # X-squared = 4.7727, df = 1, p-value = 0.02892

    # The result is an "htest" list; str() shows the components that can be
    # extracted, e.g. mod1$p.value, mod1$expected, mod1$stdres.
    str(mod1)
    # List of 9
    #  $ statistic: Named num 4.77
    #   ..- attr(*, "names")= chr "X-squared"
    #  $ parameter: Named int 1
    #   ..- attr(*, "names")= chr "df"
    #  $ p.value  : num 0.0289
    #  $ method   : chr "Pearson's Chi-squared test with Yates' continuity correction"
    #  $ data.name: chr "prop_table"
    #  $ observed : 'table' int [1:2, 1:2] 97 57 66 19
    #   ..- attr(*, "dimnames")=List of 2
    #   .. ..$ sex    : chr [1:2] "1" "2"
    #   .. ..$ Surgery: chr [1:2] "No" "Yes"
    #  $ expected : num [1:2, 1:2] 105 49 58 27
    #   ..- attr(*, "dimnames")=List of 2
    #   .. ..$ sex    : chr [1:2] "1" "2"
    #   .. ..$ Surgery: chr [1:2] "No" "Yes"
    #  $ residuals: 'table' num [1:2, 1:2] -0.783 1.147 1.055 -1.544
    #   ..- attr(*, "dimnames")=List of 2
    #   .. ..$ sex    : chr [1:2] "1" "2"
    #   .. ..$ Surgery: chr [1:2] "No" "Yes"
    #  $ stdres   : 'table' num [1:2, 1:2] -2.33 2.33 2.33 -2.33
    #   ..- attr(*, "dimnames")=List of 2
    #   .. ..$ sex    : chr [1:2] "1" "2"
    #   .. ..$ Surgery: chr [1:2] "No" "Yes"
    #  - attr(*, "class")= chr "htest"