I want to know if there is a difference between mean distance between pairs of individuals across months. This is a repeated measures situation with a non normally distributed response variable. So, I am trying to run a Friedman test, but I can't get it to work and do not understand the error. I have made sure each pair within month is unreplicated. Any advice? Thanks
dput(head(cohorts,10))
structure(list(DeerCow = structure(c(1L, 1L, 1L, 2L, 2L, 3L,
3L, 3L, 4L, 4L), .Label = c("A82118_A632", "A82118_A633", "A82118_A635",
"A82118_A636", "A82120_A629", "A82120_A630", "A82120_A631", "A82120_A633",
"A82120_A634", "A82120_A637", "A82121_A628", "A82121_A629", "A82121_A630",
"A82121_A631", "A82121_A633", "A82121_A634", "A82121_A637", "A82122_A632",
"A82122_A633", "A82122_A635", "A82122_A636", "A82126_A629", "A82126_A630",
"A82126_A631", "A82126_A633", "A82126_A634", "A82126_A637", "A82127_A628",
"A82127_A632", "A82127_A633", "A82127_A636", "A82132_A628", "A82132_A629",
"A82132_A630", "A82132_A631", "A82132_A632", "A82132_A633", "A82132_A636",
"A82135_A629", "A82135_A630", "A82135_A631", "A82135_A632", "A82135_A633",
"A82135_A634", "A82135_A635", "A82135_A636", "A82135_A637", "A82136_A628",
"A82136_A630", "A82136_A631", "A82136_A632", "A82136_A634", "A82136_A636",
"A82139_A628", "A82139_A629", "A82139_A630", "A82139_A631", "A82139_A633",
"A82139_A634", "A82139_A637", "A82140_A628", "A82140_A629", "A82140_A630",
"A82140_A631", "A82140_A632", "A82140_A633", "A82140_A634", "A82140_A636",
"A82140_A637", "A82141_A632", "A82141_A633", "A82141_A635", "A82141_A636",
"A82142_A628", "A82142_A630", "A82142_A631", "A82142_A632", "A82142_A633",
"A82142_A634", "A82142_A636", "A82145_A628", "A82145_A630", "A82145_A631",
"A82145_A632", "A82145_A633", "A82145_A634", "A82145_A636", "A82146_A628",
"A82146_A629", "A82146_A631", "A82146_A632", "A82146_A633", "A82146_A634",
"A82146_A635", "A82146_A636", "A82146_A637"), class = "factor"),
YearMonth = structure(c(1L, 2L, 3L, 1L, 2L, 1L, 2L, 3L, 1L,
2L), .Label = c("2019-06", "2019-07", "2019-08", "2019-09"
), class = "factor"), Mean_dist_m = c(1612.14219548009, 1870.02550251614,
2108.91771044161, 2435.58097649919, 2909.78460371203, 3311.10358068401,
2661.09192488057, 2378.49371438137, 1336.20465251428, 1637.15035958527
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), groups = structure(list(DeerCow = structure(1:4, .Label = c("A82118_A632",
"A82118_A633", "A82118_A635", "A82118_A636", "A82120_A629", "A82120_A630",
"A82120_A631", "A82120_A633", "A82120_A634", "A82120_A637", "A82121_A628",
"A82121_A629", "A82121_A630", "A82121_A631", "A82121_A633", "A82121_A634",
"A82121_A637", "A82122_A632", "A82122_A633", "A82122_A635", "A82122_A636",
"A82126_A629", "A82126_A630", "A82126_A631", "A82126_A633", "A82126_A634",
"A82126_A637", "A82127_A628", "A82127_A632", "A82127_A633", "A82127_A636",
"A82132_A628", "A82132_A629", "A82132_A630", "A82132_A631", "A82132_A632",
"A82132_A633", "A82132_A636", "A82135_A629", "A82135_A630", "A82135_A631",
"A82135_A632", "A82135_A633", "A82135_A634", "A82135_A635", "A82135_A636",
"A82135_A637", "A82136_A628", "A82136_A630", "A82136_A631", "A82136_A632",
"A82136_A634", "A82136_A636", "A82139_A628", "A82139_A629", "A82139_A630",
"A82139_A631", "A82139_A633", "A82139_A634", "A82139_A637", "A82140_A628",
"A82140_A629", "A82140_A630", "A82140_A631", "A82140_A632", "A82140_A633",
"A82140_A634", "A82140_A636", "A82140_A637", "A82141_A632", "A82141_A633",
"A82141_A635", "A82141_A636", "A82142_A628", "A82142_A630", "A82142_A631",
"A82142_A632", "A82142_A633", "A82142_A634", "A82142_A636", "A82145_A628",
"A82145_A630", "A82145_A631", "A82145_A632", "A82145_A633", "A82145_A634",
"A82145_A636", "A82146_A628", "A82146_A629", "A82146_A631", "A82146_A632",
"A82146_A633", "A82146_A634", "A82146_A635", "A82146_A636", "A82146_A637"
), class = "factor"), .rows = structure(list(1:3, 4:5, 6:8, 9:10), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -4L), .drop = TRUE))
hist(cohorts$Mean_dist_m)
friedman.test(y=cohorts$Mean_dist_m, groups=cohorts$YearMonth, blocks=cohorts$DeerCow)
Error in friedman.test.default(y = cohorts$Mean_dist_m, groups = cohorts$YearMonth, :
not an unreplicated complete block design
If you look at the code, the part that is throwing the error is:
if (any(table(groups, blocks) != 1))
stop("not an unreplicated complete block design")
If you look at your data and do the appropriate table()
operation, you get:
table(cohorts$YearMonth, cohorts$DeerCow)
# A82118_A632 A82118_A633 A82118_A635 A82118_A636 A82120_A629 A82120_A630 A82120_A631 A82120_A633
# 2019-06 1 1 1 1 0 0 0 0
# 2019-07 1 1 1 1 0 0 0 0
# 2019-08 1 0 1 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82120_A634 A82120_A637 A82121_A628 A82121_A629 A82121_A630 A82121_A631 A82121_A633 A82121_A634
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82121_A637 A82122_A632 A82122_A633 A82122_A635 A82122_A636 A82126_A629 A82126_A630 A82126_A631
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82126_A633 A82126_A634 A82126_A637 A82127_A628 A82127_A632 A82127_A633 A82127_A636 A82132_A628
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82132_A629 A82132_A630 A82132_A631 A82132_A632 A82132_A633 A82132_A636 A82135_A629 A82135_A630
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82135_A631 A82135_A632 A82135_A633 A82135_A634 A82135_A635 A82135_A636 A82135_A637 A82136_A628
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82136_A630 A82136_A631 A82136_A632 A82136_A634 A82136_A636 A82139_A628 A82139_A629 A82139_A630
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82139_A631 A82139_A633 A82139_A634 A82139_A637 A82140_A628 A82140_A629 A82140_A630 A82140_A631
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82140_A632 A82140_A633 A82140_A634 A82140_A636 A82140_A637 A82141_A632 A82141_A633 A82141_A635
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82141_A636 A82142_A628 A82142_A630 A82142_A631 A82142_A632 A82142_A633 A82142_A634 A82142_A636
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82145_A628 A82145_A630 A82145_A631 A82145_A632 A82145_A633 A82145_A634 A82145_A636 A82146_A628
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
#
# A82146_A629 A82146_A631 A82146_A632 A82146_A633 A82146_A634 A82146_A635 A82146_A636 A82146_A637
# 2019-06 0 0 0 0 0 0 0 0
# 2019-07 0 0 0 0 0 0 0 0
# 2019-08 0 0 0 0 0 0 0 0
# 2019-09 0 0 0 0 0 0 0 0
The output is so long because you still have all the factor levels attached. If you use droplevels()
, you'll see that you still don't pass the test:
table(droplevels(cohorts$YearMonth), droplevels(cohorts$DeerCow))
# A82118_A632 A82118_A633 A82118_A635 A82118_A636
# 2019-06 1 1 1 1
# 2019-07 1 1 1 1
# 2019-08 1 0 1 0
If you filter the data to exclude 2019-08
, then you pass the test and the Friedman.test()
function works.
tmp <- subset(cohorts, YearMonth != "2019-08")
table(droplevels(tmp$YearMonth), droplevels(tmp$DeerCow))
# A82118_A632 A82118_A633 A82118_A635 A82118_A636
# 2019-06 1 1 1 1
# 2019-07 1 1 1 1
friedman.test(y=tmp$Mean_dist_m, groups=droplevels(tmp$YearMonth), blocks=droplevels(tmp$DeerCow))
# Friedman rank sum test
#
# data: tmp$Mean_dist_m, droplevels(tmp$YearMonth) and droplevels(tmp$DeerCow)
# Friedman chi-squared = 1, df = 1, p-value = 0.3173
The key is to look at the cross-tabulation of group
and block
variables and if there are any values in the table that are note 1
, then your design does not meet the requirements of Friedman.test()
.