I have a df with strings and numbers. I need to filter the max value by group excluding the year (date type).
I did this:
test
type Process RegionName Time Level a b c d e f g h i j k l m n o p q r s t u v w
a1 XYZ_1 ABC 2010 fixed 0 0 0 0 0 0 1 957 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
a1 XYZ_2 ABC 2010 fixed 0 0 0 0 0 0 0 61 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0
a1 XYZ_3 ABC 2010 fixed 0 0 0 0 0 0 0 695 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0
a1 XYZ_4 ABC 2010 fixed 0 0 0 0 0 0 0 525 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0
b1 XYZ_5 ABC 2010 fixed 0 0 0 0 0 0 10551 1168053 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b1 XYZ_6 ABC 2010 fixed 0 0 0 0 0 0 0 7571 0 0 0 30 0 0 0 0 0 0 0 0 0 0 0
b1 XYZ_7 ABC 2010 fixed 0 0 0 0 0 0 0 10883 0 0 0 0 0 51 0 0 0 0 0 0 0 0 0
b1 XYZ_8 ABC 2010 fixed 0 0 0 0 0 0 0 40453 0 0 0 0 0 0 0 196 0 0 0 0 0 0 0
b1 XYZ_9 ABC 2010 fixed 0 0 0 0 0 0 0 24464 0 0 0 0 0 0 0 0 0 0 0 0 0 0 53
c1 XYZ_10 ABC 2010 fixed 0 0 0 0 0 0 0 0 0 0 0 16 0 0 0 0 0 0 0 0 0 0 0
c1 XYZ_11 ABC 2010 fixed 0 0 0 0 0 0 0 129 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
d1 XYZ_12 ABC 2010 fixed 0 0 0 0 0 0 2 1616 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
d1 XYZ_13 ABC 2010 fixed 0 0 0 0 0 0 0 762 0 0 4 0 0 0 0 0 0 0 0 0 0 0 0
d1 XYZ_14 ABC 2010 fixed 0 0 0 0 0 0 0 1002 0 0 0 12 0 0 0 0 0 0 0 0 0 0 0
d1 XYZ_15 ABC 2010 fixed 0 0 0 0 0 0 0 556 0 0 0 0 0 7 0 0 0 0 0 0 0 0 0
d1 XYZ_16 ABC 2010 fixed 0 0 0 0 0 0 0 961647 0 0 0 0 0 0 0 4661 0 0 0 0 0 0 0
d1 XYZ_17 ABC 2010 fixed 0 0 0 0 0 0 0 1381 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3
max_test <- test %>%
group_by(type) %>%
slice(which.max(a:w))
max_test
type Process RegionName Time Level a b c d e f g h i j k
a1 XYZ_1 ABC 2010 fixed 0 0 0 0 0 0 1 957 0 0 0
b1 XYZ_5 ABC 2010 fixed 0 0 0 0 0 0 10551 1168053 0 0 0
c1 XYZ_10 ABC 2010 fixed 0 0 0 0 0 0 0 0 0 0 0
d1 XYZ_12 ABC 2010 fixed 0 0 0 0 0 0 2 1616 0 0 0
a1 and b1 are what I expect. However, c1 and d1 are not.
For c1, I expect XYZ_11 because 129 > 16
For d1, I expect XYZ_16 because 961647 > 1616
Any idea what I am doing wrong?
Note: I have not introduce in the code to avoid Time values. I only do which.max(a:w). So, a1 might be considering 2010 instead of 957 as the highest value?
Using matrixStats::rowMaxs
in by
.
by(dat, dat$ype, \(x) {
x[which.max(matrixStats::rowMaxs(as.matrix(x[-(1:5)]))), ]
}) |> do.call(what='rbind')
# ype Process RegionName Time Level a b c d e f g h i j k l m n o p q r s t u v w
# a1 a1 XYZ_1 ABC 2010 fixed 0 0 0 0 0 0 1 957 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# b1 b1 XYZ_5 ABC 2010 fixed 0 0 0 0 0 0 10551 1168053 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# c1 c1 XYZ_11 ABC 2010 fixed 0 0 0 0 0 0 0 129 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# d1 d1 XYZ_16 ABC 2010 fixed 0 0 0 0 0 0 0 961647 0 0 0 0 0 0 0 4661 0 0 0 0 0 0 0
Data:
dat <- structure(list(ype = c("a1", "a1", "a1", "a1", "b1", "b1", "b1",
"b1", "b1", "c1", "c1", "d1", "d1", "d1", "d1", "d1", "d1"),
Process = c("XYZ_1", "XYZ_2", "XYZ_3", "XYZ_4", "XYZ_5",
"XYZ_6", "XYZ_7", "XYZ_8", "XYZ_9", "XYZ_10", "XYZ_11", "XYZ_12",
"XYZ_13", "XYZ_14", "XYZ_15", "XYZ_16", "XYZ_17"), RegionName = c("ABC",
"ABC", "ABC", "ABC", "ABC", "ABC", "ABC", "ABC", "ABC", "ABC",
"ABC", "ABC", "ABC", "ABC", "ABC", "ABC", "ABC"), Time = c(2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L), Level = c("fixed",
"fixed", "fixed", "fixed", "fixed", "fixed", "fixed", "fixed",
"fixed", "fixed", "fixed", "fixed", "fixed", "fixed", "fixed",
"fixed", "fixed"), a = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), b = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), c = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), d = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), e = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), f = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), g = c(1L,
0L, 0L, 0L, 10551L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L,
0L, 0L), h = c(957L, 61L, 695L, 525L, 1168053L, 7571L, 10883L,
40453L, 24464L, 0L, 129L, 1616L, 762L, 1002L, 556L, 961647L,
1381L), i = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), j = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), k = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L),
l = c(0L, 3L, 0L, 0L, 0L, 30L, 0L, 0L, 0L, 16L, 0L, 0L, 0L,
12L, 0L, 0L, 0L), m = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), n = c(0L, 0L, 2L, 0L, 0L,
0L, 51L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 0L), o = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), p = c(0L, 0L, 0L, 3L, 0L, 0L, 0L, 196L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 4661L, 0L), q = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), r = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L
), s = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), t = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), u = c(0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), v = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), w = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 53L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 3L)), class = "data.frame", row.names = c(NA,
-17L))