I would like to determine the maximum count of consecutive repeated non-NA Valor
values for each ADM2_PCODE
. Therefore, the idea is to group by ADM2_PCODE
, filter out the NA values, calculate for each Valor
value the highest count of consecutive cases, and select the maximum occurrence between them.
Example dataframe below:
df <- structure(list(Year = c(1981, 1982, 1983, 1984, 1985, 1986,
1981, 1982, 1983, 1984, 1985, 1986,
1981, 1982, 1983, 1984, 1985, 1986), ADM2_PCODE = c(1100015, 1100015, 1100015, 1100015, 1100015, 1100015,
1100016, 1100016, 1100016, 1100016, 1100016, 1100016,
1100017, 1100017, 1100017, 1100017, 1100017, 1100017),
Valor = c(NA, NA, 30, 30, NA, NA,
90, 10, 90, 10, 10, 10,
30, 20, 30, 40, 30, 60), geometry = c("MULTIPOLYGON (((-62.0495 -1...",
"MULTIPOLYGON (((-62.0495 -1...", "MULTIPOLYGON (((-62.0495 -1...",
"MULTIPOLYGON (((-62.0495 -1...", "MULTIPOLYGON (((-62.0495 -1...",
"MULTIPOLYGON (((-62.0495 -1...", "MULTIPOLYGON (((-63.0495 -1...",
"MULTIPOLYGON (((-62.0495 -1...", "MULTIPOLYGON (((-62.0495 -1...",
"MULTIPOLYGON (((-62.0495 -1...", "MULTIPOLYGON (((-62.0495 -1...",
"MULTIPOLYGON (((-62.0495 -1...", "MULTIPOLYGON (((-63.0495 -1...",
"MULTIPOLYGON (((-63.0495 -1...", "MULTIPOLYGON (((-63.0495 -1...",
"MULTIPOLYGON (((-63.0495 -1...", "MULTIPOLYGON (((-63.0495 -1...",
"MULTIPOLYGON (((-63.0495 -1...")), row.names = c(NA, -18L), class = c("tbl_df", "tbl", "data.frame"))
Input:
df
# A tibble: 18 x 4
Year ADM2_PCODE Valor geometry
<dbl> <dbl> <dbl> <chr>
1 1981 1100015 NA MULTIPOLYGON (((-62.0495 -1...
2 1982 1100015 NA MULTIPOLYGON (((-62.0495 -1...
3 1983 1100015 30 MULTIPOLYGON (((-62.0495 -1...
4 1984 1100015 30 MULTIPOLYGON (((-62.0495 -1...
5 1985 1100015 NA MULTIPOLYGON (((-62.0495 -1...
6 1986 1100015 NA MULTIPOLYGON (((-62.0495 -1...
7 1981 1100016 90 MULTIPOLYGON (((-63.0495 -1...
8 1982 1100016 10 MULTIPOLYGON (((-62.0495 -1...
9 1983 1100016 90 MULTIPOLYGON (((-62.0495 -1...
10 1984 1100016 10 MULTIPOLYGON (((-62.0495 -1...
11 1985 1100016 10 MULTIPOLYGON (((-62.0495 -1...
12 1986 1100016 10 MULTIPOLYGON (((-62.0495 -1...
13 1981 1100017 10 MULTIPOLYGON (((-63.0495 -1...
14 1982 1100017 20 MULTIPOLYGON (((-63.0495 -1...
15 1983 1100017 30 MULTIPOLYGON (((-63.0495 -1...
16 1984 1100017 40 MULTIPOLYGON (((-63.0495 -1...
17 1985 1100017 50 MULTIPOLYGON (((-63.0495 -1...
18 1986 1100017 60 MULTIPOLYGON (((-63.0495 -1...
Output expected:
ADM2_PCODE max_consecutive_values
<dbl> <lgl>
1 1100015 2
2 1100016 3
3 1100017 1
Using data.table
rleid
to keep track of consecutive values you can do -
library(dplyr)
library(data.table)
df %>%
filter(!is.na(Valor)) %>%
group_by(ADM2_PCODE) %>%
mutate(grp = rleid(Valor)) %>%
count(grp) %>%
summarise(max_consecutive_values = max(n))
# ADM2_PCODE max_consecutive_values
# <dbl> <int>
#1 1100015 2
#2 1100016 3
#3 1100017 1