I've computed the `index_new` column based on specific conditions. However,
I can't get the cumulative calculation to reset to zero whenever the variable
`dry_hours` exceeds 5.
Here is my code:
# Baseline subtracted from temp before it is added to the running total.
base_temperature <- 44

df <- df %>%
  mutate(
    # Position within the current run of dry rows (lwd == 0); 0 on other rows.
    dry_hours = ifelse(lwd == 0, sequence(rle(lwd == 0)$lengths), 0),
    # TRUE marks rows that contribute nothing to the running total.
    zero_index = lwd == 0 | dry_hours > 5 | temp < 44 | temp > 86
  ) %>%
  # Grouping is by event only, so the running total restarts per event and
  # is never reset inside an event, even when dry_hours > 5.
  group_by(event) %>%
  mutate(
    index_new = cumsum(ifelse(zero_index, 0, temp - base_temperature))
  ) %>%
  select(-zero_index) %>%
  relocate(index, .before = index_new)
Here is a reproducible example:
# Reproducible example data (dput output): a 102-row grouped tibble, grouped
# by `event` (rows 1:51 belong to event 1, rows 52:102 to event 2).
# Columns: event, lwd, temp, dry_hours, index, index_new.
# NOTE(review): `index` appears to hold the desired result (it drops back to 0
# where dry_hours > 5) and `index_new` the current non-resetting output --
# confirm with the author.
df <- structure(list(event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2), lwd = c(1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1), temp = c(40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 40, 41, 42, 43, 44, 45,
46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77,
78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90), dry_hours = c(0,
0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
3, 4, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), index = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 26, 35, 45, 56, 68, 81, 95,
110, 110, 110, 110, 110, 130, 151, 173, 196, 220, 245, 245, 245,
245, 245, 245, 0, 0, 33, 67, 102, 138, 175, 213, 252, 292, 333,
375, 375, 375, 375, 375, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18,
26, 35, 45, 56, 68, 81, 95, 110, 110, 110, 110, 110, 130, 151,
173, 196, 220, 245, 271, 298, 326, 355, 385, 416, 448, 481, 515,
550, 586, 623, 661, 700, 740, 781, 823, 823, 823, 823, 823),
index_new = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 26, 35,
45, 56, 68, 81, 95, 110, 110, 110, 110, 110, 130, 151, 173,
196, 220, 245, 245, 245, 245, 245, 245, 245, 245, 278, 312,
347, 383, 420, 458, 497, 537, 578, 620, 620, 620, 620, 620,
0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 11, 18, 26, 35, 45, 56, 68,
81, 95, 110, 110, 110, 110, 110, 130, 151, 173, 196, 220,
245, 271, 298, 326, 355, 385, 416, 448, 481, 515, 550, 586,
623, 661, 700, 740, 781, 823, 823, 823, 823, 823)),
class = c("grouped_df", "tbl_df", "tbl", "data.frame"), row.names = c(NA, -102L),
groups = structure(list(event = c(1, 2), .rows = structure(list(1:51, 52:102),
ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -2L), .drop = TRUE))
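If you want the calculation grouped by `event` and reset whenever `dry_hours`
exceeds 5, add a running count of the rows where `dry_hours` exceeds 5 to the
grouping. That count increases by one at every such row, so each of those rows
starts a new group and `cumsum()` restarts from zero there. Change
`group_by(event)` to `group_by(event, cumsum(dry_hours > 5))`: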
# Group by event plus a running count of rows where dry_hours > 5, so the
# cumulative sum restarts at every such row as well as at each new event.
df %>%
  mutate(
    # Position within the current run of dry rows (lwd == 0); 0 on other rows.
    dry_hours = ifelse(lwd == 0, sequence(rle(lwd == 0)$lengths), 0),
    # TRUE marks rows that contribute nothing to the running total.
    zero_index = lwd == 0 | dry_hours > 5 | temp < 44 | temp > 86
  ) %>%
  group_by(event, cumsum(dry_hours > 5)) %>%
  mutate(
    index_new = cumsum(ifelse(zero_index, 0, temp - base_temperature))
  ) %>%
  select(-zero_index) %>%
  relocate(index, .before = index_new) %>%
  ungroup() %>%
  filter(index != index_new) ## keep only rows that do not match
# A tibble: 0 × 7
# ℹ 7 variables: event <dbl>, lwd <dbl>, temp <dbl>, dry_hours <dbl>, index <dbl>, index_new <dbl>,
# cumsum(dry_hours > 5) <int>
## all rows match!