df <- structure(list(ID = c(2L, 2L, 13L, 13L, 13L, 21L, 21L, 21L, 24L,
24L, 24L, 24L), mignr = c(1L, 0L, 1L, 2L, 0L, 0L, 2L, 1L, 2L,
3L, 0L, 1L), start = c(1387L, 903L, 1357L, 1391L, 1087L, 936L,
1367L, 1354L, 1363L, 1392L, 908L, 1361L), end = c(1401L, 1386L,
1390L, 1401L, 1356L, 1353L, 1399L, 1366L, 1391L, 1400L, 1360L,
1362L), staytime = c(15L, 484L, 34L, 11L, 270L, 418L, 33L, 13L,
29L, 9L, 453L, 2L)), row.names = c(NA, -12L), class = "data.frame")
My objective is to reset the sequences in this spell dataset so that they all restart at zero and for mignr greater than one increase by staytime. The data is grouped by IDs and mignr represents the sequence in the spell. I have set start == 0 end == 1 for the first spell (mignr == 0) and would like for each subsequent spell that
df <- df %>%
mutate(start = ifelse(mignr == 0, 0, start)) %>%
mutate(end = ifelse(mignr == 0, 1, end))
max_spell <- max(df$mignr)
for(i in seq_along(1:max_spell)){
j <- i-1
df <- df %>%
mutate(start = ifelse(mignr == i, (df[which(df$ID == ID & df$mignr == j),4]), start)) %>%
mutate(end = ifelse(mignr == i, start + (staytime - 1), end))
}
df
My attempt seems to work for mignr = 1 the start and end values are both as I would expect. However for subsequent values of mignr (>1) the start value is no longer what I would expect it to be. The way I am currently calculating the start is not very clean, but I can't quite tell why for subsequent iterations it is breaking.
I would use cumsum()
on staytime
, but for this to work, I'd change the first staytime
of mignr == 0
to 1
, since that is basically what setting the first start
and end
to 0
and 1
implies. Would that be acceptable?
library(tidyverse)
df %>%
group_by(ID) %>%
arrange(mignr, ,.by_group = TRUE) %>%
mutate(start = (row_number()- 1L),
staytime = ifelse(row_number() == 1, 1, staytime),
end = cumsum(staytime),
start = lag(end + 1, default = first(start)))
#> # A tibble: 12 x 5
#> # Groups: ID [4]
#> ID mignr start end staytime
#> <int> <int> <dbl> <dbl> <dbl>
#> 1 2 0 0 1 1
#> 2 2 1 2 16 15
#> 3 13 0 0 1 1
#> 4 13 1 2 35 34
#> 5 13 2 36 46 11
#> 6 21 0 0 1 1
#> 7 21 1 2 14 13
#> 8 21 2 15 47 33
#> 9 24 0 0 1 1
#> 10 24 1 2 3 2
#> 11 24 2 4 32 29
#> 12 24 3 33 41 9
Data from OP
df <- structure(list(ID = c(2L, 2L, 13L, 13L, 13L, 21L, 21L, 21L, 24L,
24L, 24L, 24L), mignr = c(1L, 0L, 1L, 2L, 0L, 0L, 2L, 1L, 2L,
3L, 0L, 1L), start = c(1387L, 903L, 1357L, 1391L, 1087L, 936L,
1367L, 1354L, 1363L, 1392L, 908L, 1361L), end = c(1401L, 1386L,
1390L, 1401L, 1356L, 1353L, 1399L, 1366L, 1391L, 1400L, 1360L,
1362L), staytime = c(15L, 484L, 34L, 11L, 270L, 418L, 33L, 13L,
29L, 9L, 453L, 2L)), row.names = c(NA, -12L), class = "data.frame")
Created on 2023-02-20 by the reprex package (v2.0.1)