I have data that looks like:
intial<-
tibble(
start_date=
rep(seq.Date(as.Date("2021-06-01"),as.Date("2021-10-01"),by="months"),4),
end_date=
rep(seq.Date(as.Date("2021-07-01"),as.Date("2021-11-01"),by="months"),4),
id=
rep(c(rep(1,5),rep(2,5)),2),
group=
c(rep("a",10),rep("b",10)),
increase=
c(
c(4:8),
c(5:9),
c(6:10),
c(7:11)
),
decrease=
c(
c(1:5),
c(2:6),
c(3:7),
c(4:8)
),
start_count=
c(c(10,13,NA,NA,NA),c(15,18,NA,NA,NA),c(20,23,NA,NA,NA),c(25,28,NA,NA,NA)),
end_count=
c(c(13,16,NA,NA,NA),c(18,21,NA,NA,NA),c(23,26,NA,NA,NA),c(28,31,NA,NA,NA))
)
print(initial)
start_date end_date id group increase decrease start_count end_count
<date> <date> <dbl> <chr> <int> <int> <dbl> <dbl>
1 2021-06-01 2021-07-01 1 a 4 1 10 13
2 2021-07-01 2021-08-01 1 a 5 2 13 16
3 2021-08-01 2021-09-01 1 a 6 3 NA NA
4 2021-09-01 2021-10-01 1 a 7 4 NA NA
5 2021-10-01 2021-11-01 1 a 8 5 NA NA
6 2021-06-01 2021-07-01 2 a 5 2 15 18
7 2021-07-01 2021-08-01 2 a 6 3 18 21
8 2021-08-01 2021-09-01 2 a 7 4 NA NA
9 2021-09-01 2021-10-01 2 a 8 5 NA NA
10 2021-10-01 2021-11-01 2 a 9 6 NA NA
11 2021-06-01 2021-07-01 1 b 6 3 20 23
12 2021-07-01 2021-08-01 1 b 7 4 23 26
13 2021-08-01 2021-09-01 1 b 8 5 NA NA
14 2021-09-01 2021-10-01 1 b 9 6 NA NA
15 2021-10-01 2021-11-01 1 b 10 7 NA NA
16 2021-06-01 2021-07-01 2 b 7 4 25 28
17 2021-07-01 2021-08-01 2 b 8 5 28 31
18 2021-08-01 2021-09-01 2 b 9 6 NA NA
19 2021-09-01 2021-10-01 2 b 10 7 NA NA
20 2021-10-01 2021-11-01 2 b 11 8 NA NA
My goal is to populate each missing start_count
and end_count
by sequentially populating the missing start_count
with the previous month's end_count
, and calculate the subsequent end_count
with the newly populated start_count
by start_count
+ increase
- decrease
within each unique id
group
.
The final result should look like:
final<-
tibble(
start_date=
rep(seq.Date(as.Date("2021-06-01"),as.Date("2021-10-01"),by="months"),4),
end_date=
rep(seq.Date(as.Date("2021-07-01"),as.Date("2021-11-01"),by="months"),4),
id=
rep(c(rep(1,5),rep(2,5)),2),
group=
c(rep("a",10),rep("b",10)),
increase=
c(
c(4:8),
c(5:9),
c(6:10),
c(7:11)
),
decrease=
c(
c(1:5),
c(2:6),
c(3:7),
c(4:8)
),
start_count=
c(c(10,13,16,19,22),c(15,18,21,24,27),c(20,23,26,29,32),c(25,28,31,34,37)),
end_count=
c(c(13,16,19,22,25),c(18,21,24,27,30),c(23,26,29,32,35),c(28,31,34,37,40))
)
print(final)
start_date end_date id group increase decrease start_count end_count
<date> <date> <dbl> <chr> <int> <int> <dbl> <dbl>
1 2021-06-01 2021-07-01 1 a 4 1 10 13
2 2021-07-01 2021-08-01 1 a 5 2 13 16
3 2021-08-01 2021-09-01 1 a 6 3 16 19
4 2021-09-01 2021-10-01 1 a 7 4 19 22
5 2021-10-01 2021-11-01 1 a 8 5 22 25
6 2021-06-01 2021-07-01 2 a 5 2 15 18
7 2021-07-01 2021-08-01 2 a 6 3 18 21
8 2021-08-01 2021-09-01 2 a 7 4 21 24
9 2021-09-01 2021-10-01 2 a 8 5 24 27
10 2021-10-01 2021-11-01 2 a 9 6 27 30
11 2021-06-01 2021-07-01 1 b 6 3 20 23
12 2021-07-01 2021-08-01 1 b 7 4 23 26
13 2021-08-01 2021-09-01 1 b 8 5 26 29
14 2021-09-01 2021-10-01 1 b 9 6 29 32
15 2021-10-01 2021-11-01 1 b 10 7 32 35
16 2021-06-01 2021-07-01 2 b 7 4 25 28
17 2021-07-01 2021-08-01 2 b 8 5 28 31
18 2021-08-01 2021-09-01 2 b 9 6 31 34
19 2021-09-01 2021-10-01 2 b 10 7 34 37
20 2021-10-01 2021-11-01 2 b 11 8 37 40
Thanks!
Here is a solution. There might be better ones.
library(tidyverse)
initial<-
tibble(
start_date=
rep(seq.Date(as.Date("2021-06-01"),as.Date("2021-10-01"),by="months"),4),
end_date=
rep(seq.Date(as.Date("2021-07-01"),as.Date("2021-11-01"),by="months"),4),
id=
rep(c(rep(1,5),rep(2,5)),2),
group=
c(rep("a",10),rep("b",10)),
increase=
c(
c(4:8),
c(5:9),
c(6:10),
c(7:11)
),
decrease=
c(
c(1:5),
c(2:6),
c(3:7),
c(4:8)
),
start_count=
c(c(10,13,NA,NA,NA),c(15,18,NA,NA,NA),c(20,23,NA,NA,NA),c(25,28,NA,NA,NA)),
end_count=
c(c(13,16,NA,NA,NA),c(18,21,NA,NA,NA),c(23,26,NA,NA,NA),c(28,31,NA,NA,NA))
)
initial %>%
rowid_to_column(var = "obs_id") %>%
mutate(net = increase - decrease) %>%
select(obs_id, id, group, everything()) %>%
arrange(id, group, start_date) %>%
group_by(id, group) %>%
mutate(end_count = cumsum(net) + first(start_count)) %>%
mutate(start_count = end_count - net) %>%
ungroup() %>%
arrange(obs_id) %>%
select(-net)
#> # A tibble: 20 × 9
#> obs_id id group start_date end_date increase decrease start_count
#> <int> <dbl> <chr> <date> <date> <int> <int> <dbl>
#> 1 1 1 a 2021-06-01 2021-07-01 4 1 10
#> 2 2 1 a 2021-07-01 2021-08-01 5 2 13
#> 3 3 1 a 2021-08-01 2021-09-01 6 3 16
#> 4 4 1 a 2021-09-01 2021-10-01 7 4 19
#> 5 5 1 a 2021-10-01 2021-11-01 8 5 22
#> 6 6 2 a 2021-06-01 2021-07-01 5 2 15
#> 7 7 2 a 2021-07-01 2021-08-01 6 3 18
#> 8 8 2 a 2021-08-01 2021-09-01 7 4 21
#> 9 9 2 a 2021-09-01 2021-10-01 8 5 24
#> 10 10 2 a 2021-10-01 2021-11-01 9 6 27
#> 11 11 1 b 2021-06-01 2021-07-01 6 3 20
#> 12 12 1 b 2021-07-01 2021-08-01 7 4 23
#> 13 13 1 b 2021-08-01 2021-09-01 8 5 26
#> 14 14 1 b 2021-09-01 2021-10-01 9 6 29
#> 15 15 1 b 2021-10-01 2021-11-01 10 7 32
#> 16 16 2 b 2021-06-01 2021-07-01 7 4 25
#> 17 17 2 b 2021-07-01 2021-08-01 8 5 28
#> 18 18 2 b 2021-08-01 2021-09-01 9 6 31
#> 19 19 2 b 2021-09-01 2021-10-01 10 7 34
#> 20 20 2 b 2021-10-01 2021-11-01 11 8 37
#> # … with 1 more variable: end_count <dbl>
Created on 2021-08-11 by the reprex package (v2.0.0)