I need help writing either a for loop or dply code for two things:
The data sett looks like this:
Unit Date Prcent_daily.mortality Date.treatment
A 20.07.2020 0.2 NA
A 21.07.2020 0 NA
A 22.07.2020 0.4 NA
A 23.07.2020 0.3 NA
A 24.07.2020 0.6 NA
A 25.07.2020 0.05 NA
A 26.07.2020 0 NA
A 27.07.2020 0 NA
A 28.07.2020 0.01 28.07.2020
A 29.07.2020 0.1 NA
A 30.07.2020 0.2 NA
A 31.07.2020 0 NA
A 01.08.2020 0.2 NA
A 02.08.2020 0.3 NA
A 03.08.2020 0.3 NA
A 04.08.2020 0.05 NA
A 05.08.2020 0 NA
A 06.08.2020 0 NA
A 07.08.2020 0.01 05.08.2020
A 08.08.2020 0.1 NA
A 09.08.2020 0.2 NA
And I want to achieve this:
Unit Date Prcent_daily.mortality Date.treatment akkum.7dbt days.post.treatment
A 20.07.2020 0.2 NA NA NA
A 21.07.2020 0 NA 1.35 NA
A 22.07.2020 0.4 NA 1.35 NA
A 23.07.2020 0.3 NA 1.35 NA
A 24.07.2020 0.6 NA 1.35 NA
A 25.07.2020 0.05 NA 1.35 NA
A 26.07.2020 0 NA 1.35 NA
A 27.07.2020 0 NA 1.35 NA
A 28.07.2020 0.01 28.07.2020 1.35 0
A 29.07.2020 0.1 NA NA 1
A 30.07.2020 0.2 NA NA 2
A 31.07.2020 0 NA 0.85 3
A 01.08.2020 0.2 NA 0.85 4
A 02.08.2020 0.3 NA 0.85 5
A 03.08.2020 0.3 NA 0.85 6
A 04.08.2020 0.05 NA 0.85 7
A 05.08.2020 0 NA 0.85 8
A 06.08.2020 0 NA 0.85 9
A 07.08.2020 0.01 05.08.2020 0.85 0
A 08.08.2020 0.1 NA NA 1
A 09.08.2020 0.2 NA NA 2
Thanks for all help form a self learned R amateur.
Try this solution which combines base R
and dplyr
:
library(dplyr)
library(tidyr)
#Create empty col for index
i1 <- which(!is.na(df$Date.treatment))
i2 <- i1-7
i1 <- i1-1
i3 <- 1:length(i1)
#Create index for second var
j1 <- which(!is.na(df$Date.treatment))
j2 <- 1:length(j1)
# i3 <- i1+1
df$Var <- NA
df$Var[i1]<-i3
df$Var[i2]<-i3
df$Var[1] <- 0
df$Var <- ifelse(!is.na(df$Date.treatment),0,df$Var)
#Fill
df %>% fill(Var) -> df1
#Create aggregations
df1 %>% filter(Var!=0) %>% group_by(Var) %>% mutate(Cum=cumsum(Prcent_daily.mortality)) %>%
filter(Cum==max(Cum)) %>% filter(!duplicated(Cum)) %>% ungroup() %>% select(c(Unit,Cum)) -> Ag1
#Create another var
df$Var2 <- NA
df$Var2[j1] <- j2
df$Var2[1] <- 0
#Fill
df %>% fill(Var2) -> df2
#Create cums and days
df2 %>% group_by(Unit,Var2) %>% mutate(Day=(1:n())-1) %>% ungroup() %>% select(-c(Var2)) -> df3
#Empty var for cums
df3$Cum <- NA
df3$Cum[i1+1] <- Ag1$Cum
#Fill 2
df3 %>% fill(Cum,.direction = 'up') -> df4
#Some adjusts
df4$Day[1:i1[1]]<-NA
df4$Cum[1] <- NA
df4$Cum <- ifelse((df4$Day==1 | df4$Day==2) & !is.na(df4$Day),NA,df4$Cum)
This will produce:
Unit Date Prcent_daily.mortality Date.treatment Var Day Cum
1 A 20.07.2020 0.20 <NA> 0 NA NA
2 A 21.07.2020 0.00 <NA> 1 NA 1.35
3 A 22.07.2020 0.40 <NA> NA NA 1.35
4 A 23.07.2020 0.30 <NA> NA NA 1.35
5 A 24.07.2020 0.60 <NA> NA NA 1.35
6 A 25.07.2020 0.05 <NA> NA NA 1.35
7 A 26.07.2020 0.00 <NA> NA NA 1.35
8 A 27.07.2020 0.00 <NA> 1 NA 1.35
9 A 28.07.2020 0.01 28.07.2020 0 0 1.35
10 A 29.07.2020 0.10 <NA> NA 1 NA
11 A 30.07.2020 0.20 <NA> NA 2 NA
12 A 31.07.2020 0.00 <NA> 2 3 0.85
13 A 01.08.2020 0.20 <NA> NA 4 0.85
14 A 02.08.2020 0.30 <NA> NA 5 0.85
15 A 03.08.2020 0.30 <NA> NA 6 0.85
16 A 04.08.2020 0.05 <NA> NA 7 0.85
17 A 05.08.2020 0.00 <NA> NA 8 0.85
18 A 06.08.2020 0.00 <NA> 2 9 0.85
19 A 07.08.2020 0.01 05.08.2020 0 0 0.85
20 A 08.08.2020 0.10 <NA> NA 1 NA
21 A 09.08.2020 0.20 <NA> NA 2 NA
Update: Working on df4
you can get the cumsum of Prcent_daily.mortality
with next code:
#You can work with df4 to complete the rest of aggregations
#First create an dpuplicate var
df4$DateD <- df4$Date.treatment
#Now fill and mutate
df4 %>% fill(DateD) -> df4
#Create index for replacement
k <- df4$Date.treatment==df4$DateD & !is.na(df4$Date.treatment)
#Assign a value for aggregations not considered
df4$DateD[k]<-'NULL'
#Cumsum
df4 %>% group_by(DateD) %>% mutate(CumAfter=cumsum(Prcent_daily.mortality)) -> df4
#Now remove redundant values in the cum and drop the reference var
df4 %>% ungroup() %>% mutate(CumAfter=ifelse(is.na(DateD) | DateD=='NULL',NA,CumAfter)) %>%
select(-DateD) -> df4
The output will be next:
Unit Date Prcent_daily.mortality Date.treatment Var Day Cum CumAfter
1 A 20.07.2020 0.20 <NA> 0 NA NA NA
2 A 21.07.2020 0.00 <NA> 1 NA 1.35 NA
3 A 22.07.2020 0.40 <NA> NA NA 1.35 NA
4 A 23.07.2020 0.30 <NA> NA NA 1.35 NA
5 A 24.07.2020 0.60 <NA> NA NA 1.35 NA
6 A 25.07.2020 0.05 <NA> NA NA 1.35 NA
7 A 26.07.2020 0.00 <NA> NA NA 1.35 NA
8 A 27.07.2020 0.00 <NA> 1 NA 1.35 NA
9 A 28.07.2020 0.01 28.07.2020 0 0 1.35 NA
10 A 29.07.2020 0.10 <NA> NA 1 NA 0.10
11 A 30.07.2020 0.20 <NA> NA 2 NA 0.30
12 A 31.07.2020 0.00 <NA> 2 3 0.85 0.30
13 A 01.08.2020 0.20 <NA> NA 4 0.85 0.50
14 A 02.08.2020 0.30 <NA> NA 5 0.85 0.80
15 A 03.08.2020 0.30 <NA> NA 6 0.85 1.10
16 A 04.08.2020 0.05 <NA> NA 7 0.85 1.15
17 A 05.08.2020 0.00 <NA> NA 8 0.85 1.15
18 A 06.08.2020 0.00 <NA> 2 9 0.85 1.15
19 A 07.08.2020 0.01 05.08.2020 0 0 0.85 NA
20 A 08.08.2020 0.10 <NA> NA 1 NA 0.10
21 A 09.08.2020 0.20 <NA> NA 2 NA 0.30