I stumbled upon a difficult task in displaying a cumulative plot with cumulative uncertainty over a time series of greenhouse gas fluxes. My raw dataset is composed of 30-minute fluxes data, which was then aggregated to hourly data using cumsum() function.
My code looks like this:
### ---- Binning Net emission to times. Averaging half-hourly to Hourly Data
str(gp_NEE )
tibble [1,270 × 18] (S3: tbl_df/tbl/data.frame)
$ DateTime: POSIXct[1:1270], format: "2024-06-11 00:00:00" "2024-06-11 00:30:00" ...
$ daytime : num [1:1270] 0 0 0 0 0 0 0 0 0 0 ...
$ NEE : num [1:1270] 1.831 0.702 2.017 8.33 3.872 ...
gp_NEE_hour <- gp_NEE %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
select(-c(DateTime)) %>% # removing Date and Time columns: cannot be averaged
group_by(hourlyDate) %>%
summarize(
across(everything(),mean))
## make cumulative data
gp_NEE_hour<- gp_NEE_hour %>%
mutate(
cum_NEE = cumsum(replace_na(NEE, 0)))
str(gp_NEE_hour)
tibble [635 × 19] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:635], format: "2024-06-11 00:00:00" "2024-06-11 01:00:00" "2024-06-11 02:00:00" "2024-06-11 03:00:00" ...
$ daytime : num [1:635] 0 0 0 0 0 0 1 1 1 1 ...
$ NEE : num [1:635] 1.27 5.17 2.58 NA 1.62 ...
$ cum_NEE : num [1:635] 1.27 6.44 9.02 9.02 10.64 ...
cumplot <- gp_NEE_hour %>%
ggplot( aes(x=hourlyDate, y=cum_NEE)) +
geom_line( color="grey") +
geom_point(shape=21, color="black", fill="#69b3a2", size=1) +
theme_bw() +theme(legend.position="none",
text = element_text(size=15))+
labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~g~m^{-2}~~jam^{-1}~")"))+xlab ( "Waktu")
cumplot
I can code for calculating stdev using summarise()
in dplyr, but cannot propagate it over cumulative means. I possess the difficulty in finding the R package or tidyverse' function to develop graph like this (https://doi.org/10.1038/s41561-021-00785-2):
Thank you in advance.
EDIT.
I tried to improve the coding but somehow the geom_ribbon()
can not display the uncertainty.
gp_NEE_hour_no_NA <-gp_NEE %>%
mutate(NEE_Mgha = NEE * 0.24) %>%
select(DateTime, NEE_Mgha) %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
group_by(hourlyDate) %>%
summarise(
meanH_NEE = mean(NEE_Mgha),
sd_NEE = sd(NEE_Mgha),
n = n(),
se_NEE = sd_NEE / sqrt(n),.groups = "drop") %>%
mutate(cum_NEE = cumsum(replace_na(meanH_NEE, 0)))%>%
mutate(cum_se_NEE = sqrt(cumsum(se_NEE^2)))
str(gp_NEE_hour_no_NA)
tibble [635 × 7] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:635], format: ...
$ meanH_NEE : num [1:635] 0.304 1.242 0.619 NA 0.39 ...
$ sd_NEE : num [1:635] 0.1915 1.0714 0.4392 NA 0.0646 ...
$ n : int [1:635] 2 2 2 2 2 2 2 2 2 2 ...
$ se_NEE : num [1:635] 0.1354 0.7576 0.3106 NA 0.0457 ...
$ cum_NEE : num [1:635] 0.304 1.546 2.164 2.164 2.554 ...
$ cum_se_NEE: num [1:635] 0.135 0.77 0.83 NA NA ...
plot_NEE2 <- ggplot(data = gp_NEE_hour_no_NA, aes(x = hourlyDate, y = cum_NEE)) +
geom_line() +
geom_ribbon(aes(ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE), alpha = .5,
fill = "darkseagreen3", color = "transparent") +
#geom_point(aes(y = mean),color="aquamarine4")+
ylim(-1.5, 1.5) + theme(text = element_text(size=15))+
labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~Mg~ha^{-1}~~hari^{-1}~")"))+xlab("Waktu") +
theme_bw()
plot_NEE2
Yes, after many modifications and errors, I realized that I had not enough data to propagate errors for cumulative means. So I initially filled the gaps using time-constrained rolling windows. After that, the code worked successfully.
# gap-fill All variables using MDV
EFgp_NEE_mdv <- gp_NEE %>%
group_by(Time) %>%
mutate(across(c(NEE, H, LE, Rg, Tair, PPFD, VPD, Ustar, rH, Tsoil, SWC),
~ ifelse(is.na(.),
mean(.[which(DateTime >= DateTime - 604800 & DateTime <= DateTime + 604800)], na.rm = TRUE),
.))) %>%
ungroup()
write_xlsx(EFgp_NEE_mdv, "D:/Licor7500/EFgp_NEE_mdv.xlsx") # test if the process run well. Inspect the result = OK !!
## make cumulative data
gp_NEE_hour_cum <-EFgp_NEE_mdv %>%
mutate(NEE_Mgha = NEE * 0.01) %>%
select(DateTime, NEE_Mgha) %>%
mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
group_by(hourlyDate) %>%
summarise(
meanH_NEE = mean(NEE_Mgha, na.rm = TRUE),
sd_NEE = sd(NEE_Mgha, na.rm = TRUE),
n = sum(!is.na(NEE_Mgha))
) %>%
mutate(se_NEE = sd_NEE / sqrt(n)) %>%
mutate(
cum_NEE = cumsum(replace_na(meanH_NEE, 0)),
cum_se_NEE = sqrt(cumsum(se_NEE^2))
)
str(gp_NEE_hour_cum)
tibble [867 × 7] (S3: tbl_df/tbl/data.frame)
$ hourlyDate: POSIXct[1:867], format: ...
$ meanH_NEE : num [1:867] 0.153 0.456 0.361 0.264 0.193 ...
$ sd_NEE : num [1:867] 0.4305 0.2814 0.3762 0.0306 0.1345 ...
$ n : int [1:867] 2 2 2 2 2 2 2 2 2 2 ...
$ se_NEE : num [1:867] 0.3044 0.199 0.266 0.0216 0.0951 ...
$ cum_NEE : num [1:867] 0.153 0.609 0.97 1.235 1.428 ...
$ cum_se_NEE: num [1:867] 0.304 0.364 0.451 0.451 0.461 ...
plot_NEE2 <- ggplot(data = gp_NEE_hour_cum, aes(x = hourlyDate, y = cum_NEE)) +
geom_line(aes(group = 1), size = 1.5, color = "black", stroke = 2) +
geom_ribbon(aes(y = cum_NEE, ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE),
alpha = 0.5, fill = "darkseagreen3") +
# geom_point(aes(y = mean), color = "aquamarine4") +
theme_bw() +
ylim(0,8)+
theme(text = element_text(size = 15)) +
labs(y = expression(CO[2] ~ Flux ~ Netto ~~ " (" ~ Mg ~ ha^{-1} ~ ")"),
x = "")+
scale_x_datetime(breaks = seq(min(gp_NEE_hour_cum$hourlyDate), max(gp_NEE_hour_cum$hourlyDate), by = "1 days"),
labels = function(x) format(x, "%Y-%m-%d %H:%M"))+
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10))
plot_NEE2
Finally the nice keeling-like graph
I leave this for new scientist who studying GHG fluxes.