Search code examples
rggplot2dplyrcumulative-sumtimeserieschart

Plotting Cumulative Uncertainty Graph over Cumulative Means of Time Series Data in R


I stumbled upon a difficult task in displaying a cumulative plot with cumulative uncertainty over a time series of greenhouse gas fluxes. My raw dataset is composed of 30-minute fluxes data, which was then aggregated to hourly data using cumsum() function.

My code looks like this:

### ---- Binning Net emission to times. Averaging half-hourly to Hourly Data 
str(gp_NEE  )
    tibble [1,270 × 18] (S3: tbl_df/tbl/data.frame)
     $ DateTime: POSIXct[1:1270], format: "2024-06-11 00:00:00" "2024-06-11 00:30:00" ...
     $ daytime : num [1:1270] 0 0 0 0 0 0 0 0 0 0 ...
     $ NEE     : num [1:1270] 1.831 0.702 2.017 8.33 3.872 ...

gp_NEE_hour <- gp_NEE %>% 
        mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
        select(-c(DateTime)) %>%  # removing Date and Time columns: cannot be averaged
        group_by(hourlyDate) %>% 
        summarize(
                across(everything(),mean))

    ## make cumulative data 
    
    gp_NEE_hour<- gp_NEE_hour %>%
            mutate(
                cum_NEE = cumsum(replace_na(NEE, 0)))
    
    str(gp_NEE_hour)
            tibble [635 × 19] (S3: tbl_df/tbl/data.frame)
             $ hourlyDate: POSIXct[1:635], format: "2024-06-11 00:00:00" "2024-06-11 01:00:00" "2024-06-11 02:00:00" "2024-06-11 03:00:00" ...
             $ daytime   : num [1:635] 0 0 0 0 0 0 1 1 1 1 ...
             $ NEE       : num [1:635] 1.27 5.17 2.58 NA 1.62 ...
             $ cum_NEE   : num [1:635] 1.27 6.44 9.02 9.02 10.64 ...


    cumplot <- gp_NEE_hour %>%
                ggplot( aes(x=hourlyDate, y=cum_NEE)) +
                        geom_line( color="grey") +
                        geom_point(shape=21, color="black", fill="#69b3a2", size=1) +
                        theme_bw() +theme(legend.position="none", 
            text = element_text(size=15))+
        labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~g~m^{-2}~~jam^{-1}~")"))+xlab ( "Waktu")
    cumplot

The resulting figure is this: My Cumulative Flux Curve

I can code for calculating stdev using summarise() in dplyr, but cannot propagate it over cumulative means. I possess the difficulty in finding the R package or tidyverse' function to develop graph like this (https://doi.org/10.1038/s41561-021-00785-2):

Cumulative NEE with cumulative flux uncertainty

Thank you in advance.

EDIT.

I tried to improve the coding but somehow the geom_ribbon() can not display the uncertainty.

gp_NEE_hour_no_NA <-gp_NEE %>% 
                    mutate(NEE_Mgha = NEE * 0.24) %>% 
                    select(DateTime, NEE_Mgha) %>% 
                    mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
                    group_by(hourlyDate) %>% 
                    summarise(
                            meanH_NEE = mean(NEE_Mgha), 
                            sd_NEE = sd(NEE_Mgha), 
                            n = n(),
                            se_NEE = sd_NEE / sqrt(n),.groups = "drop") %>%
                    mutate(cum_NEE = cumsum(replace_na(meanH_NEE, 0)))%>% 
                    mutate(cum_se_NEE = sqrt(cumsum(se_NEE^2)))
                    
str(gp_NEE_hour_no_NA)
        tibble [635 × 7] (S3: tbl_df/tbl/data.frame)
         $ hourlyDate: POSIXct[1:635], format:  ...
         $ meanH_NEE : num [1:635] 0.304 1.242 0.619 NA 0.39 ...
         $ sd_NEE    : num [1:635] 0.1915 1.0714 0.4392 NA 0.0646 ...
         $ n         : int [1:635] 2 2 2 2 2 2 2 2 2 2 ...
         $ se_NEE    : num [1:635] 0.1354 0.7576 0.3106 NA 0.0457 ...
         $ cum_NEE   : num [1:635] 0.304 1.546 2.164 2.164 2.554 ...
         $ cum_se_NEE: num [1:635] 0.135 0.77 0.83 NA NA ...

plot_NEE2 <-  ggplot(data = gp_NEE_hour_no_NA, aes(x = hourlyDate, y = cum_NEE)) +                  
                geom_line() + 
                geom_ribbon(aes(ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE), alpha = .5,
                            fill = "darkseagreen3", color = "transparent") +                    
                #geom_point(aes(y = mean),color="aquamarine4")+
                ylim(-1.5, 1.5) + theme(text = element_text(size=15))+
                labs(y=expression(CO[2]~Flux~Netto~Kumulatif~~"("~Mg~ha^{-1}~~hari^{-1}~")"))+xlab("Waktu") + 
                theme_bw()
plot_NEE2

Solution

  • Yes, after many modifications and errors, I realized that I had not enough data to propagate errors for cumulative means. So I initially filled the gaps using time-constrained rolling windows. After that, the code worked successfully.

    # gap-fill All variables using MDV
    
    EFgp_NEE_mdv <- gp_NEE  %>%
      group_by(Time) %>%
      mutate(across(c(NEE, H, LE, Rg, Tair, PPFD, VPD, Ustar, rH, Tsoil, SWC), 
                    ~ ifelse(is.na(.), 
                             mean(.[which(DateTime >= DateTime - 604800 & DateTime <= DateTime + 604800)], na.rm = TRUE), 
                            .))) %>%
      ungroup()
          
    write_xlsx(EFgp_NEE_mdv, "D:/Licor7500/EFgp_NEE_mdv.xlsx") # test if the process run well. Inspect the result = OK !! 
        
    ## make cumulative data 
    
    gp_NEE_hour_cum <-EFgp_NEE_mdv %>% 
                        mutate(NEE_Mgha = NEE * 0.01) %>% 
                        select(DateTime, NEE_Mgha) %>% 
                        mutate(hourlyDate = floor_date(DateTime,unit='hour')) %>%
                        group_by(hourlyDate) %>% 
                        summarise(
                                meanH_NEE = mean(NEE_Mgha, na.rm = TRUE), 
                                sd_NEE = sd(NEE_Mgha, na.rm = TRUE), 
                                n = sum(!is.na(NEE_Mgha))
                                    ) %>% 
                        mutate(se_NEE = sd_NEE / sqrt(n)) %>% 
                        mutate(
                                cum_NEE = cumsum(replace_na(meanH_NEE, 0)), 
                                cum_se_NEE = sqrt(cumsum(se_NEE^2))                         
        )
                        
    str(gp_NEE_hour_cum)
            tibble [867 × 7] (S3: tbl_df/tbl/data.frame)
             $ hourlyDate: POSIXct[1:867], format:  ...
             $ meanH_NEE : num [1:867] 0.153 0.456 0.361 0.264 0.193 ...
             $ sd_NEE    : num [1:867] 0.4305 0.2814 0.3762 0.0306 0.1345 ...
             $ n         : int [1:867] 2 2 2 2 2 2 2 2 2 2 ...
             $ se_NEE    : num [1:867] 0.3044 0.199 0.266 0.0216 0.0951 ...
             $ cum_NEE   : num [1:867] 0.153 0.609 0.97 1.235 1.428 ...
             $ cum_se_NEE: num [1:867] 0.304 0.364 0.451 0.451 0.461 ...
             
             
    plot_NEE2 <- ggplot(data = gp_NEE_hour_cum, aes(x = hourlyDate, y = cum_NEE)) + 
      geom_line(aes(group = 1), size = 1.5, color = "black", stroke = 2) + 
      geom_ribbon(aes(y = cum_NEE, ymin = cum_NEE - 1.96 * cum_se_NEE, ymax = cum_NEE + 1.96 * cum_se_NEE), 
                  alpha = 0.5, fill = "darkseagreen3") + 
      # geom_point(aes(y = mean), color = "aquamarine4") + 
      theme_bw() + 
      ylim(0,8)+
      theme(text = element_text(size = 15)) + 
      labs(y = expression(CO[2] ~ Flux ~ Netto ~~ " (" ~ Mg ~ ha^{-1} ~ ")"), 
           x = "")+ 
      scale_x_datetime(breaks = seq(min(gp_NEE_hour_cum$hourlyDate), max(gp_NEE_hour_cum$hourlyDate), by = "1 days"), 
                       labels = function(x) format(x, "%Y-%m-%d %H:%M"))+ 
      theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 10))
    
    plot_NEE2
    

    Finally the nice keeling-like graph

    enter image description here

    I leave this for new scientist who studying GHG fluxes.