Search code examples
rfor-loopsum

Sum of selection


I need to find the sum of transpiration to know the water loss every day. I have a datatable with the transpiration of a plant every 5 minutes for several days and would like to know the water loss from midnight to a certain time in the day. Does anyone know how I could solve this problem, ideally without using a for loop ?

This is what my datatable looks like. enter image description here

This is the output of dput(resdf[sample(seq_len(nrow(resdf)), 20L, replace = TRUE), ])

structure(list(datetime = structure(c(1631829060, 1629452580, 
1633573500, 1632804540, 1632026580, 1630784460, 1629697440, 1630264740, 
1629618660, 1632762180, 1631546280, 1630984860, 1631536380, 1631223300, 
1631197680, 1633019220, 1630395600, 1633295760, 1631682840, 1629258780
), class = c("POSIXct", "POSIXt"), tzone = ""), id = structure(c(14L, 
14L, 3L, 7L, 5L, 14L, 24L, 24L, 1L, 3L, 18L, 9L, 19L, 1L, 5L, 
3L, 17L, 12L, 15L, 4L), .Label = c("A1", "A10", "A11", "A12", 
"A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "B1", "B10", 
"B11", "B12", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9"), class = "factor"), 
    date = structure(c(18885, 18858, 18906, 18897, 18888, 18873, 
    18861, 18867, 18860, 18896, 18882, 18876, 18882, 18878, 18878, 
    18899, 18869, 18902, 18884, 18856), class = "Date"), time = structure(c(0.99375, 
    0.488194444444444, 0.184027777777778, 0.284027777777778, 
    0.279861111111111, 0.903472222222222, 0.322222222222222, 
    0.888194444444444, 0.410416666666667, 0.79375, 0.720833333333333, 
    0.222916666666667, 0.60625, 0.982638888888889, 0.686111111111111, 
    0.76875, 0.402777777777778, 0.969444444444444, 0.301388888888889, 
    0.245138888888889), format = "h:m:s", class = "times"), row = c("B", 
    "B", "A", "A", "A", "B", "B", "B", "A", "A", "B", "A", "B", 
    "A", "A", "A", "B", "A", "B", "A"), column = c("10", "10", 
    "11", "4", "2", "10", "9", "9", "1", "11", "3", "6", "4", 
    "1", "2", "11", "2", "9", "11", "12"), weight = c(8540.2, 
    8550.2, 9101.9, 8148.2, 5827.8, 8530, 8635.2, 8555, 8531.5, 
    8958.5, 6926.7, 8046.2, 6738.9, 6555.9, 7094.1, 8932.3, 7919, 
    7829.1, 6708.3, 8488.1), ID = structure(c(7L, 7L, 6L, 23L, 
    11L, 7L, 14L, 14L, 10L, 6L, 13L, 17L, 4L, 10L, 11L, 6L, 22L, 
    24L, 21L, 9L), .Label = c("2", "3", "7", "8", "9", "10", 
    "11", "12", "13", "14", "15", "16", "18", "20", "21", "23", 
    "26", "27", "29", "30", "31", "32", "35", "36"), class = "factor"), 
    cultivar = structure(c(3L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 1L, 
    3L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 1L), .Label = c("Kluai Tiparot", 
    "Red Dacca", "Simili Radjah"), class = "factor"), treatment = structure(c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = "21/14 °C", class = "factor"), 
    sg = c(NA, NA, 9102.72997996696, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, 6555.90116322923, NA, NA, 7918.36797840064, 
    NA, NA, NA), trans = c(NA, NA, 0.053774096781126, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0.0990147946668003, NA, NA, 
    0.666644920945146, NA, NA, NA), transh = c(NA, NA, 0.645289161373512, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.1881775360016, 
    NA, NA, 7.99973905134175, NA, NA, NA), rel_trans = c(NA, 
    NA, 5.90800786441578e-06, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, 1.51031581730655e-05, NA, NA, 8.4182967665759e-05, 
    NA, NA, NA)), row.names = c(892101L, 86789L, 1498802L, 1237108L, 
968955L, 534540L, 188878L, 368232L, 130348L, 1203252L, 807585L, 
617076L, 808671L, 666972L, 671529L, 1291962L, 418660L, 1392510L, 
863133L, 15537L), class = "data.frame")

Solution

  • To be able to show that our function (daily_cumulative_trans, see below) works, I will first select an individual plant from resdf and replicate all rows of that plant which do not contain missing values for trans

    # function to create a subset of resdf and replicate rows
    # that do now have missing values for transpiration
    my_subset <- \(DF, individuals, times = 2L) {
      DF <- with(DF, DF[order(id, as.Date(datetime)),])
      sub <- subset(DF, id %in% individuals)
      row_to_replicate <-
        which(!rowSums(apply(subset(sub, select = c('id', 'trans')), 2, is.na)) > 0)
      replicated_rows <- do.call(rbind,
        replicate(times, sub[row_to_replicate, ], simplify = FALSE)
      )
      sub <- rbind(sub, replicated_rows)
      out <- with(sub, sub[order(id, as.Date(datetime)),])
      return(out)
    }
    

    Now we may for instance select plant A11

    resdf_sub <- my_subset(DF = resdf, individuals = 'A11')
    

    Finally, we need a function which computes the cumulative transpiration per plant per day and add this as new column to resdf. I will call this function daily_cumulative_trans and the column to be added cumulative_trans_per_day.

    daily_cumulative_trans <- \(DF) {
      DF <- with(DF, DF[order(id, as.Date(datetime)),])
      my_split <- split(DF, f = ~ DF$id + DF$date)
      out <- lapply(my_split, \(x) {
        cbind(x, cumulative_trans_per_day = cumsum(ifelse(is.na(x$trans), 0, x$trans)))
      })
      DF <- do.call(rbind, out)
      rownames(DF) <- NULL
      return(DF)
    }
    
    # execute function on plant A11
    resdf_sub <- daily_cumulative_trans(DF = resdf_sub)
    

    Output

    subset(resdf_sub, id %in% 'A11',
           select = c('id', 'datetime', 'trans', 'cumulative_trans_per_day'))
    # --------------------------------------------------------
       id            datetime     trans cumulative_trans_per_day
    1 A11 2021-09-27 19:03:00        NA                0.0000000
    2 A11 2021-09-30 18:27:00        NA                0.0000000
    3 A11 2021-10-07 04:25:00 0.0537741                0.0537741
    4 A11 2021-10-07 04:25:00 0.0537741                0.1075482
    5 A11 2021-10-07 04:25:00 0.0537741                0.1613223
    

    Note: use function(x) instead of \(x) if you use a version of R <4.1.0