I need to find the sum of transpiration to know the water loss every day. I have a datatable with the transpiration of a plant every 5 minutes for several days and would like to know the water loss from midnight to a certain time in the day. Does anyone know how I could solve this problem, ideally without using a for loop ?
This is what my datatable looks like.
This is the output of
dput(resdf[sample(seq_len(nrow(resdf)), 20L, replace = TRUE), ])
structure(list(datetime = structure(c(1631829060, 1629452580,
1633573500, 1632804540, 1632026580, 1630784460, 1629697440, 1630264740,
1629618660, 1632762180, 1631546280, 1630984860, 1631536380, 1631223300,
1631197680, 1633019220, 1630395600, 1633295760, 1631682840, 1629258780
), class = c("POSIXct", "POSIXt"), tzone = ""), id = structure(c(14L,
14L, 3L, 7L, 5L, 14L, 24L, 24L, 1L, 3L, 18L, 9L, 19L, 1L, 5L,
3L, 17L, 12L, 15L, 4L), .Label = c("A1", "A10", "A11", "A12",
"A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "B1", "B10",
"B11", "B12", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9"), class = "factor"),
date = structure(c(18885, 18858, 18906, 18897, 18888, 18873,
18861, 18867, 18860, 18896, 18882, 18876, 18882, 18878, 18878,
18899, 18869, 18902, 18884, 18856), class = "Date"), time = structure(c(0.99375,
0.488194444444444, 0.184027777777778, 0.284027777777778,
0.279861111111111, 0.903472222222222, 0.322222222222222,
0.888194444444444, 0.410416666666667, 0.79375, 0.720833333333333,
0.222916666666667, 0.60625, 0.982638888888889, 0.686111111111111,
0.76875, 0.402777777777778, 0.969444444444444, 0.301388888888889,
0.245138888888889), format = "h:m:s", class = "times"), row = c("B",
"B", "A", "A", "A", "B", "B", "B", "A", "A", "B", "A", "B",
"A", "A", "A", "B", "A", "B", "A"), column = c("10", "10",
"11", "4", "2", "10", "9", "9", "1", "11", "3", "6", "4",
"1", "2", "11", "2", "9", "11", "12"), weight = c(8540.2,
8550.2, 9101.9, 8148.2, 5827.8, 8530, 8635.2, 8555, 8531.5,
8958.5, 6926.7, 8046.2, 6738.9, 6555.9, 7094.1, 8932.3, 7919,
7829.1, 6708.3, 8488.1), ID = structure(c(7L, 7L, 6L, 23L,
11L, 7L, 14L, 14L, 10L, 6L, 13L, 17L, 4L, 10L, 11L, 6L, 22L,
24L, 21L, 9L), .Label = c("2", "3", "7", "8", "9", "10",
"11", "12", "13", "14", "15", "16", "18", "20", "21", "23",
"26", "27", "29", "30", "31", "32", "35", "36"), class = "factor"),
cultivar = structure(c(3L, 3L, 3L, 2L, 1L, 3L, 1L, 1L, 1L,
3L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 1L), .Label = c("Kluai Tiparot",
"Red Dacca", "Simili Radjah"), class = "factor"), treatment = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = "21/14 °C", class = "factor"),
sg = c(NA, NA, 9102.72997996696, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, 6555.90116322923, NA, NA, 7918.36797840064,
NA, NA, NA), trans = c(NA, NA, 0.053774096781126, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.0990147946668003, NA, NA,
0.666644920945146, NA, NA, NA), transh = c(NA, NA, 0.645289161373512,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1.1881775360016,
NA, NA, 7.99973905134175, NA, NA, NA), rel_trans = c(NA,
NA, 5.90800786441578e-06, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 1.51031581730655e-05, NA, NA, 8.4182967665759e-05,
NA, NA, NA)), row.names = c(892101L, 86789L, 1498802L, 1237108L,
968955L, 534540L, 188878L, 368232L, 130348L, 1203252L, 807585L,
617076L, 808671L, 666972L, 671529L, 1291962L, 418660L, 1392510L,
863133L, 15537L), class = "data.frame")
To be able to show that our function (daily_cumulative_trans
, see below) works, I will first select an individual plant from resdf
and replicate all rows of that plant which do not contain missing values for trans
# function to create a subset of resdf and replicate rows
# that do now have missing values for transpiration
my_subset <- \(DF, individuals, times = 2L) {
DF <- with(DF, DF[order(id, as.Date(datetime)),])
sub <- subset(DF, id %in% individuals)
row_to_replicate <-
which(!rowSums(apply(subset(sub, select = c('id', 'trans')), 2, is.na)) > 0)
replicated_rows <- do.call(rbind,
replicate(times, sub[row_to_replicate, ], simplify = FALSE)
)
sub <- rbind(sub, replicated_rows)
out <- with(sub, sub[order(id, as.Date(datetime)),])
return(out)
}
Now we may for instance select plant A11
resdf_sub <- my_subset(DF = resdf, individuals = 'A11')
Finally, we need a function which computes the cumulative transpiration per plant per day and add this as new column to resdf
. I will call this function daily_cumulative_trans
and the column to be added cumulative_trans_per_day
.
daily_cumulative_trans <- \(DF) {
DF <- with(DF, DF[order(id, as.Date(datetime)),])
my_split <- split(DF, f = ~ DF$id + DF$date)
out <- lapply(my_split, \(x) {
cbind(x, cumulative_trans_per_day = cumsum(ifelse(is.na(x$trans), 0, x$trans)))
})
DF <- do.call(rbind, out)
rownames(DF) <- NULL
return(DF)
}
# execute function on plant A11
resdf_sub <- daily_cumulative_trans(DF = resdf_sub)
Output
subset(resdf_sub, id %in% 'A11',
select = c('id', 'datetime', 'trans', 'cumulative_trans_per_day'))
# --------------------------------------------------------
id datetime trans cumulative_trans_per_day
1 A11 2021-09-27 19:03:00 NA 0.0000000
2 A11 2021-09-30 18:27:00 NA 0.0000000
3 A11 2021-10-07 04:25:00 0.0537741 0.0537741
4 A11 2021-10-07 04:25:00 0.0537741 0.1075482
5 A11 2021-10-07 04:25:00 0.0537741 0.1613223
Note: use function(x)
instead of \(x)
if you use a version of R <4.1.0