The following script returns "NA" instead of the runoff coefficient function that has been defined in step 2. I suspect it is because I am no able to relate the Date column to my data. I have to run the runoff coefficient in different time windows (defined in step 3) and loop so it can be applied to all my sites (in this case "Att-Bissen and Rau.Merl), the variable "PET" is not used in the runoff coefficient function but in my original data set it is included so I would like to keep it in the data frame. The expected outcome is a data frame that contains all my sites in column 1, the runoff coefficient calculations from time window 1, 2 and 3 in columns 2, 3 and 4, respectively.
To reproduce the problem I have created the following data frame.
Any help would be truly appreciated!!
#Step 1. Define data frame
df <- data.frame(
stringsAsFactors = FALSE,
check.names = FALSE,
Date = c("01/11/1876","01/12/1876",
"01/01/1877","01/02/1877","01/03/1877",
"01/04/1877","01/05/1877","01/06/1877",
"01/07/1877","01/08/1877","01/09/1877",
"01/10/1877","01/11/1877","01/12/1877",
"01/01/1878"),
`Att-Bissen P [mm]` = c(48.5,111.2,29.7,139.4,90.1,25.9,
216,94.6,40.5,NA,64.4,68.8,44.7,
34.8,71.9),
`Att-Bissen PET [mm]` = c(88.4,88.3,80.5,53.4,36.7,20.2,
21.6,21.7,21.3,37.6,46.1,66.5,89.8,
121.5,87.7),
`Att-Bissen Q [mm]` = c(13.5,12.6,11.3,12.9,44.6,21.3,
194.9,NA,49.1,46.7,63.6,25.4,19.8,
15.3,16),
`Rau. Merl P [mm]` = c(43.7,104.2,25.5,131.3,83.7,21.9,
205.2,88.1,35.9,61,59,63.2,40,
30.4,66.2),
`Rau. Merl PET [mm]` = c(91.4,91.3,83.2,54.9,37.5,20.3,
21.8,21.8,21.4,38.4,47.3,68.6,NA,
125.9,90.7),
`Rau. Merl Q [mm]` = c(8.7,10.6,8.4,14.3,23.7,14.1,
131.6,106.7,40.1,42.4,50.3,24.6,16.7,
11.3,13.7))
df$Date <- as.Date(df$Date, format = "%d/%m/%Y")
# Step 2. Create the runoff coefficient function using "complete.cases" data
runoff_coef <- function(P, Q) {
complete_rows <- complete.cases(P, Q)
P_complete <- P[complete_rows]
Q_complete <- Q[complete_rows]
runoffcoef <- (mean(Q_complete))/(mean(P_complete))
return(data.frame(runoffcoef = runoffcoef))
}
# Step 3. Define time windows
time_windows <- list(
c("01/11/1876", "01/03/1877"),#time window 1
c("01/11/1876", "01/09/1877"),# time window 2
c("01/11/1876", "01/01/1878") #time window 3
)
#Step 4. Extract the name of the sites
site_names <- sub(" P \\[mm\\]| PET \\[mm\\]| Q \\[mm\\]", "", names(df)[-1]) |>
unique()
# Step 5. Loop through each time window
results <- list()
for (window in time_windows) {
window_df <- df[df$Date >= window[1] & df$Date <= window[2], ]
# Calculate the runoff coefficient for each site in the current time window
window_results <- list()
for (site in site_names) {
site_data <- window_df[, grepl(site, names(window_df))]
result <- runoff_coef(site_data[[paste0(site, " P [mm]")]],
site_data[[paste0(site, " Q [mm]")]])
window_results[[site]] <- result
}
results[[paste(window, collapse = " to ")]] <- window_results
}
# Step 6. Print the results
results
#Step 7. Unlist results
runoff_coefficients <- data.frame(Site = names(results), unlist(results))
df$Date
is class Date
and time_windows
's elements are all strings (and not in unambiguous date formats), so your problem starts in the middle of your code block. It helps to step through things line-by-line to see when things are not as you expect.
# df <- data.frame(...)
# time_windows <- list(...)
# site_names <- sub(...) |> unique()
# for (window in time_windows) {
window <- time_windows[[1]] # we'll step through one iteration of the loop
window
# [1] "01/11/1876" "01/03/1877"
class(window)
# [1] "character"
window_df <- df[df$Date >= window[1] & df$Date <= window[2], ]
window_df
# [1] Date Att-Bissen P [mm] Att-Bissen PET [mm] Att-Bissen Q [mm] Rau. Merl P [mm] Rau. Merl PET [mm] Rau. Merl Q [mm]
# <0 rows> (or 0-length row.names)
If instead you convert your time_windows
to proper Date
-class, things are different.
time_windows <- lapply(time_windows, as.Date, format = "%m/%d/%Y")
time_windows
# [[1]]
# [1] "1876-01-11" "1877-01-03"
# [[2]]
# [1] "1876-01-11" "1877-01-09"
# [[3]]
# [1] "1876-01-11" "1878-01-01"
class(time_windows[[1]])
# [1] "Date"
window <- time_windows[[1]]
window_df <- df[df$Date >= window[1] & df$Date <= window[2], ]
window_df
# Date Att-Bissen P [mm] Att-Bissen PET [mm] Att-Bissen Q [mm] Rau. Merl P [mm] Rau. Merl PET [mm] Rau. Merl Q [mm]
# 1 1876-11-01 48.5 88.4 13.5 43.7 91.4 8.7
# 2 1876-12-01 111.2 88.3 12.6 104.2 91.3 10.6
# 3 1877-01-01 29.7 80.5 11.3 25.5 83.2 8.4
and the rest of your processing should proceed normally.
# after the `for` loop
str(results)
# List of 3
# $ 1876-01-11 to 1877-01-03:List of 2
# ..$ Att-Bissen:'data.frame': 1 obs. of 1 variable:
# .. ..$ runoffcoef: num 0.197
# ..$ Rau. Merl :'data.frame': 1 obs. of 1 variable:
# .. ..$ runoffcoef: num 0.16
# $ 1876-01-11 to 1877-01-09:List of 2
# ..$ Att-Bissen:'data.frame': 1 obs. of 1 variable:
# .. ..$ runoffcoef: num 0.197
# ..$ Rau. Merl :'data.frame': 1 obs. of 1 variable:
# .. ..$ runoffcoef: num 0.16
# $ 1876-01-11 to 1878-01-01:List of 2
# ..$ Att-Bissen:'data.frame': 1 obs. of 1 variable:
# .. ..$ runoffcoef: num 0.507
# ..$ Rau. Merl :'data.frame': 1 obs. of 1 variable:
# .. ..$ runoffcoef: num 0.488
runoff_coefficients <- data.frame(Site = names(results), unlist(results))
runoff_coefficients
# Site unlist.results.
# 1876-01-11 to 1877-01-03.Att-Bissen.runoffcoef 1876-01-11 to 1877-01-03 0.1974657
# 1876-01-11 to 1877-01-03.Rau. Merl.runoffcoef 1876-01-11 to 1877-01-09 0.1597463
# 1876-01-11 to 1877-01-09.Att-Bissen.runoffcoef 1876-01-11 to 1878-01-01 0.1974657
# 1876-01-11 to 1877-01-09.Rau. Merl.runoffcoef 1876-01-11 to 1877-01-03 0.1597463
# 1876-01-11 to 1878-01-01.Att-Bissen.runoffcoef 1876-01-11 to 1877-01-09 0.5074551
# 1876-01-11 to 1878-01-01.Rau. Merl.runoffcoef 1876-01-11 to 1878-01-01 0.4882470