Search code examples
r function for-loop time-series na

Loop returns NA


The following script returns "NA" instead of the runoff coefficient computed by the function defined in step 2. I suspect this is because I am not able to relate the Date column to my data. I have to calculate the runoff coefficient over different time windows (defined in step 3) and loop so that it is applied to all my sites (in this case "Att-Bissen" and "Rau. Merl"). The variable "PET" is not used in the runoff coefficient function, but it is included in my original data set, so I would like to keep it in the data frame. The expected outcome is a data frame that contains all my sites in column 1 and the runoff coefficients for time windows 1, 2 and 3 in columns 2, 3 and 4, respectively.

To reproduce the problem I have created the following data frame.

Any help would be truly appreciated!!

# Step 1. Define the example data frame
#
# One row per month (first day of each month, Nov 1876 to Jan 1878). `Date` is
# entered as day/month/year text and converted to Date class once the data
# frame has been built. Every site contributes three columns named
# "<site> P [mm]", "<site> PET [mm]" and "<site> Q [mm]"; some entries are NA.

df <- data.frame(
  stringsAsFactors = FALSE,
  # keep the spaces and brackets in the column names exactly as written
  check.names = FALSE,
  Date = c("01/11/1876","01/12/1876",
           "01/01/1877","01/02/1877","01/03/1877",
           "01/04/1877","01/05/1877","01/06/1877",
           "01/07/1877","01/08/1877","01/09/1877",
           "01/10/1877","01/11/1877","01/12/1877",
           "01/01/1878"),
  `Att-Bissen P [mm]` = c(48.5,111.2,29.7,139.4,90.1,25.9,
                          216,94.6,40.5,NA,64.4,68.8,44.7,
                          34.8,71.9),
  `Att-Bissen PET [mm]` = c(88.4,88.3,80.5,53.4,36.7,20.2,
                            21.6,21.7,21.3,37.6,46.1,66.5,89.8,
                            121.5,87.7),
  `Att-Bissen Q [mm]` = c(13.5,12.6,11.3,12.9,44.6,21.3,
                          194.9,NA,49.1,46.7,63.6,25.4,19.8,
                          15.3,16),
  `Rau. Merl P [mm]` = c(43.7,104.2,25.5,131.3,83.7,21.9,
                         205.2,88.1,35.9,61,59,63.2,40,
                         30.4,66.2),
  `Rau. Merl PET [mm]` = c(91.4,91.3,83.2,54.9,37.5,20.3,
                           21.8,21.8,21.4,38.4,47.3,68.6,NA,
                           125.9,90.7),
  `Rau. Merl Q [mm]` = c(8.7,10.6,8.4,14.3,23.7,14.1,
                         131.6,106.7,40.1,42.4,50.3,24.6,16.7,
                         11.3,13.7))

# Parse the day/month/year strings so that Date is a proper Date-class column
df$Date <- as.Date(df$Date, format = "%d/%m/%Y")


# Step 2. Runoff coefficient: mean(Q) / mean(P), computed only over the
# positions where both P and Q are present (selected with complete.cases).
#
# P, Q: vectors to compare position by position; entries may be NA.
# Returns a one-row data frame with the single column `runoffcoef`.
runoff_coef <- function(P, Q) {
  both_present <- complete.cases(P, Q)
  ratio <- mean(Q[both_present]) / mean(P[both_present])
  data.frame(runoffcoef = ratio)
}

# Step 3. Define time windows
# Each window is a character vector c(start, end). The bounds are written in
# the same dd/mm/yyyy layout as the raw Date strings in Step 1; they are plain
# strings here, not Date objects.
time_windows <- list(
  c("01/11/1876", "01/03/1877"),# time window 1
  c("01/11/1876", "01/09/1877"),# time window 2
  c("01/11/1876", "01/01/1878") # time window 3
)

# Step 4. Derive the site names: strip the " P [mm]" / " PET [mm]" / " Q [mm]"
# suffix from every column name except the first (Date), then drop repeats
site_names <- unique(
  sub(
    pattern = " P \\[mm\\]| PET \\[mm\\]| Q \\[mm\\]",
    replacement = "",
    x = names(df)[-1]
  )
)


# Step 5. Loop through each time window
#
# `results` becomes a named list with one entry per window (named
# "<start> to <end>"); each entry is a list holding one runoff_coef() result
# per site.
# NOTE(review): the row filter compares df$Date (Date class) with `window`, so
# the window bounds should be Date values as well; character bounds such as
# "01/11/1876" are not read as day/month/year and give an empty subset.
results <- list()
for (window in time_windows) {
  in_window <- df$Date >= window[1] & df$Date <= window[2]
  window_df <- df[in_window, , drop = FALSE]

  # Calculate the runoff coefficient for each site in the current time window.
  # Columns are looked up by their exact name rather than via grepl(site, ...),
  # so regex metacharacters in a site name (e.g. the "." in "Rau. Merl") can
  # never select the wrong columns.
  window_results <- list()
  for (site in site_names) {
    window_results[[site]] <- runoff_coef(
      window_df[[paste0(site, " P [mm]")]],
      window_df[[paste0(site, " Q [mm]")]]
    )
  }

  results[[paste(window, collapse = " to ")]] <- window_results
}

# Step 6. Print the results
results

# Step 7. Reshape the nested results into one row per site and one column per
# time window.
# names(results) has one entry per window, whereas unlist(results) has one
# value per window/site pair, so pairing them directly recycles the window
# names and mislabels the rows. Instead, pull the coefficients out window by
# window and bind them as columns.
coef_matrix <- do.call(
  cbind,
  lapply(results, \(w) vapply(w, \(res) res$runoffcoef, numeric(1)))
)
runoff_coefficients <- data.frame(
  Site = rownames(coef_matrix),
  coef_matrix,
  check.names = FALSE, # keep the "<start> to <end>" window labels as they are
  row.names = NULL     # plain 1..n row names; the site is already a column
)

Solution

  • df$Date is class Date and time_windows's elements are all strings (and not in unambiguous date formats), so your problem starts in the middle of your code block. It helps to step through things line-by-line to see when things are not as you expect.

    # df <- data.frame(...)
    # time_windows <- list(...)
    # site_names <- sub(...) |> unique()
    
    # for (window in time_windows) {
    window <- time_windows[[1]]        # we'll step through one iteration of the loop
    window
    # [1] "01/11/1876" "01/03/1877"
    class(window)
    # [1] "character"
    
    window_df <- df[df$Date >= window[1] & df$Date <= window[2], ]
    window_df
    # [1] Date                Att-Bissen P [mm]   Att-Bissen PET [mm] Att-Bissen Q [mm]   Rau. Merl P [mm]    Rau. Merl PET [mm]  Rau. Merl Q [mm]   
    # <0 rows> (or 0-length row.names)
    

    If instead you convert your time_windows to proper Date-class, things are different.

    # The strings are day/month/year, the same layout used for df$Date,
    # so parse them with "%d/%m/%Y" (not "%m/%d/%Y").
    time_windows <- lapply(time_windows, as.Date, format = "%d/%m/%Y")
    time_windows
    # [[1]]
    # [1] "1876-11-01" "1877-03-01"
    # [[2]]
    # [1] "1876-11-01" "1877-09-01"
    # [[3]]
    # [1] "1876-11-01" "1878-01-01"
    class(time_windows[[1]])
    # [1] "Date"
    
    window <- time_windows[[1]]
    window_df <- df[df$Date >= window[1] & df$Date <= window[2], ]
    window_df
    #         Date Att-Bissen P [mm] Att-Bissen PET [mm] Att-Bissen Q [mm] Rau. Merl P [mm] Rau. Merl PET [mm] Rau. Merl Q [mm]
    # 1 1876-11-01              48.5                88.4              13.5             43.7               91.4              8.7
    # 2 1876-12-01             111.2                88.3              12.6            104.2               91.3             10.6
    # 3 1877-01-01              29.7                80.5              11.3             25.5               83.2              8.4
    # 4 1877-02-01             139.4                53.4              12.9            131.3               54.9             14.3
    # 5 1877-03-01              90.1                36.7              44.6             83.7               37.5             23.7
    

    and the rest of your processing should proceed normally.

    # after the `for` loop
    str(results)
    # List of 3
    #  $ 1876-11-01 to 1877-03-01:List of 2
    #   ..$ Att-Bissen:'data.frame':    1 obs. of  1 variable:
    #   .. ..$ runoffcoef: num 0.227
    #   ..$ Rau. Merl :'data.frame':    1 obs. of  1 variable:
    #   .. ..$ runoffcoef: num 0.169
    #  $ 1876-11-01 to 1877-09-01:List of 2
    #   ..$ Att-Bissen:'data.frame':    1 obs. of  1 variable:
    #   .. ..$ runoffcoef: num 0.553
    #   ..$ Rau. Merl :'data.frame':    1 obs. of  1 variable:
    #   .. ..$ runoffcoef: num 0.525
    #  $ 1876-11-01 to 1878-01-01:List of 2
    #   ..$ Att-Bissen:'data.frame':    1 obs. of  1 variable:
    #   .. ..$ runoffcoef: num 0.507
    #   ..$ Rau. Merl :'data.frame':    1 obs. of  1 variable:
    #   .. ..$ runoffcoef: num 0.488
    
    # names(results) has one entry per window while unlist(results) has one
    # value per window/site pair, so data.frame(Site = names(results), ...)
    # would recycle the window names and mislabel the rows. Bind the
    # per-window coefficients as columns instead: one row per site.
    coef_matrix <- do.call(
      cbind,
      lapply(results, \(w) vapply(w, \(res) res$runoffcoef, numeric(1)))
    )
    runoff_coefficients <- data.frame(
      Site = rownames(coef_matrix),
      coef_matrix,
      check.names = FALSE,
      row.names = NULL
    )
    runoff_coefficients
    #         Site 1876-11-01 to 1877-03-01 1876-11-01 to 1877-09-01 1876-11-01 to 1878-01-01
    # 1 Att-Bissen                0.2265457                0.5534805                0.5074551
    # 2  Rau. Merl                0.1691555                0.5246073                0.4882470