Search code examples
Tags: r, time-series, xts, replicate, leap-year

How to replicate one year daily data to multiple years taking into account leap years?


I have 365 daily values from one (non-leap) year. I want to replicate these values for each year from 2014 to 2018. In a leap year, February 29 should get the average of the values for February 28 and March 01 (days 59 and 60 of the 365-day year).

How could I do this automatically? This is what I have so far. Thank you very much in advance!

library(xts)

# Build one reference year of 365 reproducible daily values.
set.seed(1)
myday <- seq(1, 365, by = 1)
myvalue <- rnorm(length(myday))
mydata <- data.frame(myday, myvalue)
head(mydata)
#>   myday    myvalue
#> 1     1 -0.6264538
#> 2     2  0.1836433
#> 3     3 -0.8356286
#> 4     4  1.5952808
#> 5     5  0.3295078
#> 6     6 -0.8204684

# Target years the reference year should be replicated onto.
myyear <- seq(2014, 2018, by = 1)
myyear
#> [1] 2014 2015 2016 2017 2018

# Value for Feb 29 in a leap year: the mean of Feb 28 and Mar 01.
# In a 365-day year Feb 28 is day 59 and Mar 01 is day 60 (31 + 28 = 59),
# so those are the rows to average -- not days 28 and 29, which are in January.
leapyearvalue <- 0.5 * (mydata$myvalue[mydata$myday == 59] +
                          mydata$myvalue[mydata$myday == 60])
leapyearvalue
#> [1] 0.2173325

# Stack the 365 rows once per target year. `mydata` is a plain data.frame, so
# it is indexed directly; seq_len() stays correct even for a zero-row input.
repdata <- mydata[rep(seq_len(nrow(mydata)), times = length(myyear)), ]
head(repdata)
#>   myday    myvalue
#> 1     1 -0.6264538
#> 2     2  0.1836433
#> 3     3 -0.8356286
#> 4     4  1.5952808
#> 5     5  0.3295078
#> 6     6 -0.8204684

Solution

  • Edit: Added output for 2014-18.

    Here's a function to help with that. Feed in year and day_num (for that year), and it outputs the standard values for that date. I assume that you want April 1 in a leap year to output April 1 from the standard year, which requires shifting from day 92 (in the leap year) to day 91 in a 365-day year.

    # Look up the value for a calendar day from a 365-day reference year.
    #
    # Arguments:
    #   year     Numeric vector of calendar years.
    #   day_num  Day of year within `year` (1-365, or 1-366 in a leap year);
    #            combined with `year` element-wise.
    #   data     Reference data frame whose second column holds the 365 daily
    #            values in day-of-year order. Defaults to the global `mydata`,
    #            so existing two-argument calls keep working.
    #
    # Returns a numeric vector of values. In a leap year, day 60 (Feb 29) gets
    # the mean of reference days 59 and 60 (Feb 28 and Mar 01), and days 61+
    # are shifted back by one so that e.g. Apr 01 maps to reference Apr 01.
    daily_value <- function(year, day_num, data = mydata) {
      # Gregorian rule instead of a hard-coded list, so every year is handled.
      # A missing year is treated as a common year.
      leap <- !is.na(year) &
        ((year %% 4 == 0 & year %% 100 != 0) | year %% 400 == 0)
      leap_day_val <- 0.5 * (data[59, 2] + data[60, 2])
      # After Feb 29, a leap-year day number is one ahead of the reference year.
      day_num_adj <- day_num + ifelse(leap & day_num >= 61, -1, 0)
      ifelse(leap & day_num == 60,
             leap_day_val,
             data[day_num_adj, 2])
    }
    

    Tests

    # Check 1: day 59 (Feb 28) comes before the leap day, so a leap year
    # returns the reference value for day 59 unchanged.
    mydata[59,]
    #   myday   myvalue
    #59    59 0.5697196
    daily_value(2016,59)
    #[1] 0.5697196
    
    # Check 2: day 60 of a leap year is Feb 29, which should be the mean of
    # reference days 59 and 60 (Feb 28 and Mar 01).
    mydata[59:60,]
    #   myday    myvalue
    #59    59  0.5697196
    #60    60 -0.1350546
    mean(c(0.5697196, -0.1350546))
    #[1] 0.2173325
    daily_value(2016,60)
    #[1] 0.2173325
    
    # Check 3: days after the leap day are shifted back by one.
    # Day 61 of 2016 was March 1, which is day 60 in years with 365 days
    mydata[60,]
    #   myday    myvalue
    #60    60 -0.1350546
    daily_value(2016,61)
    #[1] -0.1350546
    

    Now, we can apply that to all the days in 2014-18:

    # One row per calendar day from 2014-01-01 through 2018-12-31.
    all_days <- seq(as.Date("2014-01-01"), as.Date("2018-12-31"), by = "day")
    output <- data.frame(dates = all_days)
    # Day of year (1-366) drives the lookup into the 365-day reference year.
    output$day_of_year <- lubridate::yday(output$dates)
    output$value <- daily_value(lubridate::year(output$dates),
                                output$day_of_year)
    
    
    # Show the rows around the leap-day position (days 59-61) for every year.
    around_leap_day <- output$day_of_year > 58 & output$day_of_year <= 61
    output[around_leap_day, ]
    #          dates day_of_year      value
    #59   2014-02-28          59  0.5697196
    #60   2014-03-01          60 -0.1350546
    #61   2014-03-02          61  2.4016178
    
    #424  2015-02-28          59  0.5697196
    #425  2015-03-01          60 -0.1350546
    #426  2015-03-02          61  2.4016178
    
    #789  2016-02-28          59  0.5697196
    #790  2016-02-29          60  0.2173325  # Leap day gets avg of 2/28 and 3/01
    #791  2016-03-01          61 -0.1350546  # Rest of leap year shifted back one day
    
    #1155 2017-02-28          59  0.5697196
    #1156 2017-03-01          60 -0.1350546
    #1157 2017-03-02          61  2.4016178
    
    #1520 2018-02-28          59  0.5697196
    #1521 2018-03-01          60 -0.1350546
    #1522 2018-03-02          61  2.4016178