Search code examples
r dplyr time-series tidyverse padr

Using padr::thicken() with an uneven timestamp interval


I have a dataset that looks like this:

   structure(list(Fish_ID = c("Fork1", "Fork10", "Fork15", "Fork20", 
"Fork21", "Fork22", "Fork23", "Fork4", "Fork5", "Fork7", "Fork9", 
"Fork12", "Fork13", "Fork14", "Fork16", "Fork17", "Fork18", "Fork19", 
"Fork20", "Fork21", "Fork22", "Fork23", "Fork3", "Fork1", "Fork10", 
"Fork12", "Fork13", "Fork15", "Fork16", "Fork17", "Fork18", "Fork19", 
"Fork20", "Fork21", "Fork22", "Fork4", "Fork5", "Fork7", "Fork9", 
"Fork1", "Fork10", "Fork12", "Fork13", "Fork14", "Fork15", "Fork16", 
"Fork17", "Fork18", "Fork19", "Fork20", "Fork21", "Fork22", "Fork4", 
"Fork5", "Fork7", "Fork8", "Fork9", "Fork2", "Fork1", "Fork13", 
"Fork14", "Fork15", "Fork16", "Fork17", "Fork18", "Fork19", "Fork20", 
"Fork21", "Fork4", "Fork5", "Fork7", "Fork9"), Date2 = structure(c(18428, 
18428, 18428, 18428, 18428, 18428, 18428, 18428, 18428, 18428, 
18428, 18438, 18438, 18438, 18438, 18438, 18438, 18438, 18438, 
18438, 18438, 18438, 18438, 18445, 18445, 18445, 18445, 18445, 
18445, 18445, 18445, 18445, 18445, 18445, 18445, 18445, 18445, 
18445, 18445, 18456, 18456, 18456, 18456, 18456, 18456, 18456, 
18456, 18456, 18456, 18456, 18456, 18456, 18456, 18456, 18456, 
18456, 18456, 18463, 18471, 18471, 18471, 18471, 18471, 18471, 
18471, 18471, 18471, 18471, 18471, 18471, 18471, 18471), class = "Date"), 
    Lat2 = c(32.9394, 32.92935, 32.9160666666667, 32.9455166666667, 
    32.9431, 32.90365, 32.9056166666667, 32.94325, 32.9288833333333, 
    32.9297, 32.9303, 32.9047333333333, 32.9093833333333, 32.9509833333333, 
    32.9074333333333, 32.9029, 32.90775, 32.9094, 32.9459166666667, 
    32.9437666666667, 32.9044333333333, 32.90585, 32.9475333333333, 
    32.9443666666667, 32.92935, 32.9047333333333, 32.9093333333333, 
    32.9161, 32.9075333333333, 32.9030333333333, 32.9088333333333, 
    32.9058666666667, 32.9461166666667, 32.9442666666667, 32.9042, 
    32.9442833333333, 32.9288833333333, 32.9298, 32.93135, 32.9432, 
    32.9289, 32.9051166666667, 32.9095166666667, 32.8966666666667, 
    32.9162666666667, 32.9074, 32.9052833333333, 32.90865, 32.9056333333333, 
    32.9461666666667, 32.9430666666667, 32.9037833333333, 32.9470666666667, 
    32.9291166666667, 32.9287666666667, 32.9297666666667, 32.9313, 
    32.9086333333333, 32.94365, 32.9084166666667, 32.91455, 32.9160166666667, 
    32.9063166666667, 32.8989833333333, 32.9086333333333, 32.9052333333333, 
    32.9453833333333, 32.9431666666667, 32.9467833333333, 32.9289833333333, 
    32.9291333333333, 32.9303833333333)), row.names = c(1L, 25L, 
78L, 173L, 198L, 229L, 239L, 242L, 259L, 277L, 281L, 27L, 32L, 
56L, 101L, 121L, 143L, 163L, 176L, 199L, 224L, 238L, 240L, 11L, 
24L, 29L, 34L, 89L, 100L, 133L, 148L, 160L, 182L, 201L, 225L, 
244L, 262L, 272L, 284L, 22L, 26L, 28L, 47L, 73L, 87L, 113L, 126L, 
150L, 155L, 193L, 209L, 227L, 243L, 261L, 273L, 279L, 286L, 170L, 
18L, 43L, 67L, 96L, 98L, 119L, 149L, 161L, 185L, 213L, 241L, 
246L, 276L, 293L), class = "data.frame")

I'm trying to use the padr::thicken() function so that I can fill in the missing Lat2 values for each Fish_ID on each sampled day; however, when I do this, I get the error "Error: interval is not valid".

I suspect this is because the interval between sampling dates isn't exactly one week each time, and one week is missing altogether. Is there another way to pad this data frame so that I have a row for each individual Fish_ID on every sampling date, even though the sampling dates are not evenly spaced? I've been trying the padr::pad_cust() function, but I'm not sure how to set it up with a dataset that looks like this.


Solution

  • complete() from tidyr should do the job (below, tbl is the data from the question stored as a tibble). For weekly observations:

    library(tidyr)
    
    tbl %>% 
      padr::thicken("week") %>% 
      complete(Fish_ID, Date2_week = full_seq(Date2_week, 7))
    #> # A tibble: 147 × 4
    #>    Fish_ID Date2_week Date2       Lat2
    #>    <chr>   <date>     <date>     <dbl>
    #>  1 Fork1   2020-06-14 2020-06-15  32.9
    #>  2 Fork1   2020-06-21 NA          NA  
    #>  3 Fork1   2020-06-28 2020-07-02  32.9
    #>  4 Fork1   2020-07-05 NA          NA  
    #>  5 Fork1   2020-07-12 2020-07-13  32.9
    #>  6 Fork1   2020-07-19 NA          NA  
    #>  7 Fork1   2020-07-26 2020-07-28  32.9
    #>  8 Fork10  2020-06-14 2020-06-15  32.9
    #>  9 Fork10  2020-06-21 NA          NA  
    #> 10 Fork10  2020-06-28 2020-07-02  32.9
    #> # … with 137 more rows
    

    Or for original observation dates with irregular intervals:

    tbl %>% 
      complete(Fish_ID, Date2)
    #> # A tibble: 126 × 3
    #>    Fish_ID Date2       Lat2
    #>    <chr>   <date>     <dbl>
    #>  1 Fork1   2020-06-15  32.9
    #>  2 Fork1   2020-06-25  NA  
    #>  3 Fork1   2020-07-02  32.9
    #>  4 Fork1   2020-07-13  32.9
    #>  5 Fork1   2020-07-20  NA  
    #>  6 Fork1   2020-07-28  32.9
    #>  7 Fork10  2020-06-15  32.9
    #>  8 Fork10  2020-06-25  NA  
    #>  9 Fork10  2020-07-02  32.9
    #> 10 Fork10  2020-07-13  32.9
    #> # … with 116 more rows