Search code examples
r loops posix difftime

time difference of consecutive rows with condition


I have a list of times:

> sapply(copy, class)
$timefact
[1] "POSIXct" "POSIXt" 

$timefact_hour
[1] "integer"

> head(copy)

             timefact timefact_hour
1 2016-04-07 23:42:00            23
2 2016-04-07 23:37:00            23
3 2016-04-07 23:31:00            23
4 2016-04-07 23:27:00            23
5 2016-04-07 23:19:00            23
6 2016-04-07 23:17:00            23

My objective is to create a third column which will be an integer value that represents the number of minutes difference between the given row and the row above it.

The output therefore should look something like this:

> output
             timefact timefact_hour timediff
1 2016-04-07 23:42:00            23       NA
2 2016-04-07 23:37:00            23        5
3 2016-04-07 23:31:00            23        6
4 2016-04-07 23:27:00            23        4
5 2016-04-07 23:19:00            23        8
6 2016-04-07 23:17:00            23        2

However, I want to exclude all cases where the hours are different and only find the differences of rows that share the same hour as the row above. For example:

> output
              timefact timefact_hour timediff
90 2016-04-07 12:14:00            12        6
91 2016-04-07 12:04:00            12       10
92 2016-04-07 11:56:00            11       NA
93 2016-04-07 11:49:00            11        7
94 2016-04-07 11:42:00            11        7
95 2016-04-07 11:36:00            11        6

So far, I have come up with the following code,

# Print the gap, in minutes, between each row's timefact and the row above it.
# The loop starts at row 2 because the first row has no predecessor.
# Note: difftime(a, b) is a - b, so with rows sorted newest-first (as in the
# head() output above) the printed values are negative.
for(i in 2:nrow(copy)) {
   print(difftime(copy[i,"timefact"], copy[i-1,"timefact"], tz = "EST", units = "mins"))
  }

Which seems to work but I am not sure how to a) add it as a third column to the dataframe and b) skip over rows where the row above is not the same hour. Any advice would be appreciated! I have included a sample of the data below:

> dput(copy)
structure(list(timefact = structure(list(sec = c(0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), min = c(42L, 37L, 31L, 27L, 
19L, 17L, 10L, 6L, 1L, 56L, 50L, 45L, 34L, 27L, 18L, 4L, 58L, 
53L, 50L, 44L, 44L, 37L, 34L, 28L, 23L, 16L, 12L, 4L, 59L, 50L, 
19L, 13L, 46L, 26L, 26L, 19L, 11L, 8L, 2L, 55L, 51L, 44L, 37L, 
31L, 9L, 0L, 48L, 43L, 34L, 30L, 10L, 6L, 57L, 52L, 44L, 39L, 
30L, 23L, 23L, 1L, 1L, 54L, 48L, 32L, 23L, 16L, 12L, 5L, 1L, 
48L, 44L, 37L, 27L, 18L, 13L, 6L, 0L, 39L, 31L, 23L, 17L, 4L, 
54L, 49L, 44L, 38L, 33L, 24L, 20L, 14L, 4L, 56L, 49L, 42L, 36L, 
48L, 36L, 31L, 22L, 14L), hour = c(23L, 23L, 23L, 23L, 23L, 23L, 
23L, 23L, 23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 17L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 16L, 16L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 10L, 10L, 
10L, 10L, 10L), mday = c(7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), mon = c(3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L), year = c(116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 116L, 
116L, 116L, 116L, 116L), wday = c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), yday = c(97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L, 
97L, 97L, 97L, 97L, 97L, 97L, 97L, 97L), isdst = c(0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L), zone = c("EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", "EST", 
"EST", "EST", "EST"), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", 
"mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"
), class = c("POSIXlt", "POSIXt"), tzone = c("EST", "EST", "   "
)), timefact_hour = c(23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 
23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 
17L, 17L, 17L, 17L, 17L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
16L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 10L, 10L, 10L, 10L, 
10L)), .Names = c("timefact", "timefact_hour"), row.names = c(NA, 
100L), class = "data.frame")

Solution

  • First convert timefact column to class POSIXct

    # The dput above stores timefact as POSIXlt (a list of date-time components);
    # convert it to the atomic POSIXct class before using it as a data-frame column.
    copy$timefact <- as.POSIXct(copy$timefact)
    

    Then, using lubridate and dplyr:

    library(lubridate)
    library(dplyr)
    
    # Within each hour group, timediff is the previous row's minute minus this
    # row's minute. The first row of every hour has no predecessor, so it is NA.
    # NOTE: grouping on timefact_hour alone assumes the data covers a single day
    # (true for the sample); add the date to the grouping for multi-day data.
    copy %>%
      group_by(timefact_hour) %>%
      mutate(timediff = lag(minute(timefact)) - minute(timefact)) %>%
      ungroup()
    

    Or data.table:

    library(data.table)
    
    # Convert copy to a data.table by reference, then add timediff in place.
    setDT(copy)
    # Per hour group: previous row's minute minus this row's minute. shift()
    # lags by one row by default, so the first row of each hour becomes NA.
    copy[, timediff := shift(minute(timefact)) - minute(timefact),
         by = timefact_hour]