Search code examples
rloopsrowdifftime

looping over rows with difftime


I have a column of times under timefact

> head(foo)
  cnrd_marsh            timefact timefact_hour
1         БЧ 2016-04-07 14:34:00            14
2         БЧ 2016-04-07 14:15:00            14
3         БЧ 2016-04-07 14:10:00            14
4         БЧ 2016-04-07 13:58:00            13
5         БЧ 2016-04-07 13:57:00            13
6         БЧ 2016-04-07 13:39:00            13

My objective is to create a fourth column containing integer values that represent the number of minutes difference (difftime) between the given row and the row above it. In addition, I need to exclude all cases where the timefact_houris different from the row above In other words, I only need to find the differences of rows that share the same hour.

I was able to write the following code, which works in that it takes the difference from one row to the next, but it does not convert posix times to character and it does not skip over instances where hour is different.

output1 <- data.frame("timefact" = character(), diff_time = numeric(0)) 

for (i in 2:nrow(foo)){

  diff_time <- difftime(foo[i, "timefact"], foo[i-1, "timefact"], tz = "EST", units = "mins")
  new_row <- c(foo[i, "timefact"], diff_time)
  output1[nrow(output1) + 1,] = new_row

}

> head(output1)
  timefact diff_time
1     <NA>       -19
2     <NA>        -5
3     <NA>       -12
4     <NA>        -1
5     <NA>       -18
6     <NA>       -31

Here is the sample data:

> dput(foo)
structure(list(cnrd_marsh = structure(c(91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 
91L, 91L, 91L, 91L), .Label = c("", "1", "10", "11", "11К", 
"12", "13", "14", "15", "16", "17", "18", "19", "1К", "2", "20", 
"21", "22", "23", "24", "26", "27", "28", "29", "3", "30", "31", 
"32", "33", "33К", "34", "34К", "35", "36", "37", "38", "39", 
"4", "40", "41", "42", "43", "43К", "44", "45", "47", "48", 
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", 
"6", "60", "61", "62", "63", "63К", "64", "65", "66", "67", 
"7", "70", "70К", "71", "72", "73", "74", "76", "77", "78", 
"79", "8", "80", "81", "82", "83", "84", "85", "86", "88", "9", 
"БК", "БЧ"), class = "factor"), timefact = structure(c(1460057640, 
1460056500, 1460056200, 1460055480, 1460055420, 1460054340, 1460052480, 
1460052360, 1460051340, 1460051040, 1460050440, 1460050380, 1460049360, 
1460048160, 1460048040, 1460046960, 1460046720, 1460046300, 1460045340, 
1460045160, 1460043540, 1460042880, 1460042520, 1460041920, 1460041140, 
1460040120, 1460039760, 1460038860, 1460038620, 1460038080, 1460038020, 
1460037240, 1460036280, 1460035800, 1460034960, 1460034780, 1460034120, 
1460034060, 1460033340, 1460032500, 1460032140, 1460031300, 1460031000, 
1460030340, 1460030340, 1460029800, 1460029200, 1460028300, 1460025720, 
1460090700, 1460088660, 1460088600, 1460087760, 1460087580, 1460086980, 
1460086980, 1460086320, 1460085300, 1460085180, 1460084340, 1460084160, 
1460083620, 1460083620, 1460082780, 1460081820, 1460081760, 1460080740, 
1460080500, 1460080200, 1460079360, 1460079180, 1460077260, 1460076840, 
1460076600, 1460076360, 1460075820, 1460074740, 1460070660, 1460070480, 
1460069100, 1460068800, 1460068140, 1460068140, 1460067060, 1460063880, 
1460063460, 1460062380, 1460062080, 1460061360, 1460061300, 1460059920, 
1460057880, 1460057640, 1460060760, 1460060580, 1460059080, 1460058780, 
1460058240, 1460058180, 1460057220), class = c("POSIXct", "POSIXt"
), tzone = "EST"), timefact_hour = c(14L, 14L, 14L, 13L, 13L, 
13L, 13L, 13L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 10L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 
5L, 23L, 23L, 23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 20L, 20L, 19L, 19L, 
19L, 19L, 19L, 18L, 18L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 15L, 
15L, 15L, 15L, 15L, 14L, 14L, 15L, 15L, 14L, 14L, 14L, 14L, 14L
)), .Names = c("cnrd_marsh", "timefact", "timefact_hour"), row.names = c(NA, 
-100L), class = "data.frame")

I would like to ask how to modify the code to get the proper output. Here is an example of what the output should look like

   > head(output)
  cnrd_marsh            timefact timefact_hour timefact_diff
1         БЧ 2016-04-07 14:34:00            14            NA
2         БЧ 2016-04-07 14:15:00            14            19
3         БЧ 2016-04-07 14:10:00            14             5
4         БЧ 2016-04-07 13:58:00            13            NA
5         БЧ 2016-04-07 13:57:00            13             1
6         БЧ 2016-04-07 13:39:00            13            18

Solution

  • Here is a possible solution

    library(dplyr)
    
    #a is your data.frame
    
    #Create leads of your timefact  & timefact_hour column
    # Use ifelse to assign a value to difftime depending on if leading timefact and current timefact is equal or not
    
    b= lead(a$timefact)
    
    a$DiffTimes = c(NA, ifelse(a$timefact_hour[-nrow(a)] == lead(a$timefact_hour)[-nrow(a)], difftime(a$timefact[-nrow(a)], b[-nrow(a)], units = "mins",tz="EST"),NA))
    
    > head(a)
         cnrd_marsh       timefact              timefact_hour DiffTimes
     1 <U+0411><U+0427> 2016-04-07 14:34:00            14        NA
     2 <U+0411><U+0427> 2016-04-07 14:15:00            14        19
     3 <U+0411><U+0427> 2016-04-07 14:10:00            14         5
     4 <U+0411><U+0427> 2016-04-07 13:58:00            13        NA
     5 <U+0411><U+0427> 2016-04-07 13:57:00            13         1
     6 <U+0411><U+0427> 2016-04-07 13:39:00            13        18