I have a column of times under timefact
> head(foo)
cnrd_marsh timefact timefact_hour
1 БЧ 2016-04-07 14:34:00 14
2 БЧ 2016-04-07 14:15:00 14
3 БЧ 2016-04-07 14:10:00 14
4 БЧ 2016-04-07 13:58:00 13
5 БЧ 2016-04-07 13:57:00 13
6 БЧ 2016-04-07 13:39:00 13
My objective is to create a fourth column containing integer values that represent the number of minutes difference (difftime) between the given row and the row above it. In addition, I need to exclude all cases where the timefact_hour
is different from the row above In other words, I only need to find the differences of rows that share the same hour
.
I was able to write the following code, which works in that it takes the difference from one row to the next, but it does not convert posix times to character and it does not skip over instances where hour
is different.
output1 <- data.frame("timefact" = character(), diff_time = numeric(0))
for (i in 2:nrow(foo)){
diff_time <- difftime(foo[i, "timefact"], foo[i-1, "timefact"], tz = "EST", units = "mins")
new_row <- c(foo[i, "timefact"], diff_time)
output1[nrow(output1) + 1,] = new_row
}
> head(output1)
timefact diff_time
1 <NA> -19
2 <NA> -5
3 <NA> -12
4 <NA> -1
5 <NA> -18
6 <NA> -31
Here is the sample data:
> dput(foo)
structure(list(cnrd_marsh = structure(c(91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L, 91L,
91L, 91L, 91L, 91L), .Label = c("", "1", "10", "11", "11К",
"12", "13", "14", "15", "16", "17", "18", "19", "1К", "2", "20",
"21", "22", "23", "24", "26", "27", "28", "29", "3", "30", "31",
"32", "33", "33К", "34", "34К", "35", "36", "37", "38", "39",
"4", "40", "41", "42", "43", "43К", "44", "45", "47", "48",
"49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59",
"6", "60", "61", "62", "63", "63К", "64", "65", "66", "67",
"7", "70", "70К", "71", "72", "73", "74", "76", "77", "78",
"79", "8", "80", "81", "82", "83", "84", "85", "86", "88", "9",
"БК", "БЧ"), class = "factor"), timefact = structure(c(1460057640,
1460056500, 1460056200, 1460055480, 1460055420, 1460054340, 1460052480,
1460052360, 1460051340, 1460051040, 1460050440, 1460050380, 1460049360,
1460048160, 1460048040, 1460046960, 1460046720, 1460046300, 1460045340,
1460045160, 1460043540, 1460042880, 1460042520, 1460041920, 1460041140,
1460040120, 1460039760, 1460038860, 1460038620, 1460038080, 1460038020,
1460037240, 1460036280, 1460035800, 1460034960, 1460034780, 1460034120,
1460034060, 1460033340, 1460032500, 1460032140, 1460031300, 1460031000,
1460030340, 1460030340, 1460029800, 1460029200, 1460028300, 1460025720,
1460090700, 1460088660, 1460088600, 1460087760, 1460087580, 1460086980,
1460086980, 1460086320, 1460085300, 1460085180, 1460084340, 1460084160,
1460083620, 1460083620, 1460082780, 1460081820, 1460081760, 1460080740,
1460080500, 1460080200, 1460079360, 1460079180, 1460077260, 1460076840,
1460076600, 1460076360, 1460075820, 1460074740, 1460070660, 1460070480,
1460069100, 1460068800, 1460068140, 1460068140, 1460067060, 1460063880,
1460063460, 1460062380, 1460062080, 1460061360, 1460061300, 1460059920,
1460057880, 1460057640, 1460060760, 1460060580, 1460059080, 1460058780,
1460058240, 1460058180, 1460057220), class = c("POSIXct", "POSIXt"
), tzone = "EST"), timefact_hour = c(14L, 14L, 14L, 13L, 13L,
13L, 13L, 13L, 12L, 12L, 12L, 12L, 12L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 10L, 10L, 10L, 10L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L,
5L, 23L, 23L, 23L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 21L, 21L,
21L, 21L, 21L, 21L, 21L, 20L, 20L, 20L, 20L, 20L, 20L, 19L, 19L,
19L, 19L, 19L, 18L, 18L, 17L, 17L, 17L, 17L, 17L, 16L, 16L, 15L,
15L, 15L, 15L, 15L, 14L, 14L, 15L, 15L, 14L, 14L, 14L, 14L, 14L
)), .Names = c("cnrd_marsh", "timefact", "timefact_hour"), row.names = c(NA,
-100L), class = "data.frame")
I would like to ask how to modify the code to get the proper output. Here is an example of what the output should look like
> head(output)
cnrd_marsh timefact timefact_hour timefact_diff
1 БЧ 2016-04-07 14:34:00 14 NA
2 БЧ 2016-04-07 14:15:00 14 19
3 БЧ 2016-04-07 14:10:00 14 5
4 БЧ 2016-04-07 13:58:00 13 NA
5 БЧ 2016-04-07 13:57:00 13 1
6 БЧ 2016-04-07 13:39:00 13 18
Here is a possible solution
library(dplyr)
#a is your data.frame
#Create leads of your timefact & timefact_hour column
# Use ifelse to assign a value to difftime depending on if leading timefact and current timefact is equal or not
b= lead(a$timefact)
a$DiffTimes = c(NA, ifelse(a$timefact_hour[-nrow(a)] == lead(a$timefact_hour)[-nrow(a)], difftime(a$timefact[-nrow(a)], b[-nrow(a)], units = "mins",tz="EST"),NA))
> head(a)
cnrd_marsh timefact timefact_hour DiffTimes
1 <U+0411><U+0427> 2016-04-07 14:34:00 14 NA
2 <U+0411><U+0427> 2016-04-07 14:15:00 14 19
3 <U+0411><U+0427> 2016-04-07 14:10:00 14 5
4 <U+0411><U+0427> 2016-04-07 13:58:00 13 NA
5 <U+0411><U+0427> 2016-04-07 13:57:00 13 1
6 <U+0411><U+0427> 2016-04-07 13:39:00 13 18