Search code examples
rformatstrptimechron

How to convert variable to time format when values have different formatting


I am trying to convert a variable that contains that length of time of a phone call from a factor into a chronological format using the chron function. However, the values for calls that were less then 1 hour are reported as m:s, while calls that were longer then an hour are reported as h:m:s. As such when I use the chron function variables that are not in the h:m:s format are converted to missing.

How can I convert the variable to the correct formatting?

Using the Chron function

dat$Duration <- chron(times = as.character(dat$Duration), format = c(times = "h:m:s"))

> dat
    Duration              Type                date
346     <NA> Incoming FaceTime 2014-11-22 16:55:45
349     <NA> Outgoing FaceTime 2014-11-22 23:02:24
350     <NA> Incoming FaceTime 2014-11-23 05:25:11
351     <NA>          Canceled 2014-11-23 06:06:19
352     <NA>          Canceled 2014-11-24 10:11:38
353     <NA>          Canceled 2014-11-24 10:12:10
2       <NA>          Canceled 2014-12-26 14:41:02
3       <NA> Outgoing FaceTime 2015-01-03 14:14:21
4   01:33:35 Outgoing FaceTime 2015-01-04 15:16:34
5       <NA> Outgoing FaceTime 2015-01-05 23:57:34
6       <NA> Outgoing FaceTime 2015-01-06 13:57:42

the data.

structure(list(Duration = structure(c(290L, 301L, 263L, 1L, 1L, 
1L, 1L, 134L, 13L, 156L, 64L), .Label = c("0:00", "0:59", "1:00:38", 
"1:00:55", "1:01:06", "1:05", "1:10:07", "1:10:12", "1:12:59", 
"1:15:06", "1:15:55", "1:22:59", "1:33:35", "1:33:52", "1:35:35", 
"1:39:46", "1:40", "1:50:32", "1:57:40", "1:58:29", "10:03", 
"10:17", "10:19", "10:21", "10:34", "10:38", "10:48", "11:09", 
"11:22", "11:24", "11:28", "11:32", "11:48", "11:51", "12:09", 
"12:19", "12:22", "12:45", "12:49", "12:51", "13:05", "13:15", 
"13:25", "13:35", "13:49", "14:16", "14:23", "14:34", "14:43", 
"14:50", "14:55", "15:20", "15:32", "15:33", "15:43", "15:44", 
"16:08", "16:09", "16:20", "16:40", "16:41", "16:50", "16:59", 
"17:02", "17:13", "17:17", "17:25", "17:46", "18:29", "18:30", 
"18:37", "18:48", "19:35", "19:38", "19:58", "2:02", "2:07", 
"2:16:59", "20:40", "21:42", "22:07", "22:42", "22:48", "23:10", 
"24:17", "25:26", "25:45", "26:36", "27:00", "27:16", "27:48", 
"28:01", "28:32", "28:42", "29:47", "29:51", "3:05", "3:10:17", 
"3:53", "3:58", "30:22", "30:27", "30:28", "30:38", "30:41", 
"31:22", "32:18", "33:35", "34:29", "37:44", "38:10", "4:15", 
"4:22", "4:37", "4:52", "40:14", "41:06", "41:51", "43:21", "44:13", 
"44:27", "44:58", "46:14", "46:56", "49:27", "5:03", "5:13", 
"5:43", "5:51", "50:15", "50:27", "51:06", "55:11", "58:10", 
"58:27", "6:27", "6:29", "6:35", "6:37", "6:49", "6:57", "7:08", 
"7:09", "7:29", "7:41", "7:47", "8:20", "8:25", "8:30", "8:41", 
"8:47", "8:55", "8:56", "8:57", "8:58", "9:35", "9:40", "9:55", 
"0:02", "0:03", "0:04", "0:06", "0:08", "0:10", "0:11", "0:12", 
"0:13", "0:14", "0:19", "0:22", "0:23", "0:25", "0:28", "0:29", 
"0:32", "0:36", "0:37", "0:38", "0:40", "0:41", "0:42", "0:43", 
"0:44", "0:47", "0:51", "0:53", "0:54", "0:56", "0:58", "1:00", 
"1:02", "1:02:22", "1:03", "1:05:59", "1:08", "1:13", "1:13:34", 
"1:24:20", "1:25", "1:31", "1:41", "1:42", "1:43", "1:48", "1:50", 
"1:55", "10:07", "10:12", "10:31", "10:35", "11:06", "11:07", 
"11:12", "11:27", "11:36", "12:01", "12:59", "13:31", "13:34", 
"13:36", "14:22", "15:04", "15:27", "15:48", "15:54", "15:57", 
"16:10", "16:12", "16:23", "17:12", "17:29", "17:44", "17:54", 
"18:12", "18:46", "2:01", "2:04", "2:08", "2:38", "2:57", "20:36", 
"20:39", "20:41", "21:03", "21:58", "22:04", "23:35", "24:02", 
"26:06", "26:25", "26:33", "26:43", "26:59", "27:09", "27:13", 
"27:19", "28:09", "28:30", "28:34", "3:00", "3:30", "3:37", "3:38", 
"3:41", "30:23", "30:31", "31:03", "31:09", "31:39", "31:47", 
"31:59", "32:12", "32:15", "33:17", "33:31", "34:51", "35:24", 
"36:01", "36:26", "36:47", "37:40", "37:50", "39:16", "4:02", 
"4:08", "4:20", "4:24", "4:31", "40:39", "43:49", "45:48", "47:28", 
"48:39", "5:05", "5:44", "5:55", "5:57", "54:04", "6:06", "6:43", 
"6:46", "6:50", "7:07", "7:22", "7:58", "8:00", "8:21", "8:26", 
"9:09", "9:22"), class = "factor"), Type = structure(c(3L, 5L, 
3L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L), .Label = c("Canceled", "Incoming", 
"Incoming FaceTime", "Missed", "Outgoing FaceTime", "Outgoing"
), class = "factor"), date = structure(list(sec = c(45, 24, 11, 
19, 38, 10, 2, 21, 34, 34, 42), min = c(55L, 2L, 25L, 6L, 11L, 
12L, 41L, 14L, 16L, 57L, 57L), hour = c(16L, 23L, 5L, 6L, 10L, 
10L, 14L, 14L, 15L, 23L, 13L), mday = c(22L, 22L, 23L, 23L, 24L, 
24L, 26L, 3L, 4L, 5L, 6L), mon = c(10L, 10L, 10L, 10L, 10L, 10L, 
11L, 0L, 0L, 0L, 0L), year = c(114L, 114L, 114L, 114L, 114L, 
114L, 114L, 115L, 115L, 115L, 115L), wday = c(6L, 6L, 0L, 0L, 
1L, 1L, 5L, 6L, 0L, 1L, 2L), yday = c(325L, 325L, 326L, 326L, 
327L, 327L, 359L, 2L, 3L, 4L, 5L), isdst = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L), zone = c("AEDT", "AEDT", "AEDT", 
"AEDT", "AEDT", "AEDT", "AEDT", "AEDT", "AEDT", "AEDT", "AEDT"
), gmtoff = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
NA_integer_, NA_integer_)), .Names = c("sec", "min", "hour", 
"mday", "mon", "year", "wday", "yday", "isdst", "zone", "gmtoff"
), class = c("POSIXlt", "POSIXt"))), .Names = c("Duration", "Type", 
"date"), row.names = c(346L, 349L, 350L, 351L, 352L, 353L, 2L, 
3L, 4L, 5L, 6L), class = "data.frame")

Solution

  • The obvious approach would be to manipulate your data in a first pass, then call the chron function. You could pass every date through something like this:

    date = re.sub(r'^(\d+:\d+)$', r'00:\1', date)
    

    or

    if date.count(':') == 1: date = '00:'+date
    

    EDIT: How did I get here? I don't even know R, sorry. The above is sample code in Python. Thanks for @thelatemail for translating this to R

    dat$Duration <- chron(times=gsub("^(\\d+:\\d+)$","00:\\1",dat$Duration),
                    format=c(times="h:m:s"))