Search code examples
dateparsingtimelubridate

How to update number into time format in R for numeric smaller than 0


I have this original dataset of time series, where V2 is the date (class: integer) and V3 is the timestamp but it is written now in number starting from 0 to 2355 (every 5-min frequency) in which 0 means 00:00, 5 means 00:05, ..., 55 means 00:55, 100 means 01:00, 105 means 01:05 and so on. The format of column V3 (time) as follows:

> head(data0$V3, 20)
[1]   0   5  10  15  20  25  30  35  40  45  50  55 100 105 110 115 120 125
[19] 130 135

I want to combine my date and time columns into one column called 'datetime'. I tried these codes below for this purpose.

data0$V3 <- sub("(\\d+)(\\d{2})", "\\1:\\2", data0$V3) # put in delimitter

The result:

> head(data0$V3, 20)
 [1] "0"    "5"    "10"   "15"   "20"   "25"   "30"   "35"   "40"   "45" 
[11] "50"   "55"   "1:00" "1:05" "1:10" "1:15" "1:20" "1:25" "1:30" "1:35"

Then

library(lubridate)
data0$datetime = ymd_hm(paste(data0$V2, data0$V3))

The results:

> head(data0$datetime, 20)
[1] NA                        NA
[3] NA                        NA
...
[11] NA                        NA
[13] "2015-01-01 01:00:00 UTC" "2015-01-01 01:05:00 UTC"
[15] "2015-01-01 01:10:00 UTC" "2015-01-01 01:15:00 UTC"
...
[19] "2015-01-01 01:30:00 UTC" "2015-01-01 01:35:00 UTC"

From this result, they seem to work OK for the time from 100 to 2355 but fails to parse for time from 0 to 55. My guess is that these numbers are not understood as time format like the others from the beginning.

Can anyone help to solve the problem with the timestamp from 0 to 55 in this case so that I have the same result throughout the dataset? My idea is maybe to convert the [0;55] into format like [00;055] before applying the first code line but is it possible because as number, 0 is always omitted?


Solution

  • The problem is that there are no leading zeros in the first numbers. You can add leading zeros with answers from this question: How to add leading zeros?. I chose formatC() because it allows to format the output to a fixed width. In case of Hour:Minute format, desirable width is 4 digits: Like 00:00 or 22:45.

    I expanded your example data until 10:55 so we can have some cases with two-digit-hours.

    Below is the code:

    times <- vector(mode = "integer", length = 0)
    for (i in (0:10)*100) {
      times <- c(times, 
                 seq(from = i,
                   to = i + 55,
                   by = 5))
    }
    times
    #>   [1]    0    5   10   15   20   25   30   35   40   45   50   55  100  105  110
    #>  [16]  115  120  125  130  135  140  145  150  155  200  205  210  215  220  225
    #>  [31]  230  235  240  245  250  255  300  305  310  315  320  325  330  335  340
    #>  [46]  345  350  355  400  405  410  415  420  425  430  435  440  445  450  455
    #>  [61]  500  505  510  515  520  525  530  535  540  545  550  555  600  605  610
    #>  [76]  615  620  625  630  635  640  645  650  655  700  705  710  715  720  725
    #>  [91]  730  735  740  745  750  755  800  805  810  815  820  825  830  835  840
    #> [106]  845  850  855  900  905  910  915  920  925  930  935  940  945  950  955
    #> [121] 1000 1005 1010 1015 1020 1025 1030 1035 1040 1045 1050 1055
    
    # add leading zeros so that total width is always 4
    (times <- formatC(times, width = 4, format = "d", flag = "0"))
    #>   [1] "0000" "0005" "0010" "0015" "0020" "0025" "0030" "0035" "0040" "0045"
    #>  [11] "0050" "0055" "0100" "0105" "0110" "0115" "0120" "0125" "0130" "0135"
    #>  [21] "0140" "0145" "0150" "0155" "0200" "0205" "0210" "0215" "0220" "0225"
    #>  [31] "0230" "0235" "0240" "0245" "0250" "0255" "0300" "0305" "0310" "0315"
    #>  [41] "0320" "0325" "0330" "0335" "0340" "0345" "0350" "0355" "0400" "0405"
    #>  [51] "0410" "0415" "0420" "0425" "0430" "0435" "0440" "0445" "0450" "0455"
    #>  [61] "0500" "0505" "0510" "0515" "0520" "0525" "0530" "0535" "0540" "0545"
    #>  [71] "0550" "0555" "0600" "0605" "0610" "0615" "0620" "0625" "0630" "0635"
    #>  [81] "0640" "0645" "0650" "0655" "0700" "0705" "0710" "0715" "0720" "0725"
    #>  [91] "0730" "0735" "0740" "0745" "0750" "0755" "0800" "0805" "0810" "0815"
    #> [101] "0820" "0825" "0830" "0835" "0840" "0845" "0850" "0855" "0900" "0905"
    #> [111] "0910" "0915" "0920" "0925" "0930" "0935" "0940" "0945" "0950" "0955"
    #> [121] "1000" "1005" "1010" "1015" "1020" "1025" "1030" "1035" "1040" "1045"
    #> [131] "1050" "1055"
    
    # add divider : 
    times <- sub("(\\d+)(\\d{2})", "\\1:\\2", times)
    
    lubridate::ymd_hm(paste("2015-01-01", times))
    #>   [1] "2015-01-01 00:00:00 UTC" "2015-01-01 00:05:00 UTC"
    #>   [3] "2015-01-01 00:10:00 UTC" "2015-01-01 00:15:00 UTC"
    #>   [5] "2015-01-01 00:20:00 UTC" "2015-01-01 00:25:00 UTC"
    #>   [7] "2015-01-01 00:30:00 UTC" "2015-01-01 00:35:00 UTC"
    #>   [9] "2015-01-01 00:40:00 UTC" "2015-01-01 00:45:00 UTC"
    #>  [11] "2015-01-01 00:50:00 UTC" "2015-01-01 00:55:00 UTC"
    #>  [13] "2015-01-01 01:00:00 UTC" "2015-01-01 01:05:00 UTC"
    #>  [15] "2015-01-01 01:10:00 UTC" "2015-01-01 01:15:00 UTC"
    #>  [17] "2015-01-01 01:20:00 UTC" "2015-01-01 01:25:00 UTC"
    #>  [19] "2015-01-01 01:30:00 UTC" "2015-01-01 01:35:00 UTC"
    #>  [21] "2015-01-01 01:40:00 UTC" "2015-01-01 01:45:00 UTC"
    #>  [23] "2015-01-01 01:50:00 UTC" "2015-01-01 01:55:00 UTC"
    #>  [25] "2015-01-01 02:00:00 UTC" "2015-01-01 02:05:00 UTC"
    #>  [27] "2015-01-01 02:10:00 UTC" "2015-01-01 02:15:00 UTC"
    #>  [29] "2015-01-01 02:20:00 UTC" "2015-01-01 02:25:00 UTC"
    #>  [31] "2015-01-01 02:30:00 UTC" "2015-01-01 02:35:00 UTC"
    #>  [33] "2015-01-01 02:40:00 UTC" "2015-01-01 02:45:00 UTC"
    #>  [35] "2015-01-01 02:50:00 UTC" "2015-01-01 02:55:00 UTC"
    #>  [37] "2015-01-01 03:00:00 UTC" "2015-01-01 03:05:00 UTC"
    #>  [39] "2015-01-01 03:10:00 UTC" "2015-01-01 03:15:00 UTC"
    #>  [41] "2015-01-01 03:20:00 UTC" "2015-01-01 03:25:00 UTC"
    #>  [43] "2015-01-01 03:30:00 UTC" "2015-01-01 03:35:00 UTC"
    #>  [45] "2015-01-01 03:40:00 UTC" "2015-01-01 03:45:00 UTC"
    #>  [47] "2015-01-01 03:50:00 UTC" "2015-01-01 03:55:00 UTC"
    #>  [49] "2015-01-01 04:00:00 UTC" "2015-01-01 04:05:00 UTC"
    #>  [51] "2015-01-01 04:10:00 UTC" "2015-01-01 04:15:00 UTC"
    #>  [53] "2015-01-01 04:20:00 UTC" "2015-01-01 04:25:00 UTC"
    #>  [55] "2015-01-01 04:30:00 UTC" "2015-01-01 04:35:00 UTC"
    #>  [57] "2015-01-01 04:40:00 UTC" "2015-01-01 04:45:00 UTC"
    #>  [59] "2015-01-01 04:50:00 UTC" "2015-01-01 04:55:00 UTC"
    #>  [61] "2015-01-01 05:00:00 UTC" "2015-01-01 05:05:00 UTC"
    #>  [63] "2015-01-01 05:10:00 UTC" "2015-01-01 05:15:00 UTC"
    #>  [65] "2015-01-01 05:20:00 UTC" "2015-01-01 05:25:00 UTC"
    #>  [67] "2015-01-01 05:30:00 UTC" "2015-01-01 05:35:00 UTC"
    #>  [69] "2015-01-01 05:40:00 UTC" "2015-01-01 05:45:00 UTC"
    #>  [71] "2015-01-01 05:50:00 UTC" "2015-01-01 05:55:00 UTC"
    #>  [73] "2015-01-01 06:00:00 UTC" "2015-01-01 06:05:00 UTC"
    #>  [75] "2015-01-01 06:10:00 UTC" "2015-01-01 06:15:00 UTC"
    #>  [77] "2015-01-01 06:20:00 UTC" "2015-01-01 06:25:00 UTC"
    #>  [79] "2015-01-01 06:30:00 UTC" "2015-01-01 06:35:00 UTC"
    #>  [81] "2015-01-01 06:40:00 UTC" "2015-01-01 06:45:00 UTC"
    #>  [83] "2015-01-01 06:50:00 UTC" "2015-01-01 06:55:00 UTC"
    #>  [85] "2015-01-01 07:00:00 UTC" "2015-01-01 07:05:00 UTC"
    #>  [87] "2015-01-01 07:10:00 UTC" "2015-01-01 07:15:00 UTC"
    #>  [89] "2015-01-01 07:20:00 UTC" "2015-01-01 07:25:00 UTC"
    #>  [91] "2015-01-01 07:30:00 UTC" "2015-01-01 07:35:00 UTC"
    #>  [93] "2015-01-01 07:40:00 UTC" "2015-01-01 07:45:00 UTC"
    #>  [95] "2015-01-01 07:50:00 UTC" "2015-01-01 07:55:00 UTC"
    #>  [97] "2015-01-01 08:00:00 UTC" "2015-01-01 08:05:00 UTC"
    #>  [99] "2015-01-01 08:10:00 UTC" "2015-01-01 08:15:00 UTC"
    #> [101] "2015-01-01 08:20:00 UTC" "2015-01-01 08:25:00 UTC"
    #> [103] "2015-01-01 08:30:00 UTC" "2015-01-01 08:35:00 UTC"
    #> [105] "2015-01-01 08:40:00 UTC" "2015-01-01 08:45:00 UTC"
    #> [107] "2015-01-01 08:50:00 UTC" "2015-01-01 08:55:00 UTC"
    #> [109] "2015-01-01 09:00:00 UTC" "2015-01-01 09:05:00 UTC"
    #> [111] "2015-01-01 09:10:00 UTC" "2015-01-01 09:15:00 UTC"
    #> [113] "2015-01-01 09:20:00 UTC" "2015-01-01 09:25:00 UTC"
    #> [115] "2015-01-01 09:30:00 UTC" "2015-01-01 09:35:00 UTC"
    #> [117] "2015-01-01 09:40:00 UTC" "2015-01-01 09:45:00 UTC"
    #> [119] "2015-01-01 09:50:00 UTC" "2015-01-01 09:55:00 UTC"
    #> [121] "2015-01-01 10:00:00 UTC" "2015-01-01 10:05:00 UTC"
    #> [123] "2015-01-01 10:10:00 UTC" "2015-01-01 10:15:00 UTC"
    #> [125] "2015-01-01 10:20:00 UTC" "2015-01-01 10:25:00 UTC"
    #> [127] "2015-01-01 10:30:00 UTC" "2015-01-01 10:35:00 UTC"
    #> [129] "2015-01-01 10:40:00 UTC" "2015-01-01 10:45:00 UTC"
    #> [131] "2015-01-01 10:50:00 UTC" "2015-01-01 10:55:00 UTC"
    

    Created on 2024-02-07 with reprex v2.1.0