Search code examples
rtime-seriesxtsas.date

Converting yearly data into xts format


I have a very beginners question. I would like to work with the time series package xts. Therefore, to convert my data to xts format.

My current dataset "data" is a "data.table" "data.frame" format. The Year column is an "integer". The filtered data$Year column comprises data from years 1999-2018. On the bottom of my code, I have provided a sample of my data.

I am facing the following issue:

as.Date doesn't recognize my yearly data as dates. It automatically converts them to daily data, starting from the beginning of 1975...

I tried the following commands:

data$Year <- as.Date(data$Year)
data_xts <- as.xts(data, data[, -1], order.by = data$Year)

# now xts format
class(data_xts)

# here is the problem: > 0 seconds periodicity
# from 1975-06-23 to 1975-07-12
periodicity(data_xts$year) 

Here is the head of the wrong output

> head(data_xts)
           Year         ReporterName PartnerName      TradeValue in 1000 USD year_group  total_average_period_in_1000USD
1975-06-23 "1975-06-23" "Comoros"    "France"         "    1360.758"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "United States"  "    1392.263"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Germany"        "     633.666"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "United Kingdom" "     152.029"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Singapore"      "     450.452"         "1999-2002" "    524.8275"                 
1975-06-23 "1975-06-23" "Comoros"    "Indonesia"      "     194.580"         "1999-2002" "    524.8275"                 
           total_average_period_byPartner_in_1000USD percentage_of_group
1975-06-23 "3.638645e+03"                            "6.933030e+00"     
1975-06-23 "1.449703e+03"                            "2.762247e+00"     
1975-06-23 "6.692080e+02"                            "1.275101e+00"     
1975-06-23 "4.821123e+02"                            "9.186109e-01"     
1975-06-23 "4.325665e+02"                            "8.242070e-01"     
1975-06-23 "1.945800e+02"                            "3.707504e-01" 

Here is a sample of my initial data

dput(head(data, n = 100))
structure(list(Year = structure(c(2015, 2016, 2017, 2018, 2011, 
2012, 2013, 2014, 2007, 2009, 2010, 2015, 2016, 2017, 2018, 2007, 
2009, 2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 
2016, 2017, 2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 
2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 
2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 2007, 2009, 
2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2007, 2009, 
2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2015, 2016, 
2017, 2018, 2007, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017, 
2018, 2007, 2009, 2010, 2015, 2016, 2017), class = "Date"), ReporterName = c("Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola"), PartnerName = c("China", "China", "China", "China", 
"China", "China", "China", "China", "China", "China", "China", 
"India", "India", "India", "India", "United States", "United States", 
"United States", "Spain", "Spain", "Spain", "Spain", "United States", 
"United States", "United States", "United States", "South Africa", 
"South Africa", "South Africa", "South Africa", "Other Asia, nes", 
"Other Asia, nes", "Other Asia, nes", "Other Asia, nes", "United States", 
"United States", "United States", "United States", "Canada", 
"Canada", "Canada", "Canada", "France", "France", "France", "France", 
"Portugal", "Portugal", "Portugal", "Portugal", "United Arab Emirates", 
"United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
"India", "India", "India", "India", "India", "India", "India", 
"Italy", "Italy", "Italy", "Italy", "United Kingdom", "United Kingdom", 
"United Kingdom", "United Kingdom", "Canada", "Canada", "Canada", 
"Other Asia, nes", "Other Asia, nes", "Other Asia, nes", "Other Asia, nes", 
"Indonesia", "Indonesia", "Indonesia", "Indonesia", "Netherlands", 
"Netherlands", "Netherlands", "Netherlands", "France", "France", 
"France", "Canada", "Canada", "Canada", "Malaysia", "Malaysia", 
"Malaysia", "Malaysia", "Other Asia, nes", "Other Asia, nes", 
"Other Asia, nes", "Singapore", "Singapore", "Singapore"), `TradeValue in 1000 USD` = c(14320565.527, 
13923091.96, 19487066.539, 24517058.342, 24360792.847, 33710030.023, 
31947235.081, 27527110.851, 13459326.563, 15954060.922, 20963245.476, 
2676339.583, 1948845.077, 2890061.159, 3768940.47, 10875646.624, 
7708378.359, 9965785.888, 2245976.426, 882089.095, 1025777.275, 
1250554.873, 1265801.316, 1525650.265, 1079503.617, 1470132.736, 
1376041.349, 1309031.634, 1342549.642, 1161852.097, 1410793.303, 
1136068.366, 1388765.375, 145025.028, 16475024.144, 6594525.851, 
5018390.939, 2548807.59, 1035618.609, 873616.866, 1079684.282, 
647164.297, 1599581.068, 910864.068, 330799.771, 734551.345, 
1199355.049, 851431.606, 334787.698, 1074137.369, 665253.613, 
801908.541, 1016519.507, 884725.078, 6842018.3, 6932060.8, 6764232.765, 
4507416.181, 2376843.352, 3659557.185, 5117824.926, 1105765.643, 
488551.728, 460504.642, 468914.918, 1011126.417, 411203.618, 
373206.578, 425616.975, 2913186.035, 2324861.006, 4039116.578, 
5386493.281, 4699797.618, 4007020.057, 2329013.301, 566597.802, 
376715.236, 415879.351, 575477.283, 1107123.4, 507950.826, 93143.377, 
162760.789, 2592972.627, 3030206.205, 2213064.563, 3519981.595, 
3305027.169, 2719654.992, 94484.681, 480779.397, 571648.578, 
242727.975, 1913906.941, 1154653.223, 2699439.575, 456600.595, 
114849.93, 273956.82), year_group = structure(c(5L, 5L, 5L, 5L, 
4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 4L, 4L, 
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 5L, 5L, 5L
), .Label = c("1999-2002", "2003-2007", "2008-2010", "2011-2014", 
"2015-2018"), class = "factor"), total_average_period_in_1000USD = c(251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 1938789.42919853, 
1938789.42919853, 1938789.42919853, 1938789.42919853, 1416797.00579381, 
1416797.00579381, 1416797.00579381, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 1416797.00579381, 1416797.00579381, 
1416797.00579381, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 1938789.42919853, 1938789.42919853, 1938789.42919853, 
1938789.42919853, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 1938789.42919853, 1938789.42919853, 1938789.42919853, 
1938789.42919853, 1416797.00579381, 1416797.00579381, 1416797.00579381, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
1416797.00579381, 1416797.00579381, 1416797.00579381, 1938789.42919853, 
1938789.42919853, 1938789.42919853, 1938789.42919853, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 251327.404028933, 251327.404028933, 1416797.00579381, 
1416797.00579381, 1416797.00579381, 1938789.42919853, 1938789.42919853, 
1938789.42919853, 251327.404028933, 251327.404028933, 251327.404028933, 
251327.404028933, 1416797.00579381, 1416797.00579381, 1416797.00579381, 
251327.404028933, 251327.404028933, 251327.404028933), total_average_period_byPartner_in_1000USD = c(18061945.592, 
18061945.592, 18061945.592, 18061945.592, 29386292.2005, 29386292.2005, 
29386292.2005, 29386292.2005, 16792210.987, 16792210.987, 16792210.987, 
2821046.57225, 2821046.57225, 2821046.57225, 2821046.57225, 9516603.62366667, 
9516603.62366667, 9516603.62366667, 1351099.41725, 1351099.41725, 
1351099.41725, 1351099.41725, 1335271.9835, 1335271.9835, 1335271.9835, 
1335271.9835, 1297368.6805, 1297368.6805, 1297368.6805, 1297368.6805, 
1020163.018, 1020163.018, 1020163.018, 1020163.018, 7659187.131, 
7659187.131, 7659187.131, 7659187.131, 909021.0135, 909021.0135, 
909021.0135, 909021.0135, 893949.063, 893949.063, 893949.063, 
893949.063, 864927.9305, 864927.9305, 864927.9305, 864927.9305, 
842101.68475, 842101.68475, 842101.68475, 842101.68475, 6261432.0115, 
6261432.0115, 6261432.0115, 6261432.0115, 3718075.15433333, 3718075.15433333, 
3718075.15433333, 630934.23275, 630934.23275, 630934.23275, 630934.23275, 
555288.397, 555288.397, 555288.397, 555288.397, 3092387.873, 
3092387.873, 3092387.873, 4105581.06425, 4105581.06425, 4105581.06425, 
4105581.06425, 483667.418, 483667.418, 483667.418, 483667.418, 
467744.598, 467744.598, 467744.598, 467744.598, 2612081.13166667, 
2612081.13166667, 2612081.13166667, 3181554.58533333, 3181554.58533333, 
3181554.58533333, 347410.15775, 347410.15775, 347410.15775, 347410.15775, 
1922666.57966667, 1922666.57966667, 1922666.57966667, 339847.48225, 
339847.48225, 339847.48225), percentage_of_group = c(71.8662004320097, 
71.8662004320097, 71.8662004320097, 71.8662004320097, 15.1570313711933, 
15.1570313711933, 15.1570313711933, 15.1570313711933, 11.8522349485003, 
11.8522349485003, 11.8522349485003, 11.2245880354744, 11.2245880354744, 
11.2245880354744, 11.2245880354744, 6.71698456783132, 6.71698456783132, 
6.71698456783132, 5.3758539482406, 5.3758539482406, 5.3758539482406, 
5.3758539482406, 5.31287858822702, 5.31287858822702, 5.31287858822702, 
5.31287858822702, 5.16206613247255, 5.16206613247255, 5.16206613247255, 
5.16206613247255, 4.05909981023223, 4.05909981023223, 4.05909981023223, 
4.05909981023223, 3.95049973743988, 3.95049973743988, 3.95049973743988, 
3.95049973743988, 3.61687981066861, 3.61687981066861, 3.61687981066861, 
3.61687981066861, 3.55691042309532, 3.55691042309532, 3.55691042309532, 
3.55691042309532, 3.4414390020136, 3.4414390020136, 3.4414390020136, 
3.4414390020136, 3.35061625294572, 3.35061625294572, 3.35061625294572, 
3.35061625294572, 3.22955753585287, 3.22955753585287, 3.22955753585287, 
3.22955753585287, 2.62428219365846, 2.62428219365846, 2.62428219365846, 
2.51040763018969, 2.51040763018969, 2.51040763018969, 2.51040763018969, 
2.2094224032015, 2.2094224032015, 2.2094224032015, 2.2094224032015, 
2.18266121424175, 2.18266121424175, 2.18266121424175, 2.11760029347137, 
2.11760029347137, 2.11760029347137, 2.11760029347137, 1.92445157291451, 
1.92445157291451, 1.92445157291451, 1.92445157291451, 1.8610966830587, 
1.8610966830587, 1.8610966830587, 1.8610966830587, 1.84365235173768, 
1.84365235173768, 1.84365235173768, 1.64100058387906, 1.64100058387906, 
1.64100058387906, 1.38230114257658, 1.38230114257658, 1.38230114257658, 
1.38230114257658, 1.35705155488342, 1.35705155488342, 1.35705155488342, 
1.35221021186721, 1.35221021186721, 1.35221021186721)), row.names = c(NA, 
-100L), groups = structure(list(ReporterName = c("Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola", "Angola", "Angola", "Angola", 
"Angola", "Angola", "Angola", "Angola"), PartnerName = c("Canada", 
"Canada", "Canada", "China", "China", "China", "France", "France", 
"India", "India", "India", "Indonesia", "Italy", "Malaysia", 
"Netherlands", "Other Asia, nes", "Other Asia, nes", "Other Asia, nes", 
"Portugal", "Singapore", "South Africa", "Spain", "United Arab Emirates", 
"United Kingdom", "United States", "United States", "United States"
), year_group = structure(c(3L, 4L, 5L, 3L, 4L, 5L, 3L, 5L, 3L, 
4L, 5L, 5L, 5L, 5L, 5L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 3L, 
4L, 5L), .Label = c("1999-2002", "2003-2007", "2008-2010", "2011-2014", 
"2015-2018"), class = "factor"), .rows = structure(list(70:72, 
    88:90, 39:42, 9:11, 5:8, 1:4, 85:87, 43:46, 59:61, 55:58, 
    12:15, 77:80, 62:65, 91:94, 81:84, 95:97, 73:76, 31:34, 47:50, 
    98:100, 27:30, 19:22, 51:54, 66:69, 16:18, 35:38, 23:26), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 27L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Solution

  • At first, in the main dataset, it seems you should change class = "Date" to class = "Integer" for the Year variable in your dput data to prevent it to create year "1975"

    structure(list(Year = structure(c(2015, 2016, 2017, 2018, 2011, 
    2012, 2013, 2014, 2007, 2009, 2010, 2015, 2016, 2017, 2018, 2007, 
    2009, 2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 
    2016, 2017, 2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 
    2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 
    2018, 2015, 2016, 2017, 2018, 2011, 2012, 2013, 2014, 2007, 2009, 
    2010, 2015, 2016, 2017, 2018, 2015, 2016, 2017, 2018, 2007, 2009, 
    2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2015, 2016, 
    2017, 2018, 2007, 2009, 2010, 2012, 2013, 2014, 2015, 2016, 2017, 
    2018, 2007, 2009, 2010, 2015, 2016, 2017), class = "Integer"), ...
    
    

    R does not recognize just year as a Date format, so you should convert year to y/m/d format.
    You can do it by converting for example "2018" to "2018-12-31" (considering end of the year as year (or whatever you want).
    So the codes below will do it:

    library(xts)
    
    data_new <- data #creating a new dataset to preserve original  
    class(data_new$Year)  #now class of year is integer
    
    ## [1] "Integer"
    
    
    #formatting integer year to a Date format
    data_new$Year <- as.Date(paste(data_new$Year, 12, 31, sep = "-")) 
    
    class(data_new$Year) #check changed format of year
    
    ## [1] "Date"
    
    #creating xts object
    data_xts <- as.xts(data_new, data_new[,-c("Year")], order.by = data_new$Year)
    
    head(data_new)
    
    ##         Year ReporterName PartnerName TradeValue in 1000 USD year_group
    ## 1 2015-12-31       Angola       China               14320566  2015-2018
    ## 2 2016-12-31       Angola       China               13923092  2015-2018
    ## 3 2017-12-31       Angola       China               19487067  2015-2018
    ## 4 2018-12-31       Angola       China               24517058  2015-2018
    ## 5 2011-12-31       Angola       China               24360793  2011-2014
    ## 6 2012-12-31       Angola       China               33710030  2011-2014
    ##   total_average_period_in_1000USD total_average_period_byPartner_in_1000USD
    ## 1                        251327.4                                  18061946
    ## 2                        251327.4                                  18061946
    ## 3                        251327.4                                  18061946
    ## 4                        251327.4                                  18061946
    ## 5                       1938789.4                                  29386292
    ## 6                       1938789.4                                  29386292
    ##   percentage_of_group
    ## 1            71.86620
    ## 2            71.86620
    ## 3            71.86620
    ## 4            71.86620
    ## 5            15.15703
    ## 6            15.15703
    
    
    # check periodicity
    periodicity(data_xts$Year)
    
    ## 0 seconds periodicity from 2007-12-31 to 2018-12-31