Search code examples
Tags: r, date, predict, spline

Problems with predict() using new data


My problem goes as follows. I have the following dataset.

This is the IBEX (the Spanish stock market index), ranging from 2020-01-01 to 2022-05-01:

tail(ibex, 3)
Date Open High Low Close Adj.Close Volume
2022-04-01 8482 8878 8345 8584 8584 3824235500
2022-05-01 8517 8651 8139 8139 8139 1454670100
2022-05-10 8219 8259 8139 8139 8139 204415904

dim(ibex)
[1] 270 7

Step 1. I select Date and Open
ibex$Date <- as.Date(ibex$Date)
ibex <- as_tibble(ibex)
ibex.1 <- ibex[,c(1,2)]

tail(ibex.1, 3)

Date Open
2022-04-01 8482
2022-05-01 8517
2022-05-10 8219

Step 2. I create a ts format

ibex.1 <- ts(data = ibex.1, start = c(2000,1), end = c(2022,5), frequency = 12)

str(ibex.1)

Time-Series [1:269, 1:2] from 2000 to 2022: 10957 10988 11017 11048 11078 ...

 - attr(*, "dimnames")=List of 2
  ..$ : NULL
  ..$ : chr [1:2] "Date" "Open"

tail(ibex.1, 3)

Date Open
Mar 2022 19052 8462.1
Apr 2022 19083 8481.7
May 2022 19113 8516.6

Step 3. I produce a spline model using cross validation
ibex.spl <- smooth.spline(ibex.1[,1], ibex.1[,2], cv=TRUE)

Step 4. I want to predict new Open values for June, July, August 2022, i.e., values which are not in the dataset. I do this:

nuevafecha <- seq(as.Date("2022-06-01"),as.Date("2022-08-01"),by="1 month")
nuevafecha <- as_tibble(nuevafecha)

head(nuevafecha)

value
2022-06-01
2022-07-01
2022-08-01

Step 5. And finally this:

ibex11 <- ibex.1 %>% add_row(Date=nuevafecha$value)

l<- predict(ibex.spl, ibex.1=ibex11[270:273], se=TRUE)

tail(l$fit)

264 265 266 267 268 269
8354.129 8371.194 8391.543 8412.822 8439.675 8469.041

In other words: values 270:273, corresponding to June, July, August 2022 are not shown.

I just obtain predictions for the original values.

How can I get predictions for 270,271,273?


Solution

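  • Make sure that your x value is numeric, and pass the new values to predict() through its `x` argument. In the question's call, `ibex.1 = ...` does not match any argument of predict() for a smooth.spline fit, so no new x is supplied and the fitted values at the original x are returned: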

    # Fit model
    # smooth.spline() needs a numeric x, so the Date column is converted to
    # its numeric form (days since 1970-01-01) before fitting.
    dat$Date2 <- as.numeric(as.Date(dat$Date, format = "%Y-%m-%d"))
    mymod <- smooth.spline(x = dat$Date2, y = dat$Open, cv = TRUE)

    # Predict
    class(mymod)
    #> [1] "smooth.spline"
    # New dates must be on the same numeric scale as the fitted x and must be
    # passed through the `x` argument; otherwise predict() only returns the
    # fitted values at the original x.
    x_new <- as.numeric(seq(as.Date("2022-06-01"), as.Date("2022-08-01"), by = "day"))
    mypreds <- predict(mymod, x = x_new)

    # Visualize results
    # Axis limits cover both the observed data and the predictions, so that
    # nothing is clipped when the extrapolation leaves the observed range in
    # either direction.
    plot(mymod, xlim = range(dat$Date2, x_new),
         ylim = range(dat$Open, mypreds$y),
         xlab = "date", ylab = "open value")
    points(x = dat$Date2, y = dat$Open, col = "red")
    points(x = mypreds$x, y = mypreds$y, col = "green")
    legend("topleft",
           legend = c("fitted", "actual", "predicted"),
           pch = c(1, 1, 1),
           col = c("black", "red", "green"), inset = 0.05)
    
    Created on 2022-06-15 by the reprex package (v2.0.1)
    

    enter image description here

    data:

    structure(list(Date = c("2020-01-02", "2020-01-03", "2020-01-06", 
    "2020-01-07", "2020-01-08", "2020-01-09", "2020-01-10", "2020-01-13", 
    "2020-01-14", "2020-01-15", "2020-01-16", "2020-01-17", "2020-01-20", 
    "2020-01-21", "2020-01-22", "2020-01-23", "2020-01-24", "2020-01-27", 
    "2020-01-28", "2020-01-29", "2020-01-30", "2020-01-31", "2020-02-03", 
    "2020-02-04", "2020-02-05", "2020-02-06", "2020-02-07", "2020-02-10", 
    "2020-02-11", "2020-02-12", "2020-02-13", "2020-02-14", "2020-02-17", 
    "2020-02-18", "2020-02-19", "2020-02-20", "2020-02-21", "2020-02-24", 
    "2020-02-25", "2020-02-26", "2020-02-27", "2020-02-28", "2020-03-02", 
    "2020-03-03", "2020-03-04", "2020-03-05", "2020-03-06", "2020-03-09", 
    "2020-03-10", "2020-03-11", "2020-03-12", "2020-03-13", "2020-03-16", 
    "2020-03-17", "2020-03-18", "2020-03-19", "2020-03-20", "2020-03-23", 
    "2020-03-24", "2020-03-25", "2020-03-26", "2020-03-27", "2020-03-30", 
    "2020-03-31", "2020-04-01", "2020-04-02", "2020-04-03", "2020-04-06", 
    "2020-04-07", "2020-04-08", "2020-04-09", "2020-04-14", "2020-04-15", 
    "2020-04-16", "2020-04-17", "2020-04-20", "2020-04-21", "2020-04-22", 
    "2020-04-23", "2020-04-24", "2020-04-27", "2020-04-28", "2020-04-29", 
    "2020-04-30"), Open = c(9639.099609, 9631.200195, 9585.400391, 
    9623.099609, 9535.099609, 9629.200195, 9611.299805, 9586.599609, 
    9548.099609, 9521.200195, 9516.700195, 9616.099609, 9676.599609, 
    9593.400391, 9622, 9549.400391, 9576.799805, 9447.099609, 9401.799805, 
    9532.700195, 9460.799805, 9519.299805, 9404.400391, 9465.900391, 
    9553.400391, 9767.099609, 9789.299805, 9790.599609, 9863.400391, 
    9897.900391, 9893.900391, 9910.900391, 9979.200195, 9977.400391, 
    10042, 10048.700195, 9893, 9649.799805, 9507, 9213, 9182.200195, 
    8748.099609, 8910.200195, 8860.200195, 8827.799805, 8960.799805, 
    8532.599609, 7884, 7815.600098, 7589.299805, 7040.799805, 6763.700195, 
    6331, 6444.5, 6370.5, 6368.200195, 6645.399902, 6223.700195, 
    6433.299805, 6985.899902, 6789, 6916.700195, 6748.600098, 6746.799805, 
    6627.299805, 6633.700195, 6563.600098, 6807.899902, 6986.899902, 
    6930.200195, 7049.299805, 7209.700195, 7065.600098, 6930.799805, 
    6942.5, 6932.5, 6741.299805, 6685.100098, 6758.700195, 6641.299805, 
    6780.899902, 6724.600098, 6829.399902, 7084.799805), High = c(9705.400391, 
    9650.700195, 9618.200195, 9657.900391, 9604.299805, 9644.799805, 
    9623.599609, 9586.599609, 9548.400391, 9530.200195, 9579.5, 9709.900391, 
    9680.900391, 9620.700195, 9632.799805, 9604.700195, 9639.900391, 
    9482.799805, 9488, 9574.299805, 9528.400391, 9545.099609, 9431.599609, 
    9577.299805, 9721.900391, 9816.200195, 9816.200195, 9820.400391, 
    9884, 9946.599609, 9910.099609, 9969.700195, 10022.200195, 10041.5, 
    10100.200195, 10050.299805, 9946.400391, 9676.799805, 9518.900391, 
    9362.799805, 9204.400391, 8818.599609, 8913.5, 9014.299805, 8963.700195, 
    8962.5, 8542, 8022.899902, 8007.700195, 7717.100098, 7077.700195, 
    7140.5, 6362.5, 6557.299805, 6524.200195, 6506.399902, 6769.899902, 
    6472, 6717.299805, 7058.100098, 7033.200195, 6936.700195, 6789.100098, 
    6802, 6687.799805, 6658.899902, 6632.5, 6874.899902, 7119.100098, 
    6952.5, 7116.799805, 7209.700195, 7086.799805, 6950.899902, 7000.299805, 
    6933.299805, 6760.899902, 6729.600098, 6797.600098, 6710.799805, 
    6790.100098, 6857.899902, 7055.700195, 7128.399902), Low = c(9615.099609, 
    9581.200195, 9492.700195, 9557.900391, 9520.299805, 9573.799805, 
    9557.900391, 9507.400391, 9466, 9475, 9481.599609, 9611.299805, 
    9623.099609, 9550, 9557, 9499.299805, 9552.5, 9357.799805, 9363.799805, 
    9510.700195, 9439.299805, 9365.099609, 9361.5, 9462.400391, 9540.200195, 
    9757, 9761.799805, 9773.5, 9817.799805, 9888.599609, 9794.299805, 
    9905.200195, 9955.200195, 9967.799805, 10020.700195, 9931, 9843.5, 
    9459.599609, 9248, 9030.700195, 8877.400391, 8582.700195, 8541.099609, 
    8776.400391, 8745.900391, 8639.900391, 8310.400391, 7621.399902, 
    7440.600098, 7364.600098, 6347, 6468.299805, 5814.5, 6083, 6174.100098, 
    6228.299805, 6371.100098, 6148, 6403.600098, 6625.799805, 6759.100098, 
    6661.899902, 6507, 6624.700195, 6565.200195, 6424.5, 6509.399902, 
    6728.600098, 6903.600098, 6860.600098, 6918.700195, 7080.299805, 
    6816.399902, 6733.100098, 6845, 6715, 6634.899902, 6654.200195, 
    6713.600098, 6578.100098, 6667.299805, 6707.899902, 6816.799805, 
    6918.299805), Close = c(9691.200195, 9646.599609, 9600.900391, 
    9579.799805, 9591.400391, 9581.799805, 9573.599609, 9543.900391, 
    9528.299805, 9511.700195, 9572.5, 9681.299805, 9658.799805, 9611.299805, 
    9573.700195, 9518.5, 9562, 9366.299805, 9484.200195, 9546.700195, 
    9477.900391, 9367.900391, 9404.700195, 9562.900391, 9717.799805, 
    9811.299805, 9811, 9816, 9882.599609, 9940.400391, 9909.799805, 
    9956.799805, 10022.200195, 10005.799805, 10083.599609, 9931, 
    9886.200195, 9483.5, 9250.799805, 9316.799805, 8985.900391, 8723.200195, 
    8741.5, 8811.599609, 8910, 8683, 8375.599609, 7708.700195, 7461.5, 
    7436.399902, 6390.899902, 6629.600098, 6107.200195, 6498.5, 6274.799805, 
    6395.799805, 6443.299805, 6230.200195, 6717.299805, 6942.399902, 
    7033.200195, 6777.899902, 6659.899902, 6785.399902, 6579.399902, 
    6574.100098, 6581.600098, 6844.299805, 7002, 6951.799805, 7070.600098, 
    7108.600098, 6839.5, 6763.399902, 6875.799805, 6831.5, 6634.899902, 
    6719.799805, 6746.5, 6613.899902, 6731.799805, 6836.399902, 7055.700195, 
    6922.299805), Adj.Close = c(9691.200195, 9646.599609, 9600.900391, 
    9579.799805, 9591.400391, 9581.799805, 9573.599609, 9543.900391, 
    9528.299805, 9511.700195, 9572.5, 9681.299805, 9658.799805, 9611.299805, 
    9573.700195, 9518.5, 9562, 9366.299805, 9484.200195, 9546.700195, 
    9477.900391, 9367.900391, 9404.700195, 9562.900391, 9717.799805, 
    9811.299805, 9811, 9816, 9882.599609, 9940.400391, 9909.799805, 
    9956.799805, 10022.200195, 10005.799805, 10083.599609, 9931, 
    9886.200195, 9483.5, 9250.799805, 9316.799805, 8985.900391, 8723.200195, 
    8741.5, 8811.599609, 8910, 8683, 8375.599609, 7708.700195, 7461.5, 
    7436.399902, 6390.899902, 6629.600098, 6107.200195, 6498.5, 6274.799805, 
    6395.799805, 6443.299805, 6230.200195, 6717.299805, 6942.399902, 
    7033.200195, 6777.899902, 6659.899902, 6785.399902, 6579.399902, 
    6574.100098, 6581.600098, 6844.299805, 7002, 6951.799805, 7070.600098, 
    7108.600098, 6839.5, 6763.399902, 6875.799805, 6831.5, 6634.899902, 
    6719.799805, 6746.5, 6613.899902, 6731.799805, 6836.399902, 7055.700195, 
    6922.299805), Volume = c(142379600L, 135130000L, 103520400L, 
    133476100L, 133957600L, 151793500L, 132894500L, 124149000L, 143774600L, 
    161995000L, 133841500L, 159823200L, 99959200L, 126448800L, 134567800L, 
    169141000L, 154731100L, 164515500L, 154919400L, 187145800L, 164094200L, 
    303553700L, 168665700L, 174133900L, 221922900L, 248416700L, 179586900L, 
    132132500L, 167628300L, 199830200L, 161753500L, 127061800L, 105784200L, 
    184042000L, 151866100L, 178849000L, 194954500L, 256063800L, 263346200L, 
    299256300L, 341792600L, 497983900L, 355485000L, 351219900L, 321258300L, 
    342803500L, 345901100L, 598769200L, 506034000L, 369150100L, 723609100L, 
    473165800L, 583614700L, 388451800L, 398609500L, 346225500L, 486796200L, 
    282033200L, 301211900L, 322631900L, 248806600L, 225316500L, 232005200L, 
    258181200L, 235113700L, 233309800L, 215387500L, 225965400L, 260857500L, 
    155910100L, 240348600L, 259986200L, 289924500L, 237962600L, 251272600L, 
    186644500L, 200783000L, 177928500L, 163723500L, 182925300L, 147263100L, 
    233782400L, 247762400L, 314979200L)), class = "data.frame", row.names = c(NA, 
    -84L))