Search code examples
r matrix forecasting training-data

How to fill a matrix that receives vectors of different sizes in R?


I'm trying to fill a matrix with vectors of different sizes. In fact, my goal here is to generate multiple subsets of data to perform a process of retraining a time series for a forecasting model.

To operationalize this, I tried the following code in R:

# Attempt to build a qty_modelos x 95 matrix in which row i holds the first
# t[i] observations of the Nile series, padded with NA up to 95 columns.
ST_Nile <- Nile
n <- length(ST_Nile)
# k: amount by which m shrinks from one subset to the next.
k <- 5
# qty_modelos: number of subsets (rows) to generate.
qty_modelos <- 6
# m[i]: number of trailing observations left out of subset i.
m <- c(rep(NA,qty_modelos))
# t[i]: index of the last observation included in subset i (n - m[i]).
t <- c(rep(NA,qty_modelos))
# TrainNile is an atomic vector of 95 NAs, so TrainNile[i] is one element.
TrainNile <- c(rep(NA,95))
Matriz_train <- matrix(ncol = 95, nrow = qty_modelos, byrow = TRUE)

for (i in 1:qty_modelos) {
  # m starts at 30 and decreases by k on every later iteration.
  if (i==1) {
    m[i] <- 30
  } else { m[i] <- m[i-1] - k}
  t[i] <- n - m[i]
  # NOTE: the right-hand side has t[i] elements but the target TrainNile[i] is
  # a single element; this length mismatch is what R reports as "number of
  # items to replace is not a multiple of replacement length".
  TrainNile[i] <- ST_Nile[1:t[i]]
  # NOTE: TrainNile[i] is a length-1 subset, so this comparison with 95 is
  # never TRUE and the else branch always runs.
  if (length(TrainNile[i])==95) {
    # NOTE: append() returns a plain vector, so this assignment replaces the
    # whole matrix instead of filling row i.
    Matriz_train <- append(Matriz_train[i,],TrainNile[i])
  } else{
    # Same single-element target with a multi-element right-hand side as above.
    TrainNile[i] = c(TrainNile[i], rep(NA, 95 - length(TrainNile[i])))
    # Same overwrite of Matriz_train as in the branch above.
    Matriz_train <- append(Matriz_train[i,],TrainNile[i])
  }
}

I expected the vectors shorter than the longest one to be padded with 'NA', but I get the error "number of items to replace is not a multiple of replacement length". Any ideas on how to fill this matrix?

Output of dput(Nile):

structure(c(1120, 1160, 963, 1210, 1160, 1160, 813, 1230, 1370, 1140, 995, 935, 1110, 994, 1020, 960, 1180, 799, 958, 1140, 1100, 1210, 1150, 1250, 1260, 1220, 1030, 1100, 774, 840, 874, 694, 940, 833, 701, 916, 692, 1020, 1050, 969, 831, 726, 456, 824, 702, 1120, 1100, 832, 764, 821, 768, 845, 864, 862, 698, 845, 744, 796, 1040, 759, 781, 865, 845, 944, 984, 897, 822, 1010, 771, 676, 649, 846, 812, 742, 801, 1040, 860, 874, 848, 890, 744, 749, 838, 1050, 918, 986, 797, 923, 975, 815, 1020, 906, 901, 1170, 912, 746, 919, 718, 714, 740), .Tsp = c(1871, 1970, 1), class = "ts")

Solution

  • There's a good chance I interpreted this somewhat differently, but it should still illustrate the main point: vectors can be set to a length and right-padded with NAs with length(x) <- y

    To build a 6x95 matrix of varying-length vector subsets where vectors are padded with NA values to match the length of the longest vector, something like this should be enough:

    # Recreate the Nile time series from the dput() output given in the question.
    Nile <- structure(c(1120, 1160, 963, 1210, 1160, 1160, 813, 1230, 1370, 1140, 995, 935, 1110, 994, 1020, 960, 1180, 799, 958, 1140, 1100, 1210, 1150, 1250, 1260, 1220, 1030, 1100, 774, 840, 874, 694, 940, 833, 701, 916, 692, 1020, 1050, 969, 831, 726, 456, 824, 702, 1120, 1100, 832, 764, 821, 768, 845, 864, 862, 698, 845, 744, 796, 1040, 759, 781, 865, 845, 944, 984, 897, 822, 1010, 771, 676, 649, 846, 812, 742, 801, 1040, 860, 874, 848, 890, 744, 749, 838, 1050, 918, 986, 797, 923, 975, 815, 1020, 906, 901, 1170, 912, 746, 919, 718, 714, 740), .Tsp = c(1871, 1970, 1), class = "ts")
    
    ST_Nile <- Nile
    # Number of observations; the outer parentheses make the assignment print
    # its value.
    (n <- length(ST_Nile))
    #> [1] 100
    
    # Sequence of end indices for the subsets: n minus 30, 25, ..., 5.
    (t <- n - seq(from = 30, length.out = 6, by = -5))
    #> [1] 70 75 80 85 90 95
    
    # Collect the varying-length subsets in a list: element j holds the first
    # t[j] observations of the series.
    train_lst <- lapply(t, \(x) ST_Nile[1:x])
    # Inspect the length and type of each list element.
    str(train_lst)
    #> List of 6
    #>  $ : num [1:70] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:75] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:80] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:85] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:90] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    
    # Length of the longest vector in the list; this is the target length for
    # padding.
    (max_len <- lengths(train_lst) |> max())
    #> [1] 95
    
    # Apply `length<-`(x, max_len) to every vector in the list; extending a
    # vector this way fills the new trailing positions with NA.
    train_lst_padded <- lapply(train_lst, `length<-`, max_len) 
    # Updated lengths (all vectors now have max_len elements):
    str(train_lst_padded)
    #> List of 6
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    #>  $ : num [1:95] 1120 1160 963 1210 1160 1160 813 1230 1370 1140 ...
    # Print the first vector (70 observations originally) to show that it is
    # now right-padded with NA values:
    train_lst_padded[[1]]
    #>  [1] 1120 1160  963 1210 1160 1160  813 1230 1370 1140  995  935 1110  994 1020
    #> [16]  960 1180  799  958 1140 1100 1210 1150 1250 1260 1220 1030 1100  774  840
    #> [31]  874  694  940  833  701  916  692 1020 1050  969  831  726  456  824  702
    #> [46] 1120 1100  832  764  821  768  845  864  862  698  845  744  796 1040  759
    #> [61]  781  865  845  944  984  897  822 1010  771  676   NA   NA   NA   NA   NA
    #> [76]   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
    #> [91]   NA   NA   NA   NA   NA
    
    # Bind all padded vectors as rows: do.call() passes each list element as a
    # separate argument to rbind(), returning a 6x95 matrix.
    (Matriz_train <- do.call(rbind, train_lst_padded))
    #>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
    #> [1,] 1120 1160  963 1210 1160 1160  813 1230 1370  1140   995   935  1110   994
    #> [2,] 1120 1160  963 1210 1160 1160  813 1230 1370  1140   995   935  1110   994
    #> [3,] 1120 1160  963 1210 1160 1160  813 1230 1370  1140   995   935  1110   994
    #> [4,] 1120 1160  963 1210 1160 1160  813 1230 1370  1140   995   935  1110   994
    #> [5,] 1120 1160  963 1210 1160 1160  813 1230 1370  1140   995   935  1110   994
    #> [6,] 1120 1160  963 1210 1160 1160  813 1230 1370  1140   995   935  1110   994
    #>      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
    #> [1,]  1020   960  1180   799   958  1140  1100  1210  1150  1250  1260  1220
    #> [2,]  1020   960  1180   799   958  1140  1100  1210  1150  1250  1260  1220
    #> [3,]  1020   960  1180   799   958  1140  1100  1210  1150  1250  1260  1220
    #> [4,]  1020   960  1180   799   958  1140  1100  1210  1150  1250  1260  1220
    #> [5,]  1020   960  1180   799   958  1140  1100  1210  1150  1250  1260  1220
    #> [6,]  1020   960  1180   799   958  1140  1100  1210  1150  1250  1260  1220
    #>      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
    #> [1,]  1030  1100   774   840   874   694   940   833   701   916   692  1020
    #> [2,]  1030  1100   774   840   874   694   940   833   701   916   692  1020
    #> [3,]  1030  1100   774   840   874   694   940   833   701   916   692  1020
    #> [4,]  1030  1100   774   840   874   694   940   833   701   916   692  1020
    #> [5,]  1030  1100   774   840   874   694   940   833   701   916   692  1020
    #> [6,]  1030  1100   774   840   874   694   940   833   701   916   692  1020
    #>      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
    #> [1,]  1050   969   831   726   456   824   702  1120  1100   832   764   821
    #> [2,]  1050   969   831   726   456   824   702  1120  1100   832   764   821
    #> [3,]  1050   969   831   726   456   824   702  1120  1100   832   764   821
    #> [4,]  1050   969   831   726   456   824   702  1120  1100   832   764   821
    #> [5,]  1050   969   831   726   456   824   702  1120  1100   832   764   821
    #> [6,]  1050   969   831   726   456   824   702  1120  1100   832   764   821
    #>      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
    #> [1,]   768   845   864   862   698   845   744   796  1040   759   781   865
    #> [2,]   768   845   864   862   698   845   744   796  1040   759   781   865
    #> [3,]   768   845   864   862   698   845   744   796  1040   759   781   865
    #> [4,]   768   845   864   862   698   845   744   796  1040   759   781   865
    #> [5,]   768   845   864   862   698   845   744   796  1040   759   781   865
    #> [6,]   768   845   864   862   698   845   744   796  1040   759   781   865
    #>      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
    #> [1,]   845   944   984   897   822  1010   771   676    NA    NA    NA    NA
    #> [2,]   845   944   984   897   822  1010   771   676   649   846   812   742
    #> [3,]   845   944   984   897   822  1010   771   676   649   846   812   742
    #> [4,]   845   944   984   897   822  1010   771   676   649   846   812   742
    #> [5,]   845   944   984   897   822  1010   771   676   649   846   812   742
    #> [6,]   845   944   984   897   822  1010   771   676   649   846   812   742
    #>      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
    #> [1,]    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
    #> [2,]   801    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
    #> [3,]   801  1040   860   874   848   890    NA    NA    NA    NA    NA    NA
    #> [4,]   801  1040   860   874   848   890   744   749   838  1050   918    NA
    #> [5,]   801  1040   860   874   848   890   744   749   838  1050   918   986
    #> [6,]   801  1040   860   874   848   890   744   749   838  1050   918   986
    #>      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95]
    #> [1,]    NA    NA    NA    NA    NA    NA    NA    NA    NA
    #> [2,]    NA    NA    NA    NA    NA    NA    NA    NA    NA
    #> [3,]    NA    NA    NA    NA    NA    NA    NA    NA    NA
    #> [4,]    NA    NA    NA    NA    NA    NA    NA    NA    NA
    #> [5,]   797   923   975   815    NA    NA    NA    NA    NA
    #> [6,]   797   923   975   815  1020   906   901  1170   912
    

    Created on 2023-11-21 with reprex v2.0.2