Search code examples
rtime-seriesforecasting

Error in `[<-.ts`(`*tmp*`, ri, value = c(135.945603813953, Only element replacements are allowed when try forecast in R


Here dput of my train sample

dput(head(train,10))
train=structure(list(d_event_date = structure(c(19702, 19702, 19702, 
19702, 19702, 19702, 19702, 19702, 19702, 19702), class = "Date"), 
    id_placement = c(20199549L, 20396189L, 20420889L, 16751495L, 
    17220762L, 15387714L, 20355507L, 20385438L, 17752382L, 21253771L
    ), id_zone = c(1632853L, 2836041L, 2919363L, 1709628L, 9937L, 
    1274157L, 1400727L, 2907891L, 1948324L, 1863050L), id_publisher = c(302501L, 
    480683L, 1204726L, 4468L, 5515L, 163955L, 176741L, 421345L, 
    441862L, 467632L), id_advertiser = c(914L, 914L, 914L, 914L, 
    914L, 914L, 914L, 914L, 914L, 914L), id_campaign = c(687314L, 
    687314L, 687314L, 723900L, 723900L, 723900L, 723900L, 723900L, 
    723900L, 723900L), id_banner = c(2032222L, 2032222L, 2032222L, 
    2136674L, 2136674L, 2136674L, 2136674L, 2136674L, 2136674L, 
    2136674L), id_landing = c(4772102L, 4772102L, 4699256L, 4821730L, 
    4821514L, 4821513L, 4821514L, 4823676L, 4821514L, 4821730L
    ), id_ad_unit = c(28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 
    28L, 28L), n_landing_pricing_type = c(3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L), n_impression_qd = c(1L, 8L, 1L, 1L, 
    25L, 4L, 35L, 1L, 5L, 1L), n_click_qd = structure(c(0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0), class = "integer64"), n_conversion_qd = c(1L, 
    1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L), n_gross = c(0.0024, 
    0.0024, 0.0024, 0.008, 0.032499998, 0.0054, 0.064999996, 
    0.0076, 0.0129, 0.0239), n_net = c(0.001632, 0.00036, 0.001632, 
    0.0052, 0, 0.0027, 0.05135, 0.0038, 0.00774, 0.016252)), row.names = c(NA, 
10L), class = "data.frame")

i create time series model using Arima. Here my code

# Function for data normalization
normalize_data <- function(data, method) {
   normalized_data <- NULL
   if (method == "none") {
     normalized_data <- data
   } else if (method == "min-max") {
     min_val <- min(data)
     max_val <- max(data)
     normalized_data <- (data - min_val) / (max_val - min_val)
   } else if (method == "z-score") {
     mean_val <- mean(data)
     std_val <- sd(data)
     normalized_data <- (data - mean_val) / std_val
   } else if (method == "cubic-root") {
     normalized_data <- sign(data) * abs(data)^(1/3)
   } else if (method == "logarithmic") {
     normalized_data <- log(1 + data)
   }
   return(normalized_data)
}

# Function for calculating MAPE
calculate_mape <- function(actual, forecasted) {
   mape <- mean(abs((actual - forecasted) / actual)) * 100
   return(map)
}

# Normalization and model building for each variable
variables <- c("n_impression_qd", "n_conversion_qd", "n_gross")
normalization_methods <- c("none", "min-max", "z-score", "cubic-root", "logarithmic")
mape_results <- data.frame(variable = character(), normalization = character(), mape = numeric(), stringsAsFactors = FALSE)

for (variable in variables) {
   for (method in normalization_methods) {
     normalized_data <- normalize_data(data_filtered[[variable]], method)
    
     # Convert to time series object
     ts_data <- ts(normalized_data)
    
     # Building an ARIMA model
     model <- auto.arima(ts_data)
    
     #Forecasting
     forecast <- forecast(model, h = 10)$mean
    
     # MAPE calculation
     actual <- tail(data_filtered[[variable]], 10)
     mape <- calculate_mape(actual, forecast)
    
     # Saving results
     result <- data.frame(variable = variable, normalization = method, mape = mape)
     mape_results <- rbind(mape_results, result)
   }
}

# Output results
print(map_results)

it works, but when i try apply model on my test sample for example like that

# Convert 'd_event_date' column to date format
test_data$d_event_date <- as.Date(test_data$d_event_date)

# Function to denormalize data
denormalize_data <- function(data, method, min_val, max_val, mean_val, std_val) {
   denormalized_data <- NULL
   if (method == "none") {
     denormalized_data <- data
   } else if (method == "min-max") {
     denormalized_data <- data * (max_val - min_val) + min_val
   } else if (method == "z-score") {
     denormalized_data <- data * std_val + mean_val
   } else if (method == "cubic-root") {
     denormalized_data <- sign(data) * abs(data)^3
   } else if (method == "logarithmic") {
     denormalized_data <- exp(data) - 1
   }
   return(denormalized_data)
}

# Variables and normalization methods
variables <- c("n_impression_qd", "n_conversion_qd", "n_gross")
normalization_methods <- c("none", "min-max", "z-score", "cubic-root", "logarithmic")

# Result storage
prediction_results <- data.frame(variable = character(), actual = numeric(), predicted = numeric(), stringsAsFactors = FALSE)

# Predict and calculate MAPE for each variable and normalization method
for (variable in variables) {
   for (method in normalization_methods) {
     # Retrieving data and normalization parameters for a variable
     data <- test_data[[variable]]
     min_val <- min(data)
     max_val <- max(data)
     mean_val <- mean(data)
     std_val <- sd(data)
    
     # Data normalization
     normalized_data <- normalize_data(data, method)
    
     # Convert to time series object
     ts_data <- ts(normalized_data)
    
     #Forecasting
     forecast <- forecast(model, h = length(ts_data))$mean
    
     # Denormalization of predicted values
     denormalized_forecast <- denormalize_data(forecast, method, min_val, max_val, mean_val, std_val)
    
     # MAPE calculation
     actual <- data
     mape <- calculate_mape(actual, denormalized_forecast)
    
     # Saving results
     result <- data.frame(variable = variable, actual = actual, predicted = denormalized_forecast)
     prediction_results <- rbind(prediction_results, result)
   }
}

i get the error
Error in `[<-.ts`(`*tmp*`, ri, value = c(135.945603813953, 177.486351819609, :
   Only element replacements are allowed

example test sample

 dput(test_data)
    structure(list(d_event_date = structure(c(19702L, 19702L, 19702L, 
    19702L, 19702L, 19702L, 19702L, 19702L, 19702L, 19702L), class = c("IDate", 
    "Date")), id_placement = c(20794491L, 20794491L, 17788422L, 20654348L, 
    21496171L, 18582636L, 19840341L, 17218292L, 18423615L, 10973945L
    ), id_zone = c(2859002L, 2859002L, 1781319L, 1733870L, 2096492L, 
    1933910L, 2717419L, 1850302L, 2245073L, 46564L), id_publisher = c(352465L, 
    352465L, 143258L, 304470L, 627080L, 59862L, 1052120L, 425742L, 
    38L, 34624L), id_advertiser = c(914L, 914L, 914L, 914L, 914L, 
    2834L, 2834L, 8579L, 15862L, 15862L), id_campaign = c(689656L, 
    689656L, 723900L, 723900L, 723900L, 873762L, 882336L, 360584L, 
    872210L, 872213L), id_banner = c(2038567L, 2038567L, 2136674L, 
    2136674L, 2136674L, 2546849L, 2568794L, 1181453L, 2543069L, 2543072L
    ), id_landing = c(3814050L, 4590592L, 4823675L, 4821514L, 4823676L, 
    4601283L, 4811884L, 4801168L, 4595695L, 4595698L), id_ad_unit = c(5L, 
    5L, 28L, 28L, 28L, 27L, 29L, 28L, 28L, 28L), n_landing_pricing_type = c(3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L), n_impression_qd = c(13L, 
    11L, 2L, 1L, 8L, 5L, 349L, 2L, 132L, 7L), n_click_qd = structure(c(0, 
    0, 0, 0, 0, 0, 1.15611361126852e-321, 0, 0, 0), class = "integer64"), 
        n_conversion_qd = c(1L, 2L, 1L, 1L, 3L, 2L, 5L, 1L, 3L, 1L
        ), n_gross = c(0.003, 0.0065, 0.0124, 0.0167, 0.090299998, 
        0.00054504, 0.03256, 0.0263, 0.08052, 0.00404), n_net = c(9e-04, 
        0.00195, 0.00868, 0.012024, 0.0686280016, 0.0003706272, 0.0221408, 
        0.017884, 0.032208, 0.00202)), row.names = c(NA, 10L), class = "data.frame")

why this error occured, and how can i fix it? Thank you for your help. After the forecast, I denormalize the data back to absolute values. The desired result is for each variable in the test sample the value that was and the one that was predicted for each model built using one or another normalization method

structure(list(n_impression_qd_actual = 12:16, n_impression_qd_pred = 12:16, 
    model.normalize_n_impression_qd = c("none", "min-max", "z-score", 
    "cubic-root", "logarithmic"), n_conversion_qd_actual = 12:16, 
    n_conversion_qd_pred = 12:16, model.normalize_n_n_conversion_qd = c("none", 
    "min-max", "z-score", "cubic-root", "logarithmic"), n_gross_actual = 12:16, 
    n_gross_pred = 12:16, n_gross_model.normalize = c("none", 
    "min-max", "z-score", "cubic-root", "logarithmic")), class = "data.frame", row.names = c(NA, 
-5L))

Solution

  • This could be a comment but I have a suggestion to make on the normalization/denormalization functions and will post them as an answer.

    The question

    The solution to the problem is to coerce the "ts" object to numeric before rbinding with the result.

    for (variable in variables) {
      for (method in normalization_methods) {
        # [rest of code omitted]
        # Saving results
        result <- data.frame(variable = variable, actual = actual, predicted = denormalized_forecast)
    
        # include this line
        result$predicted <- as.numeric(result$predicted)
        #
        prediction_results <- rbind(prediction_results, result)
      }
    }
    

    Data normalization

    To normalize the data raises no problems unless it is to be denormalized later in which case the denormalization code can be made simpler. If the normalization function saves the method and parameters as returned object attributes then those attributes can be used later with no need to recompute them and to have so many denormalization function arguments.

    # New function for data normalization
    normalize_data <- function(data, method) {
      normalized_data <- NULL
      params <- NULL
      if (method == "none") {
        normalized_data <- data
      } else if (method == "min-max") {
        min_val <- min(data)
        max_val <- max(data)
        normalized_data <- (data - min_val) / (max_val - min_val)
        params <- c(min = min_val, max = max_val)
      } else if (method == "z-score") {
        mean_val <- mean(data)
        std_val <- sd(data)
        normalized_data <- (data - mean_val) / std_val
        params <- c(mean = mean_val, sd = std_val)
      } else if (method == "cubic-root") {
        normalized_data <- sign(data) * abs(data)^(1/3)
      } else if (method == "logarithmic") {
        normalized_data <- log1p(data)
      }
      attr(normalized_data, "method") <- method
      if(!is.null(params))
        attr(normalized_data, "params") <- params
      normalized_data
    }
    
    # New denormalization function
    denormalize_data <- function(data, attrib) {
      method <- attrib$method
      denormalized_data <- NULL
      if (method == "none") {
        denormalized_data <- data
      } else if (method == "min-max") {
        min_val <- attrib$params["min"]
        max_val <- attrib$params["max"]
        denormalized_data <- data * (max_val - min_val) + min_val
      } else if (method == "z-score") {
        mean_val <- attrib$params["mean"]
        std_val <- attrib$params["sd"]
        denormalized_data <- data * std_val + mean_val
      } else if (method == "cubic-root") {
        denormalized_data <- sign(data) * abs(data)^3
      } else if (method == "logarithmic") {
        denormalized_data <- expm1(data)
      }
      denormalized_data
    }
    

    The call to denormalize the data is now

    denormalized_forecast <- denormalize_data(forecast, attributes(normalized_data))