Search code examples
r tidymodels

Error in package "DALEXtra": Can't convert from `data$sqft` <double> to `sqft` <integer> due to loss of precision


I am trying to create a Partial Dependence Plot for a tidymodels workflow using the R package DALEXtra, but an error occurs: Error in `scream()`: Can't convert from `data$sqft` <double> to `sqft` <integer> due to loss of precision.

# Reproducible example: tune a ranger random forest on the Sacramento housing
# data with tidymodels, then request a partial dependence profile for `sqft`
# (grouped by `type`) through DALEXtra.
library(tidymodels)
data(Sacramento, package = "modeldata")

# Turn any character columns into factors. `mutate(across(where(...)))` is the
# current replacement for the superseded `mutate_if()` and gives the same
# result.
Sacramento <- Sacramento %>%
  mutate(across(where(is.character), as.factor))

# 75% training / 25% test split, stratified on the outcome `price`.
set.seed(123)
data_split <- initial_split(Sacramento, prop = 0.75, strata = price)
Sac_train <- training(data_split)
Sac_test <- testing(data_split)

# Random forest regression with 1000 trees; `mtry` and `min_n` are marked for
# tuning. `importance` and `keep.inbag` are passed on to the ranger engine.
rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>%
  set_engine("ranger", importance = "permutation", keep.inbag = TRUE) %>%
  set_mode("regression")

# Preprocessing: drop zip/latitude/longitude, normalize the numeric
# predictors, and dummy-encode the nominal predictors.
Sac_recipe <- recipe(price ~ ., data = Sac_train) %>%
  step_rm(zip, latitude, longitude) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors())

rf_workflow <- workflow() %>%
  add_model(rf_mod) %>%
  add_recipe(Sac_recipe)

# 10-fold cross-validation repeated twice, stratified on `price`.
set.seed(123)
Sac_folds <- vfold_cv(Sac_train, v = 10, repeats = 2, strata = price)

# Tune over the resamples with `grid = 3`, scoring by RMSE and keeping the
# out-of-sample predictions.
set.seed(123)
rf_res <- rf_workflow %>%
  tune_grid(
    grid = 3,
    resamples = Sac_folds,
    control = control_grid(save_pred = TRUE),
    metrics = metric_set(rmse)
  )

# Plug the best (lowest-RMSE) parameters into the workflow and fit it once on
# the initial split. The result is stored as `final_fit` so that the object
# does not mask the `last_fit()` function it was created with.
rf_best <- rf_res %>%
  select_best(metric = "rmse")
last_wf <- rf_workflow %>%
  finalize_workflow(rf_best)
final_fit <- last_wf %>%
  last_fit(data_split)
final_model <- extract_workflow(final_fit)

# Wrap the fitted workflow in a DALEX explainer built on the training
# predictors (outcome column removed) and the observed prices.
library(DALEXtra)
rf_explainer <- explain_tidymodels(
  model = final_model,
  data = select(Sac_train, -price),
  y = Sac_train$price
)

# Partial dependence profile for `sqft`, one curve per level of `type`;
# `N = NULL` is passed so that no subsample size is fixed here.
pdp_sqft <- model_profile(
  explainer = rf_explainer,
  variables = "sqft",
  N = NULL,
  groups = "type"
)

# Error
# Error in `scream()`:  Can't convert from `data$sqft` <double> to `sqft` <integer> due to loss of precision.

Solution

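  • Your problem appears to come from using out-of-date packages. Updating the tidymodels packages should fix it: the reprex below runs the same code without the error, and the session info at the end lists the package versions that were used.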

    # Reprex of the question's script, run with up-to-date packages. Lines
    # starting with `#>` are the recorded console output of that run.
    library(tidymodels) 
    data(Sacramento, package = "modeldata") 
    # Convert any character columns to factors.
    Sacramento <- Sacramento %>%
      mutate_if(is.character, as.factor)
    
    # 75% training / 25% test split, stratified on the outcome `price`.
    set.seed(123)
    data_split <- initial_split(Sacramento, prop = 0.75, strata = price)
    Sac_train <- training(data_split)
    Sac_test <- testing(data_split)
    
    # Random forest regression with 1000 trees; `mtry` and `min_n` are marked
    # for tuning. `importance` and `keep.inbag` are passed to the ranger engine.
    rf_mod <- rand_forest(mtry = tune(), min_n = tune(), trees = 1000) %>% 
      set_engine("ranger", importance = "permutation", keep.inbag = TRUE) %>% 
      set_mode("regression")
    
    # Preprocessing: drop zip/latitude/longitude, normalize the numeric
    # predictors, and dummy-encode the nominal predictors.
    Sac_recipe <- recipe(price ~ ., data = Sac_train) %>% 
      step_rm(zip, latitude, longitude) %>% 
      step_normalize(all_numeric_predictors()) %>%
      step_dummy(all_nominal_predictors())
    
    rf_workflow <- workflow() %>% 
      add_model(rf_mod) %>% 
      add_recipe(Sac_recipe)
    
    # 10-fold cross-validation repeated twice, stratified on `price`.
    set.seed(123)
    Sac_folds <- vfold_cv(Sac_train, v = 10, repeats = 2, strata = price)
    
    # Tune over the resamples with `grid = 3`, scoring by RMSE and keeping the
    # out-of-sample predictions.
    set.seed(123)
    rf_res <- rf_workflow %>% 
      tune_grid(grid = 3,
                resamples = Sac_folds, 
                control = control_grid(save_pred = TRUE),
                metrics = metric_set(rmse))
    #> i Creating pre-processing data to finalize unknown parameter: mtry
    
    # Plug the best (lowest-RMSE) parameters into the workflow and fit it once
    # on the initial split. The object named `last_fit` shares its name with
    # the `last_fit()` function; the call still resolves to the function.
    rf_best <- rf_res %>%
      select_best(metric = "rmse")
    last_wf <- rf_workflow %>% 
      finalize_workflow(rf_best)
    last_fit <- last_wf %>%
      last_fit(data_split)
    final_model <- extract_workflow(last_fit)
    
    library(DALEXtra)
    
    # Wrap the fitted workflow in a DALEX explainer built on the training
    # predictors (outcome column removed) and the observed prices.
    rf_explanier  <- explain_tidymodels(model = final_model, 
                                        data = select(Sac_train, -price), 
                                        y = Sac_train$price)
    #> Preparation of a new explainer is initiated
    #>   -> model label       :  workflow  (  default  )
    #>   -> data              :  698  rows  8  cols 
    #>   -> data              :  tibble converted into a data.frame 
    #>   -> target variable   :  698  values 
    #>   -> predict function  :  yhat.workflow  will be used (  default  )
    #>   -> predicted values  :  No value for predict function target column. (  default  )
    #>   -> model_info        :  package tidymodels , ver. 1.0.0 , task regression (  default  ) 
    #>   -> predicted values  :  numerical, min =  100683.5 , mean =  245393.1 , max =  692722.4  
    #>   -> residual function :  difference between y and yhat (  default  )
    #>   -> residuals         :  numerical, min =  -133229.2 , mean =  834.6177 , max =  300979.8  
    #>   A new explainer has been created!
    
    # Partial dependence profile for `sqft`, grouped by `type`. In this run the
    # call completes with a warning only, instead of the error reported in the
    # question.
    pdp_sqft <- model_profile(explainer = rf_explanier, variables = "sqft", 
                              N = NULL, groups = "type")
    #> Warning in FUN(X[[i]], ...): Variable: < sqft > has more than 201 unique
    #> values and all of them will be used as variable splits in calculating
    #> variable profiles. Use the `variable_splits` parameter to mannualy change this
    #> behaviour. If you believe this warning to be a false positive, raise issue at
    #> <https://github.com/ModelOriented/ingredients/issues>.
    
    # Print the first rows of the computed profiles.
    pdp_sqft
    #> Top profiles    : 
    #>   _vname_        _label_ _x_ _groups_   _yhat_ _ids_
    #> 1    sqft workflow_Condo 484    Condo 146246.5     0
    #> 2    sqft workflow_Condo 539    Condo 148405.1     0
    #> 3    sqft workflow_Condo 610    Condo 121844.8     0
    #> 4    sqft workflow_Condo 611    Condo 121768.2     0
    #> 5    sqft workflow_Condo 623    Condo 122501.8     0
    #> 6    sqft workflow_Condo 625    Condo 123659.1     0
    
    # Record the R and package versions under which this reprex ran.
    sessioninfo::session_info()
    #> ─ Session info ───────────────────────────────────────────────────────────────
    #>  setting  value
    #>  version  R version 4.2.1 (2022-06-23)
    #>  os       macOS Monterey 12.6
    #>  system   aarch64, darwin20
    #>  ui       X11
    #>  language (EN)
    #>  collate  en_US.UTF-8
    #>  ctype    en_US.UTF-8
    #>  tz       America/Los_Angeles
    #>  date     2023-01-16
    #>  pandoc   2.19.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/ (via rmarkdown)
    #> 
    #> ─ Packages ───────────────────────────────────────────────────────────────────
    #>  package      * version    date (UTC) lib source
    #>  assertthat     0.2.1      2019-03-21 [2] CRAN (R 4.2.0)
    #>  backports      1.4.1      2021-12-13 [2] CRAN (R 4.2.0)
    #>  broom        * 1.0.2      2022-12-15 [1] CRAN (R 4.2.0)
    #>  class          7.3-20     2022-01-16 [2] CRAN (R 4.2.1)
    #>  cli            3.6.0      2023-01-09 [1] CRAN (R 4.2.1)
    #>  codetools      0.2-18     2020-11-04 [2] CRAN (R 4.2.1)
    #>  colorspace     2.0-3      2022-02-21 [1] CRAN (R 4.2.0)
    #>  DALEX        * 2.4.3      2023-01-15 [1] CRAN (R 4.2.0)
    #>  DALEXtra     * 2.2.1      2022-06-14 [1] CRAN (R 4.2.0)
    #>  DBI            1.1.3      2022-06-18 [1] CRAN (R 4.2.0)
    #>  dials        * 1.1.0      2022-11-04 [1] CRAN (R 4.2.1)
    #>  DiceDesign     1.9        2021-02-13 [1] CRAN (R 4.2.0)
    #>  digest         0.6.31     2022-12-11 [1] CRAN (R 4.2.0)
    #>  dplyr        * 1.0.10     2022-09-01 [1] CRAN (R 4.2.0)
    #>  ellipsis       0.3.2      2021-04-29 [1] CRAN (R 4.2.0)
    #>  evaluate       0.19       2022-12-13 [1] CRAN (R 4.2.0)
    #>  fansi          1.0.3      2022-03-24 [1] CRAN (R 4.2.0)
    #>  fastmap        1.1.0      2021-01-25 [2] CRAN (R 4.2.0)
    #>  foreach        1.5.2      2022-02-02 [1] CRAN (R 4.2.0)
    #>  fs             1.5.2      2021-12-08 [2] CRAN (R 4.2.0)
    #>  furrr          0.3.1      2022-08-15 [1] CRAN (R 4.2.0)
    #>  future         1.30.0     2022-12-16 [1] CRAN (R 4.2.0)
    #>  future.apply   1.10.0     2022-11-05 [1] CRAN (R 4.2.1)
    #>  generics       0.1.3      2022-07-05 [1] CRAN (R 4.2.0)
    #>  ggplot2      * 3.4.0      2022-11-04 [1] CRAN (R 4.2.1)
    #>  globals        0.16.2     2022-11-21 [1] CRAN (R 4.2.0)
    #>  glue           1.6.2      2022-02-24 [1] CRAN (R 4.2.0)
    #>  gower          1.0.1      2022-12-22 [1] CRAN (R 4.2.0)
    #>  GPfit          1.0-8      2019-02-08 [1] CRAN (R 4.2.0)
    #>  gtable         0.3.1      2022-09-01 [1] CRAN (R 4.2.0)
    #>  hardhat        1.2.0      2022-06-30 [1] CRAN (R 4.2.0)
    #>  highr          0.10       2022-12-22 [1] CRAN (R 4.2.0)
    #>  htmltools      0.5.4      2022-12-07 [1] CRAN (R 4.2.0)
    #>  infer        * 1.0.4      2022-12-02 [1] CRAN (R 4.2.1)
    #>  ingredients    2.3.0      2023-01-15 [1] CRAN (R 4.2.0)
    #>  ipred          0.9-13     2022-06-02 [1] CRAN (R 4.2.0)
    #>  iterators      1.0.14     2022-02-05 [1] CRAN (R 4.2.0)
    #>  jsonlite       1.8.4      2022-12-06 [1] CRAN (R 4.2.0)
    #>  knitr          1.41       2022-11-18 [1] CRAN (R 4.2.0)
    #>  lattice        0.20-45    2021-09-22 [2] CRAN (R 4.2.1)
    #>  lava           1.7.1      2023-01-06 [1] CRAN (R 4.2.1)
    #>  lhs            1.1.6      2022-12-17 [1] CRAN (R 4.2.0)
    #>  lifecycle      1.0.3      2022-10-07 [1] CRAN (R 4.2.0)
    #>  listenv        0.9.0      2022-12-16 [1] CRAN (R 4.2.0)
    #>  lubridate      1.9.0      2022-11-06 [1] CRAN (R 4.2.1)
    #>  magrittr       2.0.3      2022-03-30 [1] CRAN (R 4.2.0)
    #>  MASS           7.3-57     2022-04-22 [2] CRAN (R 4.2.1)
    #>  Matrix         1.5-3      2022-11-11 [1] CRAN (R 4.2.0)
    #>  modeldata    * 1.0.1      2022-09-06 [1] CRAN (R 4.2.1)
    #>  munsell        0.5.0      2018-06-12 [1] CRAN (R 4.2.0)
    #>  nnet           7.3-17     2022-01-16 [2] CRAN (R 4.2.1)
    #>  parallelly     1.34.0     2023-01-13 [1] CRAN (R 4.2.0)
    #>  parsnip      * 1.0.3      2022-11-11 [1] CRAN (R 4.2.0)
    #>  pillar         1.8.1      2022-08-19 [1] CRAN (R 4.2.0)
    #>  pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.2.0)
    #>  png            0.1-8      2022-11-29 [1] CRAN (R 4.2.0)
    #>  prodlim        2019.11.13 2019-11-17 [1] CRAN (R 4.2.0)
    #>  purrr        * 1.0.1      2023-01-10 [1] CRAN (R 4.2.0)
    #>  R.cache        0.16.0     2022-07-21 [2] CRAN (R 4.2.0)
    #>  R.methodsS3    1.8.2      2022-06-13 [2] CRAN (R 4.2.0)
    #>  R.oo           1.25.0     2022-06-12 [2] CRAN (R 4.2.0)
    #>  R.utils        2.12.2     2022-11-11 [1] CRAN (R 4.2.0)
    #>  R6             2.5.1      2021-08-19 [1] CRAN (R 4.2.0)
    #>  ranger       * 0.14.1     2022-06-18 [1] CRAN (R 4.2.0)
    #>  Rcpp           1.0.9      2022-07-08 [1] CRAN (R 4.2.0)
    #>  recipes      * 1.0.4      2023-01-11 [1] CRAN (R 4.2.0)
    #>  reprex         2.0.2      2022-08-17 [1] CRAN (R 4.2.0)
    #>  reticulate     1.27       2023-01-07 [1] CRAN (R 4.2.1)
    #>  rlang          1.0.6      2022-09-24 [1] CRAN (R 4.2.0)
    #>  rmarkdown      2.19       2022-12-15 [1] CRAN (R 4.2.0)
    #>  rpart          4.1.16     2022-01-24 [2] CRAN (R 4.2.1)
    #>  rsample      * 1.1.1      2022-12-07 [1] CRAN (R 4.2.0)
    #>  rstudioapi     0.14       2022-08-22 [1] CRAN (R 4.2.0)
    #>  scales       * 1.2.1      2022-08-20 [1] CRAN (R 4.2.0)
    #>  sessioninfo    1.2.2      2021-12-06 [2] CRAN (R 4.2.0)
    #>  stringi        1.7.12     2023-01-11 [1] CRAN (R 4.2.0)
    #>  stringr        1.5.0      2022-12-02 [1] CRAN (R 4.2.1)
    #>  styler         1.9.0      2023-01-15 [1] CRAN (R 4.2.0)
    #>  survival       3.3-1      2022-03-03 [2] CRAN (R 4.2.1)
    #>  tibble       * 3.1.8      2022-07-22 [1] CRAN (R 4.2.0)
    #>  tidymodels   * 1.0.0      2022-07-13 [1] CRAN (R 4.2.0)
    #>  tidyr        * 1.2.1      2022-09-08 [1] CRAN (R 4.2.0)
    #>  tidyselect     1.2.0      2022-10-10 [1] CRAN (R 4.2.0)
    #>  timechange     0.2.0      2023-01-11 [1] CRAN (R 4.2.0)
    #>  timeDate       4022.108   2023-01-07 [1] CRAN (R 4.2.1)
    #>  tune         * 1.0.1      2022-10-09 [1] CRAN (R 4.2.0)
    #>  utf8           1.2.2      2021-07-24 [1] CRAN (R 4.2.0)
    #>  vctrs          0.5.1      2022-11-16 [1] CRAN (R 4.2.0)
    #>  withr          2.5.0      2022-03-03 [1] CRAN (R 4.2.0)
    #>  workflows    * 1.1.2      2022-11-16 [1] CRAN (R 4.2.0)
    #>  workflowsets * 1.0.0      2022-07-12 [1] CRAN (R 4.2.0)
    #>  xfun           0.36       2022-12-21 [1] CRAN (R 4.2.0)
    #>  yaml           2.3.6      2022-10-18 [1] CRAN (R 4.2.0)
    #>  yardstick    * 1.1.0      2022-09-07 [1] CRAN (R 4.2.0)
    #> 
    #>  [1] /Users/emilhvitfeldt/Library/R/arm64/4.2/library
    #>  [2] /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
    #> 
    #> ──────────────────────────────────────────────────────────────────────────────
    

    Created on 2023-01-16 with reprex v2.0.2