I'm struggling with how the obtain the AUC from a logistic regression model using tidymodels.
Here's an example using the built-in mpg
dataset.
library(tidymodels)
library(tidyverse)
# Use mpg dataset
df <- mpg
# Create an indicator variable for class="suv"
df$is_suv <- as.factor(df$class == "suv")
# Create the split object
df_split <- initial_split(df, prop=1/2)
# Create the training and testing sets
df_train <- training(df_split)
df_test <- testing(df_split)
# Create workflow
rec <-
recipe(is_suv ~ cty + hwy + cyl, data=df_train)
glm_spec <-
logistic_reg() %>%
set_engine(engine = "glm")
glm_wflow <-
workflow() %>%
add_recipe(rec) %>%
add_model(glm_spec)
# Fit the model
model1 <- fit(glm_wflow, df_train)
# Attach predictions to training dataset
training_results <- bind_cols(df_train, predict(model1, df_train))
# Calculate accuracy
accuracy(training_results, truth = is_suv, estimate = .pred_class)
# Calculate AUC??
roc_auc(training_results, truth = is_suv, estimate = .pred_class)
The last line returns this error:
> roc_auc(training_results, truth = is_suv, estimate = .pred_class)
Error in metric_summarizer(metric_nm = "roc_auc", metric_fn = roc_auc_vec, :
formal argument "estimate" matched by multiple actual arguments
Since you are doing binary classification, roc_auc()
is expecting a vector of class probabilities corresponding to the "relevant" class, not the predicted class.
You can get this using predict(model1, df_train, type = "prob")
. Alternatively, if you are using workflows version 0.2.2 or newer you can use the augment()
to get class predictions and probabilities without using bind_cols()
.
library(tidymodels)
library(tidyverse)
# Use mpg dataset
df <- mpg
# Create an indicator variable for class="suv"
df$is_suv <- as.factor(df$class == "suv")
# Create the split object
df_split <- initial_split(df, prop=1/2)
# Create the training and testing sets
df_train <- training(df_split)
df_test <- testing(df_split)
# Create workflow
rec <-
recipe(is_suv ~ cty + hwy + cyl, data=df_train)
glm_spec <-
logistic_reg() %>%
set_engine(engine = "glm")
glm_wflow <-
workflow() %>%
add_recipe(rec) %>%
add_model(glm_spec)
# Fit the model
model1 <- fit(glm_wflow, df_train)
# Attach predictions to training dataset
training_results <- augment(model1, df_train)
# Calculate accuracy
accuracy(training_results, truth = is_suv, estimate = .pred_class)
#> # A tibble: 1 x 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 accuracy binary 0.795
# Calculate AUC
roc_auc(training_results, truth = is_suv, estimate = .pred_FALSE)
#> # A tibble: 1 x 3
#> .metric .estimator .estimate
#> <chr> <chr> <dbl>
#> 1 roc_auc binary 0.879
Created on 2021-04-12 by the reprex package (v1.0.0)