Multinomial ridge regression with glmnet returns predicted "probabilities" that are negative or greater than one for the different target classes. Any idea why?

This is my code

library(glmnet)

# Training predictors: every column of df_final_f1 except the target "actual".
train_x <- data.matrix(df_final_f1[, colnames(df_final_f1) != "actual"])

# Cross-validated multinomial fit; alpha = 0 selects the ridge penalty.
ridge_fit <- cv.glmnet(
  train_x,
  df_final_f1$actual,
  family = "multinomial",
  alpha = 0
)
best_lambda_ridge <- ridge_fit$lambda.min
ridge_bestfit <- ridge_fit$glmnet.fit

# Validation predictors: every column of dfv_final_f1 except "actualv".
valid_x <- data.matrix(dfv_final_f1[, colnames(dfv_final_f1) != "actualv"])

# Predict at the selected lambda; note that no `type` argument is passed here.
ridge_pred <- predict(ridge_bestfit, s = best_lambda_ridge, newx = valid_x)
ridge_pred

This is the prediction result I'm getting.

ridge_pred
, , 1

                    0            1           2          3            4           5
    [1,]   7.37383424  0.525928689 -2.23806671 -2.9309818 -2.368180965 -0.36253343
    [2,]   7.77258252  0.270937267 -2.25405600 -2.9441268 -2.388026563 -0.45731041
    [3,]   7.78996160  0.241205035 -2.24645786 -2.9399885 -2.380972142 -0.46374813
    [4,]   7.54554714  0.394904472 -2.23646676 -2.9375340 -2.369309211 -0.39714164
    [5,]   7.46650714  0.462126911 -2.24061675 -2.9371638 -2.375932091 -0.37492140
    [6,]   7.49111566  0.402200807 -2.26717439 -2.9343288 -2.261654242 -0.43015904
    [7,]   7.39178188  0.542378444 -2.23568005 -2.9366850 -2.361800070 -0.39999516
    [8,]   7.63922801  0.364963127 -2.24852235 -2.9369348 -2.379869020 -0.43886497
    [9,]   7.58101590  0.355279364 -2.23016083 -2.9370365 -2.361696205 -0.40740175
   [10,]   7.72415594  0.305360997 -2.25231781 -2.9356740 -2.381878181 -0.45964695

(the output continues in the same way for the remaining rows)

Why are there negative probabilities and probabilities greater than 1? Any help would be appreciated.

Solution

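You need to specify type="response" in predict() (the predict.glmnet method). Otherwise it uses the default, type="link", which returns the linear predictors rather than class probabilities; those values can be negative or greater than one. The example below shows that the default output is identical to the type="link" output.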

library(glmnet)

# Simulate data: a 4-class outcome and a 100 x 20 matrix of Gaussian noise.
# The outcome is drawn before the predictors, so the order of RNG calls
# is unchanged.
n_obs <- 100
n_vars <- 20
g4 <- sample(1:4, size = n_obs, replace = TRUE)
x <- matrix(rnorm(n_obs * n_vars), nrow = n_obs, ncol = n_vars)

# Fit a cross-validated multinomial model; alpha = 0 selects the ridge penalty.
ridge_fit <- cv.glmnet(x, g4, family = "multinomial", alpha = 0)
best_lambda_ridge <- ridge_fit$lambda.min
ridge_bestfit <- ridge_fit$glmnet.fit

# type = "response" asks predict() for fitted class probabilities.
ridge_pred <- predict(
  ridge_bestfit,
  newx = x,
  s = best_lambda_ridge,
  type = "response"
)
> head(ridge_pred[,,1])
        1    2    3    4
[1,] 0.26 0.24 0.24 0.26
[2,] 0.26 0.24 0.24 0.26
[3,] 0.26 0.24 0.24 0.26
[4,] 0.26 0.24 0.24 0.26
[5,] 0.26 0.24 0.24 0.26
[6,] 0.26 0.24 0.24 0.26
# Same prediction, but explicitly on the link (linear predictor) scale.
ridge_pred_link <- predict(
  ridge_bestfit,
  newx = x,
  s = best_lambda_ridge,
  type = "link"
)
head(ridge_pred_link[, , 1])
> head(ridge_pred_link[,,1])
              1           2           3          4
[1,] 0.04002135 -0.04002135 -0.04002135 0.04002135
[2,] 0.04002135 -0.04002135 -0.04002135 0.04002135

# Same prediction with `type` left at its default, for comparison.
ridge_pred_default <- predict(
  ridge_bestfit,
  newx = x,
  s = best_lambda_ridge
)

head(ridge_pred_default[, , 1])
              1           2           3          4
[1,] 0.04002135 -0.04002135 -0.04002135 0.04002135
[2,] 0.04002135 -0.04002135 -0.04002135 0.04002135