Search code examples
rglmnet

How to add glmnet prediction vector to a dataframe as a regular column


I would like to use the glmnet package for a classification project. I managed to build the model and get the prediction per row, but I don't see a way to use it as a regular variable. How can I add the prediction vector to the data frame as a regular column that I'll be able to work with (e.g., calculate its mean)? Here is my toy data frame and code. Also, I see that glmnet named the prediction column "1"; why is that, and how can I change it?

df <- read.table(text = "target birds    wolfs     
                     1        9         7 
                     1        8         4 
                     0        2         8 
                     1        2         3 
                     1        8         3 
                     0        1         2 
                     1        7         1 
                     0        1         5 
                     1        9         7 
                     1        8         7 
                     0        2         7 
                     0        2         3 
                     1        6         3 
                     0        1         1 
                     0        3         9 
                     0        1         1  ",header = TRUE)
# Question script: fit a binomial glmnet model on rows 1-10 of df and predict
# probabilities for rows 11-16.
library(Matrix)
# Coerce the two predictor columns (birds, wolfs) to a numeric matrix.
some_matrix <- data.matrix(df[,2:3])
# NOTE(review): the sparse matrix built here is never assigned, so the call
# has no effect on the rest of the script.
Matrix(some_matrix, sparse=TRUE)
set.seed(2)
# NOTE(review): `split` is assigned and immediately reset to NULL; it is
# not used anywhere below.
split <- df[1:10,]
split <- NULL
# Rows 1-10 are the training set, rows 11-16 the test set.
train <- df[1:10,]
test<-df[11:16,]
# Sparse design matrices built from the predictor columns (columns 2-3).
train_sparse <- sparse.model.matrix(~.,train[2:3])
test_sparse <- sparse.model.matrix(~.,test[2:3])
library(glmnet)  
# Binomial (logistic) glmnet fit with the first column (target) as response.
fit <- glmnet(train_sparse,train[,1],family='binomial')
# NOTE(review): cv.glmnet is called without family='binomial', so lambda.min
# is chosen under cv.glmnet's default family rather than the one used for
# `fit` -- confirm this is intended.
cv <- cv.glmnet(train_sparse,train[,1],nfolds=3)
# Predicted probabilities for the test rows at the cross-validated lambda.
# The result is a 6 x 1 matrix whose single column is named "1", not a plain
# vector -- which is why cbind() below yields a column called `1`.
pred <- predict(fit, test_sparse,type="response", s=cv$lambda.min)
pred
            1
11 0.09691107
12 0.82760730
13 1.00000000
14 0.26871408
15 0.58367863
16 0.26871408
c<-cbind(test,pred)
target birds wolfs          1
11      0     2     7 0.09691107
12      0     2     3 0.82760730
13      1     6     3 1.00000000
14      0     1     1 0.26871408
15      0     3     9 0.58367863
16      0     1     1 0.26871408
typeof(c$1)
Error: unexpected numeric constant in "typeof(c$1"
mean(c$1)
Error: unexpected numeric constant in "mean(c$1"

> str(c)
'data.frame':   6 obs. of  5 variables:
 $ target: int  0 0 1 0 0 0
 $ birds : int  2 2 6 1 3 1
 $ wolfs : int  7 3 3 1 9 1
 $ 1     : num  0.0969 0.8276 1 0.2687 0.5837 ...
 $ pred  : num [1:6, 1] 0.0969 0.8276 1 0.2687 0.5837 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr  "11" "12" "13" "14" ...
  .. ..$ : chr "1"

Solution

  • df <- read.table(text = "target birds    wolfs     
                         1        9         7 
                         1        8         4 
                         0        2         8 
                         1        2         3 
                         1        8         3 
                         0        1         2 
                         1        7         1 
                         0        1         5 
                         1        9         7 
                         1        8         7 
                         0        2         7 
                         0        2         3 
                         1        6         3 
                         0        1         1 
                         0        3         9 
                         0        1         1  ",header = TRUE)
    # Fit a cross-validated logistic glmnet model on rows 1-10 of `df` and
    # attach the predicted probabilities for rows 11-16 as an ordinary numeric
    # column that works with mean(), typeof(), etc.
    library(Matrix)
    library(glmnet)

    # Rows 1-10 are the training set, rows 11-16 the test set.
    train <- df[1:10, ]
    test <- df[11:16, ]

    # Design matrices from the predictor columns (birds, wolfs). glmnet fits
    # its own intercept, so the constant "(Intercept)" column that
    # model.matrix() adds is dropped up front; drop = FALSE keeps a matrix.
    # (The *_sparse names are kept from the question; these are dense.)
    train_sparse <- model.matrix(~ ., train[2:3])[, -1, drop = FALSE]
    test_sparse <- model.matrix(~ ., test[2:3])[, -1, drop = FALSE]

    # Seed immediately before cv.glmnet(): its random fold assignment is the
    # only stochastic step in this script.
    set.seed(2)

    # family must be "binomial" here as well; otherwise lambda.min is selected
    # under the default family and does not correspond to the logistic model
    # used for prediction.
    cv <- cv.glmnet(train_sparse, train[, 1], family = "binomial", nfolds = 3)

    # cv.glmnet() already stores the model fitted on the full training data,
    # so a separate glmnet() call is unnecessary.
    fit <- cv$glmnet.fit

    # Predicted probabilities at the cross-validated lambda. predict() returns
    # a one-column matrix whose column is named "1".
    pred <- predict(cv, newx = test_sparse, type = "response", s = "lambda.min")

    # Turn the predictors into a data frame and add the predictions as a plain
    # numeric vector, giving the new column a proper name.
    test_sparse <- as.data.frame(test_sparse)
    test_sparse$predictionColumn <- as.numeric(pred)