Search code examples
rfunctionsurvival-analysis

How do I wrap this cv.glmnet into a function?


This is the basic code which I need to run quite a number of times given the genes sets I have.

x <-as.matrix(pca_matrix1)
y <- Surv(surv_obj1$OS_MONTHS,surv_obj1$Status)



set.seed(02042019) ## for reproducibility    
ix <- sample(1:nrow(x), 0.7*nrow(x)) 
y_tr <- y[ix]
x_tr <- x[ix,]
y_te <- y[-ix]
x_te <- x[-ix,]

#library(glmnet)
cvfit_tr <- cv.glmnet(x_tr,y_tr,family="cox",nfolds = 10, alpha = 1)
plot(cvfit_tr)





co_mat <- as.matrix(coef(cvfit_tr,s="lambda.min"))
co_mat[co_mat[,1]!=0,]



## get predictions x^Tbeta
preds <- predict(cvfit_tr,x_te,s="lambda.min",type="link")
preds
## split into low, medium, high risk groups
#library(ggplot2)
levs <- cut_number(preds,2)
fit <- survfit(y_te~levs)
out <- survdiff(y_te~levs)
out
broom::glance(out)$p.value

One of my attempt to create the function

lasso_surv <- function(exprs, surv, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
  

  x <- as.matrix(exprs)
  y <- Surv(surv$OS_MONTHS, surv$Status)
  
  cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, type.measure = "cox")
  
  best.lambda <- cv.fit$lambda.min
  fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda)
  coef <- as.matrix(fit$beta[, drop = FALSE])
  
  lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)
  return(lasso.result)
}

When i run this i get this error

lasso_result <- lasso_surv(as.matrix(x_te), y_te)
Error in surv$OS_MONTHS : $ operator is invalid for atomic vectors

Here I have already created surv object

y <- Surv(surv_obj1$OS_MONTHS,surv_obj1$Status)

I'm sure there is something very basic error in the code which Im not able to figure it out

Any suggestion or help would be really appreciated

UPDATED

Code suggested by itIsNaz

lasso_surv <- function(exprs, OS_MONTHS, Status, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
  
  y <- Surv(OS_MONTHS, Status) 
  x <- as.matrix(exprs)     
  cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, family="cox")     
  best.lambda <- cv.fit$lambda.min   
  fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda)  
  coef <- as.matrix(fit$beta[, drop = FALSE])      
  lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)  
  return(lasso.result) 
  
}

resullt <- lasso_surv(x,surv_obj1$OS_MONTHS,surv_obj1$Status)

Error

Error in Ops.Surv(x, w) : Invalid operation on a survival time

I have modified type.measure to family= cox

UPDATE My survival data

dput(surv_obj1)
structure(list(OS_MONTHS = c(5.3, 95.5, 25.8, 7.1, 21.5, 43.4, 
7.7, 81.9, 32.6, 44.4, 11.3, 32.3, 99.9, 15.4, 20.5, 6.3, 6.6, 
8.1, 1.2, 19, 11.5, 55.4, 73, 11.2, 10.2, 52.6, 57.3, 4.6, 7.5, 
30.5, 6.3, 45.8, 69, 46.8, 22.6, 95.6, 10.5, 11.8, 28.4, 26.3, 
43.5, 4.5, 52.7, 3.9, 30.6, 61.2, 47.5, 0.6, 45.3, 12.2, 2.4, 
9.3, 8.1, 59.3, 19.2, 26.3, 41.4, 36.9, 0.5, 8.2, 7.4, 36.1, 
1.4, 5.5, 0.3, 88.3, 0.2, 0.5, 0.8, 8.4, 4.2, 29.7, 0.8, 67.7, 
18.1, 5.7, 0.1, 5.7, 13.8, 6.6, 48.3, 56.3, 27.1, 24.4, 5.6, 
53.9, 4, 26, 10.2, 34, 35.2, 47, 24.1, 42.1, 76.2, 7, 77.3, 32.7, 
16.4, 9.3, 17, 7.9, 24.8, 0.3, 22.3, 27, 83.5, 34.3, 3.1, 7.5, 
6.6, 5.2, 83.3, 33.5, 16.3, 2.3, 75.8, 42.1, 30, 7.7, 0.7, 20.5, 
1, 1.3, 11, 73.6, 40.3, 1.3, 1.6, 27.4, 1.9, 29.4, 18.5, 59, 
118.1, 8, 2.4, 47.2, 37.1, 26.8, 86.6, 46.5, 27.7, 71.3, 10.7, 
4.6), Status = c(1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 
1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 
1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 
0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 
0L, 0L, 1L, 0L, 0L, 1L, 1L)), row.names = c(1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 69L, 70L, 71L, 
72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 
85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 
98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 
109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 
120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 
131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 
142L, 143L, 144L, 145L, 146L, 147L), class = "data.frame")

and expression dummy dataframe matching the survival dimension

genes <- paste0("Gene", 1:30)

# Create a matrix with 146 rows and 30 columns
exprs <- matrix(rnorm(146*30), nrow = 146, ncol = 30)

# Set the column names to be the gene names
colnames(exprs) <- genes

Solution

  • library(glmnet)
    library(survival)
    
    # Function for Lasso regularization on survival data
    lasso_surv <- function(exprs, OS_MONTHS, Status, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
      
      y <- Surv(OS_MONTHS, Status) 
      x <- as.matrix(exprs)     
      cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, family="cox")     
      best.lambda <- cv.fit$lambda.min   
      fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda, family = "cox")  
      coef <- as.matrix(fit$beta[, drop = FALSE])      
      lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)  
      return(lasso.result) 
      
    }
    
    # Generate simulated survival data
    set.seed(123)
    n <- 100
    p <- 50
    x <- matrix(rnorm(n*p), n, p)
    beta <- c(runif(20, 0.5, 1), rep(0, p-20))
    colnames(x) <- paste0("gene", 1:p)
    y <- c(rep(1, 50), rep(0, 50))
    t <- rexp(n, exp(x %*% beta))
    
    # Create survival object
    surv_obj1 <- data.frame(OS_MONTHS = t, Status = y)
    
    # Run lasso_surv on survival object
    res <- lasso_surv(x, surv_obj1$OS_MONTHS, surv_obj1$Status, nfolds = 10, alpha = 1)
    
    print(res)
    

    enter image description here