This is the basic code which I need to run quite a number of times given the genes sets I have.
x <-as.matrix(pca_matrix1)
y <- Surv(surv_obj1$OS_MONTHS,surv_obj1$Status)
set.seed(02042019) ## for reproducibility
ix <- sample(1:nrow(x), 0.7*nrow(x))
y_tr <- y[ix]
x_tr <- x[ix,]
y_te <- y[-ix]
x_te <- x[-ix,]
#library(glmnet)
cvfit_tr <- cv.glmnet(x_tr,y_tr,family="cox",nfolds = 10, alpha = 1)
plot(cvfit_tr)
co_mat <- as.matrix(coef(cvfit_tr,s="lambda.min"))
co_mat[co_mat[,1]!=0,]
## get predictions x^Tbeta
preds <- predict(cvfit_tr,x_te,s="lambda.min",type="link")
preds
## split into low, medium, high risk groups
#library(ggplot2)
levs <- cut_number(preds,2)
fit <- survfit(y_te~levs)
out <- survdiff(y_te~levs)
out
broom::glance(out)$p.value
One of my attempt to create the function
lasso_surv <- function(exprs, surv, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
x <- as.matrix(exprs)
y <- Surv(surv$OS_MONTHS, surv$Status)
cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, type.measure = "cox")
best.lambda <- cv.fit$lambda.min
fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda)
coef <- as.matrix(fit$beta[, drop = FALSE])
lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)
return(lasso.result)
}
When i run this i get this error
lasso_result <- lasso_surv(as.matrix(x_te), y_te)
Error in surv$OS_MONTHS : $ operator is invalid for atomic vectors
Here I have already created surv
object
y <- Surv(surv_obj1$OS_MONTHS,surv_obj1$Status)
I'm sure there is something very basic error in the code which Im not able to figure it out
Any suggestion or help would be really appreciated
UPDATED
Code suggested by itIsNaz
lasso_surv <- function(exprs, OS_MONTHS, Status, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
y <- Surv(OS_MONTHS, Status)
x <- as.matrix(exprs)
cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, family="cox")
best.lambda <- cv.fit$lambda.min
fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda)
coef <- as.matrix(fit$beta[, drop = FALSE])
lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)
return(lasso.result)
}
resullt <- lasso_surv(x,surv_obj1$OS_MONTHS,surv_obj1$Status)
Error
Error in Ops.Surv(x, w) : Invalid operation on a survival time
I have modified type.measure to family= cox
UPDATE My survival data
dput(surv_obj1)
structure(list(OS_MONTHS = c(5.3, 95.5, 25.8, 7.1, 21.5, 43.4,
7.7, 81.9, 32.6, 44.4, 11.3, 32.3, 99.9, 15.4, 20.5, 6.3, 6.6,
8.1, 1.2, 19, 11.5, 55.4, 73, 11.2, 10.2, 52.6, 57.3, 4.6, 7.5,
30.5, 6.3, 45.8, 69, 46.8, 22.6, 95.6, 10.5, 11.8, 28.4, 26.3,
43.5, 4.5, 52.7, 3.9, 30.6, 61.2, 47.5, 0.6, 45.3, 12.2, 2.4,
9.3, 8.1, 59.3, 19.2, 26.3, 41.4, 36.9, 0.5, 8.2, 7.4, 36.1,
1.4, 5.5, 0.3, 88.3, 0.2, 0.5, 0.8, 8.4, 4.2, 29.7, 0.8, 67.7,
18.1, 5.7, 0.1, 5.7, 13.8, 6.6, 48.3, 56.3, 27.1, 24.4, 5.6,
53.9, 4, 26, 10.2, 34, 35.2, 47, 24.1, 42.1, 76.2, 7, 77.3, 32.7,
16.4, 9.3, 17, 7.9, 24.8, 0.3, 22.3, 27, 83.5, 34.3, 3.1, 7.5,
6.6, 5.2, 83.3, 33.5, 16.3, 2.3, 75.8, 42.1, 30, 7.7, 0.7, 20.5,
1, 1.3, 11, 73.6, 40.3, 1.3, 1.6, 27.4, 1.9, 29.4, 18.5, 59,
118.1, 8, 2.4, 47.2, 37.1, 26.8, 86.6, 46.5, 27.7, 71.3, 10.7,
4.6), Status = c(1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 1L,
1L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 1L)), row.names = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L,
45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L,
58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 69L, 70L, 71L,
72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L,
85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L,
98L, 99L, 100L, 101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L,
109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L,
120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L,
131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L,
142L, 143L, 144L, 145L, 146L, 147L), class = "data.frame")
and expression dummy dataframe matching the survival dimension
genes <- paste0("Gene", 1:30)
# Create a matrix with 146 rows and 30 columns
exprs <- matrix(rnorm(146*30), nrow = 146, ncol = 30)
# Set the column names to be the gene names
colnames(exprs) <- genes
library(glmnet)
library(survival)
# Function for Lasso regularization on survival data
lasso_surv <- function(exprs, OS_MONTHS, Status, alpha = 1, nfolds = 10, lambda = NULL, genes = NULL) {
y <- Surv(OS_MONTHS, Status)
x <- as.matrix(exprs)
cv.fit <- cv.glmnet(x, y, nfolds = nfolds, alpha = alpha, family="cox")
best.lambda <- cv.fit$lambda.min
fit <- glmnet(x, y, alpha = alpha, lambda = best.lambda, family = "cox")
coef <- as.matrix(fit$beta[, drop = FALSE])
lasso.result <- list(cvfit = cv.fit, fit = fit, coef = coef)
return(lasso.result)
}
# Generate simulated survival data
set.seed(123)
n <- 100
p <- 50
x <- matrix(rnorm(n*p), n, p)
beta <- c(runif(20, 0.5, 1), rep(0, p-20))
colnames(x) <- paste0("gene", 1:p)
y <- c(rep(1, 50), rep(0, 50))
t <- rexp(n, exp(x %*% beta))
# Create survival object
surv_obj1 <- data.frame(OS_MONTHS = t, Status = y)
# Run lasso_surv on survival object
res <- lasso_surv(x, surv_obj1$OS_MONTHS, surv_obj1$Status, nfolds = 10, alpha = 1)
print(res)