Search code examples
rggplot2plotplotlydata-visualization

Plotting "staircases" using ggplot2/plotly


I am trying to follow this tutorial over here: https://rviews.rstudio.com/2017/09/25/survival-analysis-with-r/ (bottom of the page).

I have slightly modified the code for this tutorial and have plotted the "staircases" (i.e. "survival functions", in the below picture "red", "blue", "green") corresponding to 3 of the observations in the data:

 library(survival)
    library(dplyr)
    library(ranger)
    library(data.table)
library(ggplot2)
library(plotly)
    
a = na.omit(lung)
a$ID <- seq_along(a[,1])

r_fit <- ranger(Surv(time,status) ~ age + sex + ph.ecog + ph.karno + pat.karno + meal.cal + wt.loss, data = a, mtry = 4, 
importance = "permutation", splitrule = "extratrees", verbose = TRUE)

death_times <- r_fit$unique.death.times
surv_prob  <-data.frame(r_fit$survival)
avg_prob <- sapply(surv_prob, mean)

plot(r_fit$unique.death.times, r_fit$survival[1,], type = "l", ylim = c(0,1), col = "red", xlab = "Days", ylab = "survival", main = "Survival Curves")

new = a[1:3,]

pred <- predict(r_fit, new, type = 'response')$survival
pred <- data.table(pred)
colnames(pred) <- as.character(r_fit$unique.death.times)

plot(r_fit$unique.death.times, pred[1,], type = "l", col = "red")

lines(r_fit$unique.death.times, r_fit$survival[2,], type = "l", col = "green")

lines(r_fit$unique.death.times, r_fit$survival[3,], type = "l", col = "blue")

enter image description here

From here, I want to make the above plot as "interactive". I want to make so that when you move the mouse over one of the curves:

  1. The "properties" belonging to that curve (from object "a") hover (e.g. ID, age, sex, ph.ecog, etc.)

  2. In the same "hover box" from 1), also show the x-coordinate (r_fit$unique) and the y-coordinate (from "pred") for each position the mouse is hovering over (for a given curve)

My plan was first to take the "grob" object and convert it to a "ggplot" object, and then to convert the "ggplot" object into a "plotly" object:

 grob= plot(r_fit$unique.death.times, pred[1,], type = "l", col = "red")
basic_plot = ggpubr::as_ggplot(grob)

But when I try to inspect the "basic_plot", it shows up as "NULL".

 ggplot(f)
Error: `data` must be a data frame, or other object coercible by `fortify()`, not an S3 object with class gg/ggplot

Had this worked, I would have eventually converted the ggplot object to plotly:

plotly_plot = ggplotly(final_plot)

How to make this interactive plot?

I am trying to achieve something similar to this: https://plotly.com/python/v3/ipython-notebooks/survival-analysis-r-vs-python/ (towards the bottom of the page, plot with the title "lifespan of different tumor DNA profiles")

enter image description here

(Please Note: I am working with a computer that has no USB port or Internet connection, only R with a few pre-installed libraries... I do not have "ggplotify" or "survminer")


Solution

  • The issue is that when you draw a plot in base graphics draw directly on a device. The line of your code grob= plot(r_fit$unique.death.times, pred[1,], type = "l", col = "red") creates a NULL object (unlike ggplot which would return a plot object).

    You can make the plot directly in ggplot (there are a few ways of doing this but I've done a simple example bolow) and convert it with ggplotly:

    fig_dat <- data.frame(time = r_fit$unique.death.times,
                          pred_1 = t(pred[1,]),
                          fit_1 = r_fit$survival[2,],
                          fit_2 = r_fit$survival[3,])
    
    fig_dat_long <- fig_dat %>% pivot_longer(-time, names_to = "pred_fit", values_to = "pred_fit_values")
    
    gg_p <- ggplot(fig_dat_long, aes(x = time, y = pred_fit_values, colour = pred_fit)) +
      geom_line()
    
    ggplotly(gg_p)
    

    Alternatively you could also plot in plotly directly:

    fig_dat <- data.frame(time = r_fit$unique.death.times,
                          pred_1 = t(pred[1,]),
                          fit_1 <- r_fit$survival[2,],
                          fit_2 <- r_fit$survival[3,])
    
    
    fig <- plot_ly(fig_dat, x = ~time, y = ~pred_1, name = 'pred1', type = 'scatter', mode = 'lines')
    fig <- fig %>% add_trace(y = ~fit_1, name = 'fit 1', mode = 'lines') 
    fig <- fig %>% add_trace(y = ~fit_2, name = 'fit 2', mode = 'lines')
    
    
    ## make dataframe of variables to plot:
    fig_dat <- data.frame(time = r_fit$unique.death.times,
                          pred_1 = t(pred[1,]),
                          fit_1 = r_fit$survival[2,],
                          fit_2 = r_fit$survival[3,])
    
    # to include the variables from 'a' we need to put them in the same dataframe for plotting
    # Trouble is they are different lengths the predicted data are a little shorter
    dim(fig_dat)
    dim(a)
    # We can join the two with inner join: https://stackoverflow.com/questions/1299871/how-to-join-merge-data-frames-inner-outer-left-right
    fig_dat_join <- inner_join(fig_dat, a, by = "time")
    dim(fig_dat_join)
    # now they are equal dimensions and joined together but we have a slight issue with duplicate values:
    sort(a$time) # we can see here that time 53 appears twice for example
    a$time[duplicated(a$time)] # this tells us which values in time are duplicated
    sort(death_times) # some
    death_times[duplicated(death_times)] #none
    # because of the duplicates some combinations are returned: see rows 9 and 10
    fig_dat_join 
    
    # I'm not familiar with the analysis so I'm not sure what the correct way in this case is to remove the duplicates in 'a' so that the dimentions of 'a' match the output of 'r-fit'
    # You might need to look into that but it might not make much difference to the visualisation
    
    # I've not used plotly a great deal so there is probably a better way of doing this but I've done my best and included the links as comments: https://plotly-r.com/overview.html
    # labels: https://plotly.com/r/figure-labels/
    x_labs <- list(
      title = "Time")
    
    y_labs <- list(
      title = "y axis")
    
    # T include extra info in hovertext: I https://stackoverflow.com/questions/49901771/how-to-set-different-text-and-hoverinfo-text
    
    p1 <- plot_ly(data = fig_dat_join,
                  x = ~time,
                  # text = ~n,
                  # textposition = "auto",
                  # hoverinfo = "text",
                  hovertext = paste("Time :", fig_dat_join$time,
                                    "<br> Sex :", fig_dat_join$sex,
                                    "<br> Inst :", fig_dat_join$inst,
                                    "<br> ID :", fig_dat_join$ID,
                                    "<br> Age :", fig_dat_join$age
                                    )) %>% 
      add_trace(y = ~pred_1,
                type = 'scatter',
                name = 'Predictor 1',
                mode = 'lines') %>% 
      add_trace( y = ~fit_1,
                type = 'scatter',
                name = 'Fit 1',
                mode = 'lines') %>% 
      add_trace( y = ~fit_2,
                 type = 'scatter',
                 name = 'Fit 2',
                 mode = 'lines') %>% 
      layout(xaxis = x_labs, yaxis = y_labs)
    
    p1
    
    
    

    I was making dataframe a match up with the unique.death.times by using left_join() above. If you don't need that then we could just move the hovertext code into each add_trace?

    fig_dat <- data.frame(time = r_fit$unique.death.times,
                          pred_1 = t(pred[1,]),
                          fit_1 = r_fit$survival[2,],
                          fit_2 = r_fit$survival[3,])
    
    
    p2 <- plot_ly(data = fig_dat,
                  x = ~time,
                  # text = ~n,
                  # textposition = "auto",
                  hoverinfo = "text"
    ) %>% 
      add_trace(y = ~pred_1,
                type = 'scatter',
                name = 'Predictor 1',
                mode = 'lines',
                hovertext = paste("Time :", fig_dat$time,
                                  "<br> y axis :", fig_dat$pred_1,
                                  "<br> Sex :", a$sex[1],
                                  "<br> Inst :", a$inst[1],
                                  "<br> ID :", a$ID[1],
                                  "<br> Age :", a$age[1]
                )) %>% 
      add_trace( y = ~fit_1,
                 type = 'scatter',
                 name = 'Fit 1',
                 mode = 'lines',
                 hovertext = paste("Time :", fig_dat$time,
                                   "<br> y axis :", fig_dat$fit_1,
                                   "<br> Sex :", a$sex[2],
                                   "<br> Inst :", a$inst[2],
                                   "<br> ID :", a$ID[2],
                                   "<br> Age :", a$age[2]
                 )) %>% 
      add_trace( y = ~fit_2,
                 type = 'scatter',
                 name = 'Fit 2',
                 mode = 'lines',
                 hovertext = paste("Time :", fig_dat$time,
                                   "<br> y axis :", fig_dat$fit_2,
                                   "<br> Sex :", a$sex[3],
                                   "<br> Inst :", a$inst[3],
                                   "<br> ID :", a$ID[3],
                                   "<br> Age :", a$age[3]
                 )) %>% 
      layout(xaxis = x_labs, yaxis = y_labs)
    
    p2