Search code examples
rggplot2ggplotly

Ticktext value does not fix ggplot2 facet_grid() breaking down when combined with ggplotly()


I have a dataframe:

gene_symbol<-c("DADA","SDAASD","SADDSD","SDADD","ASDAD","XCVXCVX","EQWESDA","DASDADS","SDASDASD","DADADASD","sdaadfd","DFSD","SADADDAD","SADDADADA","DADSADSASDWQ","SDADASDAD","ASD","DSADD")
panel<-c("growth","growth","growth","growth","big","big","big","small","small","dfgh","DF","DF","DF","DF","DF","gh","DF","DF")
ASDDA<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf2<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf3<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf4<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf5<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDA1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDb1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf1<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf11<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf21<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf31<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf41<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
ASDDAf51<-c("normal","over","low","over","normal","over","low","over","normal","over","DF","DF","DF","DF","DF","DF","DF","DF")
Gene_states22 <- data.frame(gene_symbol, panel, ASDDA, ASDDb, ASDDAf, ASDDAf1, ASDDAf2, 
                            ASDDAf3, ASDDAf4, ASDDAf5, ASDDA1, ASDDb1, ASDDAf1, ASDDAf11,
                            ASDDAf21, ASDDAf31, ASDDAf41, ASDDAf51)

And I create a heatmap with:

library(ggplot2); library(reshape2)
HG3 <- split(Gene_states22[,1:15], Gene_states22$panel)
HG4 <- melt(HG3, id.vars= c("gene_symbol","panel"))
HG4 <- HG4[,-5]
pp <- ggplot(HG4, aes(gene_symbol,variable)) + 
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  facet_grid(~panel, scales = "free" ,space = "free") +
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown"))

enter image description here As you can see I use facet_grid to separate my heatmap into groups based on panel value. The problem is that when I use ggplotly(pp) the column width differs from group to group and my plot seems ugly.

enter image description here

In order to fix the issue I used adapted answer of Plotly and ggplot with facet_grid in R: How to to get yaxis labels to use ticktext value instead of range value? :

library(plotly)
library(ggplot2)
library(data.table)
library(datasets) 


#add fake model for use in facet
dt<-data.table(HG4[1:50,])
dt[,variable:=rownames(HG4)]
dt[,panel:=substr(variable,1,regexpr(" ",variable)-1)][panel=="",panel:=variable]

ggplot.test<-ggplot(dt,aes(gene_symbol,variable))+facet_grid(panel~.,scales="free_y",space="free",drop=TRUE)+
  geom_tile(aes(fill = value),
            colour = "grey50") + 
  scale_fill_manual(values = c("white", "red", "blue", "black", "yellow", "green", "brown")) +
  labs(title = "Heatmap", x = "gene_symbol", y = "sample", fill = "value") +
  guides(fill = FALSE)+
  theme(panel.background = element_rect(fill = NA),
        panel.spacing = unit(0.5, "lines"), ## It was here where you had a 0 for distance between facets. I replaced it by 0.5 .
        strip.placement = "outside")



p <- ggplotly(ggplot.test)
len <- length(unique(HG4$panel))


total <- 1
for (i in 2:len) {
  total <- total + length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']])
}

spacer <- 0.01 #space between the horizontal plots
total_length = total + len * spacer
end <- 1
start <- 1

for (i in c('', seq(2, len))) {
  tick_l <- length(p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']]) + 1

  #fix the y-axis
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['tickvals']] <- seq(1, tick_l)
  p[['x']][['layout']][[paste('yaxis', i, sep='')]][['ticktext']][[tick_l]] <- ''

  end <- start - spacer
  start <- start - (tick_l - 1) / total_length
  v <- c(start, end)
  #fix the size
  p[['x']][['layout']][[paste('yaxis', i, sep='')]]$domain <- v
}

p[['x']][['layout']][['annotations']][[3]][['y']] <- (p[['x']][['layout']][['yaxis']]$domain[2] + p[['x']][['layout']][['yaxis']]$domain[1]) /2
p[['x']][['layout']][['shapes']][[2]][['y0']] <- p[['x']][['layout']][['yaxis']]$domain[1]
p[['x']][['layout']][['shapes']][[2]][['y1']] <- p[['x']][['layout']][['yaxis']]$domain[2]

#fix the annotations
for (i in 3:len + 1) {
  #fix the y position
  p[['x']][['layout']][['annotations']][[i]][['y']] <- (p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[1] + p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]]$domain[2]) /2
  #trim the text
  p[['x']][['layout']][['annotations']][[i]][['text']] <- substr(p[['x']][['layout']][['annotations']][[i]][['text']], 1, length(p[['x']][['layout']][[paste('yaxis', i - 2, sep='')]][['ticktext']]) * 3 - 3)
}

#fix the rectangle shapes in the background
for (i in seq(0,(len - 2) * 2, 2)) {
  p[['x']][['layout']][['shapes']][[i+4]][['y0']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[1]
  p[['x']][['layout']][['shapes']][[i+4]][['y1']] <- p[['x']][['layout']][[paste('yaxis', i /2 + 2, sep='')]]$domain[2]
}
p

But the heatmap is still not correct: enter image description here


Solution

  • So first things first:

    In your case I am not even sure whether a plotly heatmap is what you need. In addition you should never convert a complicated ggplot to plotly. It will fail! In 90% of cases. Try recreating your plot in plotly or whereever you want it to end up. Anything else ends up in coding hell.

    I started by doing some research:

    1. Here is a good description how to create heatmaps with different colors in plotly
    2. This explains how you can create titles in subplots.

    From post 1 I know that I have to create a matrix for each level in your data. So I wrote a function for that:

    mymat<-as.matrix(Gene_states22[,-1:-2])
    
    ### Creates a 1-NA dummy matrix for each level. The output is stored in a list
    dummy_mat<-function(mat,levels,names_col){
      mat_list<-lapply(levels,function(x){
                                mat[mat!=x]=NA
                                mat[mat==x]=1
                                mymat=t(apply(mat,2,as.numeric))
                                colnames(mymat)=names_col
                                return(mymat)
                                })
      names(mat_list)=levels
      return(mat_list)
    }
    my_mat_list<-dummy_mat(mymat,c('DF','low','normal','over'),Gene_states22$gene_symbol)
    
    ### Optional: The heatmap type is peculiar - I created a text-NA matrix for each category as well
    text_mat<-function(mat,levels,names_col){
      mat_list<-lapply(levels,function(x){
                                mat[mat!=x]=NA
                                mat=t(mat)
                                colnames(mat)=names_col
                                return(mat)
                                })
      names(mat_list)=levels
      return(mat_list)
    }
       my_mat_list_t<-text_mat(mymat,c('DF','low','normal','over'),as.character(Gene_states22$gene_symbol))
    

    In addition I needed colors for each level. These colors are created using some dataframe. You may write a similar (lapply-)loop here as well:

    DF_Color <- data.frame(x = c(0,1), y = c("#DEDEDE", "#DEDEDE"))
    colnames(DF_Color) <- NULL
    
    lowColor <- data.frame(x = c(0,1), y = c("#00CCFF", "#00CCFF"))
    colnames(lowColor) <- NULL
    
    normColor <- data.frame(x = c(0,1), y = c("#DEDE00", "#DEDE00"))
    colnames(normColor) <- NULL
    
    overColor <- data.frame(x = c(0,1), y = c("#DE3333", "#DE3333"))
    colnames(overColor) <- NULL
    

    In addition we need the columns in the matrix for each panel-category:

    mycols<-lapply(levels(Gene_states22$panel),function(x) grep(x,Gene_states22$panel))
    

    I stored this in a list as well. Next I use lapply-loop to plot. I store the values in a list and use subplot to put everything together:

    library(plotly)
    
    p_list<-lapply(1:length(mycols),function(j){
      columns<-mycols[[j]]
    
    p<-plot_ly(
        type = "heatmap"
    ) %>% add_trace(
        y=rownames(my_mat_list$DF),x=colnames(my_mat_list$DF)[columns],
        z = my_mat_list$DF[,columns],
        xgap=3,ygap=3, text=my_mat_list_t$DF[,columns],hoverinfo="x+y+text",
        colorscale = DF_Color,
        colorbar = list(
            len = 0.3,
            y = 0.3,
            yanchor = 'top',
            title = 'DF series',
            tickvals = ''
        )
    ) %>% add_trace(
      y=rownames(my_mat_list$low),x=colnames(my_mat_list$low)[columns],
        z = my_mat_list$low[,columns],
        xgap=3,ygap=3,text=my_mat_list_t$low[,columns],hoverinfo="x+y+text",
        colorscale = lowColor,
        colorbar = list(
            len = 0.3,
            y = 0.3,
            yanchor = 'top',
            title = 'low series',
            tickvals = ''
        )
    ) %>% add_trace(
      y=rownames(my_mat_list$normal),x=colnames(my_mat_list$normal)[columns],
        z = my_mat_list$normal[,columns],
        xgap=3,ygap=3,text=my_mat_list_t$normal[,columns],hoverinfo="x+y+text",
        colorscale = normColor,
        colorbar = list(
            len = 0.3,
            y = 1,
            yanchor = 'top',
            title = 'normal series',
            tickvals = ''
        )
    ) %>% add_trace(
      y=rownames(my_mat_list$over),x=colnames(my_mat_list$over)[columns],
        z = my_mat_list$over[,columns],
        xgap=3,ygap=3,text=my_mat_list_t$over[,columns],hoverinfo="x+y+text",
        colorscale = overColor,
        colorbar = list(
            len = 0.3,
            y = 1,
            yanchor = 'top',
            title = 'over series',
            tickvals = ''
        )
     )
    return(p)
    })
    
    subplot(p_list[[1]],p_list[[2]],shareY = TRUE) %>%
      layout(annotations = list(
     list(x = 0.2 , y = 1.05, text = levels(Gene_states22$panel)[1], showarrow = F, xref='paper', yref='paper'),
      list(x = 0.8 , y = 1.05, text = levels(Gene_states22$panel)[2], showarrow = F, xref='paper', yref='paper'))
    )
    

    enter image description here

    POSSIBLE ISSUES:

    1. You have to become create around categories like dfgh which occur only once. If only one column is selected in R, the output is automatically transformed into a (numeric or character) vector-type. Thus maybe add an as.matrix() to all z and text arguments
    2. hover-text doesn't really work. But plotly has a good documentation there. You should be able to figure that out.
    3. You also have to specify the width in the subplot-function. That will be fiddly if you have more than 10 categories.
    4. Interactivity doesn't really work. You can't remove traces. Why? No idea. Do some research if you need it. I guess it is connected with the plot type.
    5. I recommend specifying the extend of the plot(s) in px. That might make the tiles more similar.
    6. Finally you will need some reference for the (subplot) titles and you will need to adjust the margins of your plot. Such that the titles are visible.