Search code examples
rggplot2ggally

Is it possible to split correlation box to show correlation values for two different treatments in pairplot?


With the code below, I have created a scatterplot matrix. The below code just creates a correlation matrix for all of the data, regardless of treatment. However, a column in my data is "Si". I would like to make two different matrices (one for each treatment type) by dividing the box into two for better comparison, just like I did with lower function (Si levels, 0mM, 4mM).

library(GGally) 
leg <- grab_legend(ggplot(data=data1, aes(x=NA, y=NA, colour=Si)) +
                     geom_line(size=2))

my_fn <- function(data1, mapping, method="p", use="pairwise", ...){
  
  # grab data
  x <- eval_data_col(data1, mapping$x)
  y <- eval_data_col(data1, mapping$y)
  
  # calculate correlation
  corr <- cor(x, y, method=method, use=use)
  
  # calculate colour based on correlation value
  # Here I have set a correlation of minus one to blue, 
  # zero to white, and one to red 
  # Change this to suit: possibly extend to add as an argument of `my_fn`
  colFn <- colorRampPalette(c("blue", "white", "red"), interpolate ='spline')
  fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]
  
  ggally_cor(data=data1, size=5, digits=2, stars=TRUE, mapping=mapping, ...) + 
    theme_void() +
    theme(panel.background=element_rect(fill=fill))
}

lowerFn <- function(data1, mapping, emap=NULL, method = "lm", ...) {
  # mapping <- c(mapping, emap)
  # class(mapping) = "uneval" # need this to combine the two aes
  # Can use this instead
  mapping <- ggplot2:::new_aes( c(mapping, emap))
  p <- ggplot(data = data1, mapping = mapping) +
    geom_point(data = data1, alpha = 0.8, size = 3, shape = 16) +
    geom_smooth(method = method, ...) +
    theme_gray() # to get the white background and prominent axis
  p
}

ggpairs(
  data1, columns=4:6, legend=leg,
  upper = list(continuous=my_fn),
  lower = list(continuous = 
                 wrap(lowerFn, 
                      method = "lm", # To make lm bold, use size = 1.3
                      emap=aes(color=Si),
                      fullrange=TRUE, 
                      se=FALSE))) +
  theme(legend.position='top')

enter image description here

here is the data link; https://docs.google.com/spreadsheets/d/1O5haLrVNsLx4_Sn-mr7lUaON4MnwLegpeg2OieODt8I/edit?usp=sharing


Solution

  • Below a quick function to get you started. This just looks at how to divide up the upper triangle panel, where geom_rect is used, compared to the much easier panel.background that was used for a single value. Comments in the code indicate where the coordinates for the text and rectangles were calculated.

    library(GGally)
    library(ggplot2)
    
    my_fn <- function(data, mapping, method="p", use="pairwise", ndp=2, ...){
      
        # grab data
        x <- eval_data_col(data, mapping$x)
        y <- eval_data_col(data, mapping$y)
        col <- eval_data_col(data, mapping$colour)
    
        # calculate correlation
        colFn <- colorRampPalette(c("blue", "white", "red"), interpolate ='spline')
    
        if(is.null(col)) {
            corr <- cor(x, y, method=method, use=use)
            fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]
            p <- ggally_cor(data=data, size=5, digits=2, stars=TRUE, mapping=mapping, ...) +
                    theme_void() +
                    theme(panel.background=element_rect(fill=fill))
            }
        
        # getting cor values by group which we will use to colour
        if(!is.null(col)) {
            idx <- split(seq_len(nrow(data)), col)
            corr <- unlist(lapply(idx, function(i) cor(x[i], y[i], method=method, use=use)))
            lvs <- if(is.character(col)) sort(unique(col)) else levels(col)
            fill <- colFn(100)[findInterval(corr, seq(-1, 1, length=100))]
            cuts <- seq(min(y, na.rm=TRUE), max(y, na.rm=TRUE), length=length(idx)+1L)
            pos <- (head(cuts, -1) + tail(cuts, -1))/2 # for labels
            cuts[1] <- -Inf; cuts[length(idx)+1L] <- Inf # for rects
            rects <- data.frame(from=head(cuts, -1), to=tail(cuts, -1), fill=fill)
        
            p <- ggplot(data=data, mapping=mapping, ...) + 
                    geom_blank() + 
                    theme_void() + 
                    geom_rect(data=rects, aes(xmin=-Inf, xmax=Inf, ymin=from, ymax=to), fill=fill, inherit.aes = FALSE) +
                    annotate("text", x=mean(x), y=pos, label=paste(lvs, ": ", round(corr, ndp)))
            
            }
    
            return(p)
    }
    
    ggpairs(iris, columns=1:4,   mapping=aes(colour=Species), upper = list(continuous=my_fn))
    

    Which produces

    enter image description here