Search code examples
rheatmapternarybinningggtern

Ternary heatmap in R


I'm trying to come up with a way of plotting a ternary heatmap using R. I think ggtern should be able todo the trick, but I don't know how to do a binning function like stat_bin in vanilla ggplot2. Here's What I have so far:

require(ggplot2)
require(ggtern)
require(MASS) 
require(scales)

palette <- c( "#FF9933", "#002C54", "#3375B2", "#CCDDEC", "#BFBFBF", "#000000")

sig <- matrix(c(1,2,3,4),2,2)
data <- data.frame(mvrnorm(n=10000, rep(2, 2), Sigma))
data$X1 <- data$X1/max(data$X1)
data$X2 <- data$X2/max(data$X2)
data$X1[which(data$X1<0)] <- runif(length(data$X1[which(data$X1<0)]))
data$X2[which(data$X2<0)] <- runif(length(data$X2[which(data$X2<0)]))

##  Print 2d heatmap
ggplot(data, aes(x=X1, y=X2)) + 
    stat_bin2d(bins=50) + 
    scale_fill_gradient2(low=palette[4], mid=palette[3], high=palette[2]) +
    xlab("Percentage x") +
    ylab("Percentage y") +
    scale_y_continuous(labels = percent) +
    scale_x_continuous(labels = percent) +
    theme_bw() + theme(text = element_text(size = 15))

data$X3 <- with(data, 1-X1-X2)
data <- data[data$X3 >= 0,]

## Print ternary heatmap
ggtern(data, aes(x=X1, y=X2, z=X3)) +
    geom_point(color="black",size=1,shape=16) + theme_bw()

The first call to ggplot produces a nice 2d heatmap: enter image description here

The second plot plots the points in the ternary coordinate system. enter image description here I need something like stat_bin2d to get the count of points in each triangle. Ideally I want to set the size of the triangles like I do in 2d by setting the bins variable of stat_bin2d.


Solution

  • OK, so after playing around with this for a while I figured out a way to do this. Would love to hear from you if there's a smart way to do this.

    The code is below, here's the plot I produced with it: enter image description here

    require(ggplot2)
    require(ggtern)
    require(MASS) 
    require(scales)
    require(plyr)
    
    palette <- c( "#FF9933", "#002C54", "#3375B2", "#CCDDEC", "#BFBFBF", "#000000")
    
    # Example data
    # sig <- matrix(c(3,0,0,2),2,2)
    # data <- data.frame(mvrnorm(n=10000, rep(2, 2), sig))
    # data$X1 <- data$X1/max(data$X1)
    # data$X2 <- data$X2/max(data$X2)
    # data$X1[which(data$X1<0)] <- runif(length(data$X1[which(data$X1<0)]))
    # data$X2[which(data$X2<0)] <- runif(length(data$X2[which(data$X2<0)]))
    
    # Print 2d heatmap
    heatmap2d <- function(data) {
    p <- ggplot(data, aes(x=X1, y=X2)) + 
        stat_bin2d(bins=50) + 
        scale_fill_gradient2(low=palette[4], mid=palette[3], high=palette[2]) +
        xlab("Percentage x") +
        ylab("Percentage y") +
        scale_y_continuous(labels = percent) +
        scale_x_continuous(labels = percent) +
        theme_bw() + theme(text = element_text(size = 15))
    print(p)
    }
    
    # Example data
    # data$X3 <- with(data, 1-X1-X2)
    # data <- data[data$X3 >= 0,]
    
    # Auxiliary function for heatmap3d
    count_bin <- function(data, minT, maxT, minR, maxR, minL, maxL) {
        ret <- data
        ret <- with(ret, ret[minT <= X1 & X1 < maxT,])
        ret <- with(ret, ret[minL <= X2 & X2 < maxL,])
        ret <- with(ret, ret[minR <= X3 & X3 < maxR,])
        if(is.na(nrow(ret))) {
            ret <- 0
        } else {
            ret <- nrow(ret)
        }
        ret
    }
    
    # Plot 3dimensional histogram in a triangle
    # See dataframe data for example of the input dataformat
    heatmap3d <- function(data, inc, logscale=FALSE, text=FALSE, plot_corner=TRUE) {
    #   When plot_corner is FALSE, corner_cutoff determines where to stop plotting
        corner_cutoff = 1
    #   When plot_corner is FALSE, corner_number toggles display of obervations in the corners
    #   This only has an effect when text==FALSE
        corner_numbers = TRUE
    
        count <- 1
        points <- data.frame()
        for (z in seq(0,1,inc)) {
            x <- 1- z
            y <- 0
            while (x>0) {
                points <- rbind(points, c(count, x, y, z))
                x <- round(x - inc, digits=2)
                y <- round(y + inc, digits=2)
                count <- count + 1
            }
            points <- rbind(points, c(count, x, y, z))
            count <- count + 1
        }
        colnames(points) = c("IDPoint","T","L","R")
    
    #   base <- ggtern(data=points,aes(L,T,R)) +
    #               theme_bw() + theme_hidetitles() + theme_hidearrows() +
    #               geom_point(shape=21,size=10,color="blue",fill="white") +
    #               geom_text(aes(label=IDPoint),color="blue")
    #   print(base)
    
        polygons <- data.frame()
        c <- 1
    #   Normal triangles
        for (p in points$IDPoint) {
            if (is.element(p, points$IDPoint[points$T==0])) {
                next
            } else {
                pL <- points$L[points$IDPoint==p]
                pT <- points$T[points$IDPoint==p]
                pR <- points$R[points$IDPoint==p]
                polygons <- rbind(polygons, 
                            c(c,p),
                            c(c,points$IDPoint[abs(points$L-pL) < inc/2 & abs(points$R-pR-inc) < inc/2]),
                            c(c,points$IDPoint[abs(points$L-pL-inc) < inc/2 & abs(points$R-pR) < inc/2]))    
                c <- c + 1
            }
        }
    
    # Upside down triangles
        for (p in points$IDPoint) {
            if (!is.element(p, points$IDPoint[points$T==0])) {
                if (!is.element(p, points$IDPoint[points$L==0])) {
                    pL <- points$L[points$IDPoint==p]
                    pT <- points$T[points$IDPoint==p]
                    pR <- points$R[points$IDPoint==p]
                    polygons <- rbind(polygons, 
                                c(c,p),
                                c(c,points$IDPoint[abs(points$T-pT) < inc/2 & abs(points$R-pR-inc) < inc/2]),
                                c(c,points$IDPoint[abs(points$L-pL) < inc/2 & abs(points$R-pR-inc) < inc/2])) 
                    c <- c + 1
                }
            }
        }
    
    #   IMPORTANT FOR CORRECT ORDERING.
        polygons$PointOrder <- 1:nrow(polygons)
        colnames(polygons) = c("IDLabel","IDPoint","PointOrder")
    
        df.tr <- merge(polygons,points)
    
        Labs = ddply(df.tr,"IDLabel",function(x){c(c(mean(x$T),mean(x$L),mean(x$R)))})
        colnames(Labs) = c("Label","T","L","R")
    
    #   triangles <- ggtern(data=df.tr,aes(L,T,R)) +
    #                   geom_polygon(aes(group=IDLabel),color="black",alpha=0.25) +
    #                   geom_text(data=Labs,aes(label=Label),size=4,color="black") +
    #                   theme_bw()
    #        print(triangles)
    
        bins <- ddply(df.tr, .(IDLabel), summarize, 
                    maxT=max(T),
                    maxL=max(L),
                    maxR=max(R),
                    minT=min(T),
                    minL=min(L),
                    minR=min(R))
    
        count <- ddply(bins, .(IDLabel), summarize, N=count_bin(data, minT, maxT, minR, maxR, minL, maxL))
        df <- join(df.tr, count, by="IDLabel")
    
        Labs = ddply(df,.(IDLabel,N),function(x){c(c(mean(x$T),mean(x$L),mean(x$R)))})
        colnames(Labs) = c("Label","N","T","L","R")
    
        if (plot_corner==FALSE){
            corner <- ddply(df, .(IDPoint, IDLabel), summarize, maxperc=max(T,L,R))
            corner <- corner$IDLabel[corner$maxperc>=corner_cutoff]
    
            df$N[is.element(df$IDLabel, corner)] <- 0
            if (text==FALSE & corner_numbers==TRUE) {
                Labs$N[!is.element(Labs$Label, corner)] <- ""
                text=TRUE
            }
        }    
    
        heat <- ggtern(data=df,aes(L,T,R)) +
            geom_polygon(aes(fill=N,group=IDLabel),color="black",alpha=1)
        if (logscale == TRUE) {
                heat <- heat + scale_fill_gradient(name="Observations", trans = "log",
                                low=palette[2], high=palette[4])
        } else {
                heat <- heat + scale_fill_gradient(name="Observations", 
                                low=palette[2], high=palette[4])
        }
        heat <- heat +
            Tlab("x") +
            Rlab("y") +
            Llab("z") +
            theme_bw() + 
            theme(axis.tern.arrowsep=unit(0.02,"npc"), #0.01npc away from ticks ticklength
                        axis.tern.arrowstart=0.25,axis.tern.arrowfinish=0.75,
                        axis.tern.text=element_text(size=12),
                        axis.tern.arrow.text.T=element_text(vjust=-1),
                        axis.tern.arrow.text.R=element_text(vjust=2),
                        axis.tern.arrow.text.L=element_text(vjust=-1),
                        axis.tern.arrow.text=element_text(size=12),
                        axis.tern.title=element_text(size=15))
        if (text==FALSE) {
            print(heat)
        } else {
            print(heat + geom_text(data=Labs,aes(label=N),size=3,color="white"))
        }
    }
    
    # Usage examples
    # heatmap3d(data, 0.2, text=TRUE)
    # heatmap3d(data, 0.05)
    # heatmap3d(data, 0.1, text=FALSE, logscale=TRUE)
    # heatmap3d(data, 0.1, text=TRUE, logscale=FALSE, plot_corner=FALSE)
    # heatmap3d(data, 0.1, text=FALSE, logscale=FALSE, plot_corner=FALSE)