Search code examples
r rows dimensions

Calculate the optimal grid layout dimensions for a given amount of plots in R


I have 12 plots in ggplot, and I am arranging them with grid.arrange. I manually set the number of rows in the grid to 4, the number of columns to 3. Since 3 x 4 = 12, this works like a charm.

But what if I have an arbitrary number of plots? Say 13... How would I programmatically find the number of rows and columns to use that makes the entire plot the most "square-like" shape?

I'd like to do this in R.

Update: link to the data: http://github.com/ngfrey/DataGenii/blob/master/exampleMedicalData.csv

Here is the code I was working on this morning. Hopefully it will provide a more illustrative example. Note how I set the number of rows and columns in the return(list(plots=plots, numrow=4, numcol=3)) part of the function:

# Build one density-scaled histogram (with an overlaid density curve) per
# numeric column of `fdf`, skipping the first numeric column.
#
# Args:
#   fdf: a data frame.
#
# Returns:
#   A list with three elements:
#     plots  - list of plot objects, one per plotted column (possibly empty);
#     numrow - number of grid rows (hard-coded to 4 in this version);
#     numcol - number of grid columns (hard-coded to 3 in this version).
#
# Side effect: every plot is printed as soon as it is built.
makePlots <- function(fdf) {
  # vapply() always yields a logical vector, even for a zero-column frame.
  is_num <- vapply(fdf, is.numeric, logical(1))
  numeric_names <- names(fdf)[is_num]

  # The first numeric column is deliberately not plotted. Dropping it with
  # [-1] (rather than looping over 2:length(...)) stays correct when there
  # are fewer than two numeric columns: the loop below then runs zero times
  # instead of counting backwards.
  plot_names <- numeric_names[-1]

  plots <- vector("list", length(plot_names))
  for (i in seq_along(plot_names)) {
    varname <- plot_names[i]
    mydata <- data.frame(fdf[[varname]])
    names(mydata) <- varname
    g <- ggplot(data = mydata, aes_string(x = varname))
    g <- g +
      geom_histogram(aes(y = ..density..), color = "black", fill = "skyblue") +
      geom_density() +
      xlab(varname)
    print(g)
    plots[[i]] <- g
  }

  list(plots = plots, numrow = 4, numcol = 3)
}
# Build the plots, then lay them out on a grid with the dimensions that
# makePlots() reported.
res <- makePlots(fdf)
do.call(
  grid.arrange,
  c(res$plots, list(nrow = res$numrow, ncol = res$numcol))
)

Solution

  • Here's how I got this bad boy to work: (I could still tighten up the axis labels, and probably compress the first 2 if statements in the makePlots() function so it would run faster, but I'll tackle that at a later date/post)

    library(gmp)
    library(ggplot2)
    library(gridExtra)
    
    ############
    # Return all positive divisors of `n` in ascending order.
    # For a vector of length > 1, return a list holding the divisors of each
    # element, in the same order as the input.
    factors <- function(n) {
       # Vector input: handle each element separately.
       if (length(n) > 1) {
          return(lapply(n, factors))
       }

       candidates <- seq_len(n)
       is_divisor <- (n %% candidates) == 0
       candidates[is_divisor]
    }
    
    
    ###########
    # Build one density-scaled histogram (with an overlaid density curve) per
    # numeric column of `fdf`, skipping the first numeric column, and work out
    # a near-square grid that is guaranteed to hold all of the plots.
    #
    # Args:
    #   fdf: a data frame.
    #
    # Returns:
    #   A list with three elements:
    #     plots  - list of plot objects, one per plotted column (possibly empty);
    #     numrow - number of grid rows;
    #     numcol - number of grid columns (numrow * numcol >= length(plots),
    #              and numrow - numcol is 0 or 1).
    #
    # Side effect: every plot is printed as soon as it is built.
    makePlots <- function(fdf) {
      # vapply() always yields a logical vector, even for a zero-column frame.
      is_num <- vapply(fdf, is.numeric, logical(1))
      numeric_names <- names(fdf)[is_num]

      # The first numeric column is skipped because it is expected to hold the
      # row indices. Dropping it with [-1] (instead of looping over
      # 2:length(...)) stays correct with fewer than two numeric columns.
      plot_names <- numeric_names[-1]

      plots <- vector("list", length(plot_names))
      for (i in seq_along(plot_names)) {
        varname <- plot_names[i]
        mydata <- data.frame(fdf[[varname]])
        names(mydata) <- varname
        g <- ggplot(data = mydata, aes_string(x = varname))
        g <- g +
          geom_histogram(aes(y = ..density..), color = "black", fill = "skyblue") +
          geom_density() +
          xlab(varname)
        print(g)
        plots[[i]] <- g
      }

      # Most square-like grid: take ceiling(sqrt(n)) rows, then as many
      # columns as are needed to fit every plot. This always provides at least
      # n cells (e.g. 12 -> 4 x 3, 13 -> 4 x 4, 9 -> 3 x 3), unlike picking the
      # middle pair of divisors, which under-allocates for perfect squares.
      # max(1, ...) keeps the dimensions valid when there is nothing to plot.
      n_plots <- length(plots)
      nrowx <- max(1, ceiling(sqrt(n_plots)))
      ncolx <- max(1, ceiling(n_plots / nrowx))

      list(plots = plots, numrow = nrowx, numcol = ncolx)
    }
    # Build the plots, then lay them out on a grid with the dimensions that
    # makePlots() computed.
    res <- makePlots(fdf)
    do.call(
      grid.arrange,
      c(res$plots, list(nrow = res$numrow, ncol = res$numcol))
    )