Search code examples
Tags: r, ggplot2, facet-grid, ggpubr

workaround for ggplot2::facet_grid 'big' data bug?


More like "medium data", because a mere 16k rows is enough to reproduce this.

Working:

# Example reported as working: 100 noisy copies of selected mtcars columns,
# drawn as a faceted scatter plot and saved as a PDF.
library(ggplot2)
library(ggh4x) # provides facet_nested(), which is used below

outputdir <- "/tmp"
n_copies <- 100
keep_cols <- c("disp", "hp", "wt", "gear", "carb", "mpg", "qsec", "vs", "am")

# Build every noisy copy first and bind once, instead of growing the data
# frame with rbind() on each iteration (which re-copies all earlier rows).
# The rnorm() calls happen in the same order as before: disp, hp, wt per copy.
noisy_copies <- lapply(seq_len(n_copies), function(i) {
  mti <- mtcars[, keep_cols]
  mti$disp <- mti$disp + 10 * rnorm(nrow(mti))
  mti$hp <- mti$hp + 10 * rnorm(nrow(mti))
  mti$wt <- mti$wt + 10 * rnorm(nrow(mti))
  mti
})
mtdf <- do.call(rbind, noisy_copies)

p <- ggplot(mtdf, aes(disp, hp, color = wt)) +
  geom_point() +
  facet_nested(gear ~ am, scales = "free")

# Spell out `filename`: `file =` only worked via partial argument matching.
ggsave(
  filename = file.path(outputdir, "facet_scatter.pdf"),
  plot = p,
  width = 10,
  height = 10
)

enter image description here

Not Working (various alternative approaches):

Here is the output of sessionInfo() showing the package versions that are loaded. This MWE was run on linux.

# Example reported as failing: identical to the working one except that 500
# noisy copies of the mtcars columns are stacked instead of 100.
library(ggplot2)
library(ggpubr)
library(ggh4x)

outputdir <- "/tmp"
n_copies <- 500
keep_cols <- c("disp", "hp", "wt", "gear", "carb", "mpg", "qsec", "vs", "am")

# Build every noisy copy first and bind once, instead of growing the data
# frame with rbind() on each iteration (which re-copies all earlier rows).
# The rnorm() calls happen in the same order as before: disp, hp, wt per copy.
noisy_copies <- lapply(seq_len(n_copies), function(i) {
  mti <- mtcars[, keep_cols]
  mti$disp <- mti$disp + 10 * rnorm(nrow(mti))
  mti$hp <- mti$hp + 10 * rnorm(nrow(mti))
  mti$wt <- mti$wt + 10 * rnorm(nrow(mti))
  mti
})
mtdf <- do.call(rbind, noisy_copies)

# All three attempts deliberately target the same output file.
# `filename` is spelled out; `file =` only worked via partial matching.
png_path <- file.path(outputdir, "facet_scatter.png")

# Attempt 1: ggh4x::facet_nested()
p <- ggplot(mtdf, aes(disp, hp, color = wt)) +
  geom_point() +
  facet_nested(gear ~ am, scales = "free")
ggsave(filename = png_path, plot = p, width = 10, height = 10)

# Attempt 2: ggplot2::facet_grid()
p <- ggplot(mtdf, aes(disp, hp, color = wt)) +
  geom_point() +
  facet_grid(gear ~ am, scales = "free")
ggsave(filename = png_path, plot = p, width = 10, height = 10)

# Attempt 3: ggpubr::ggscatter() followed by ggpubr::facet()
p <- ggscatter(mtdf, x = "disp", y = "hp", color = "wt")
p <- facet(p, facet.by = c("gear", "am"))
ggsave(filename = png_path, plot = p, width = 10, height = 10)

All three attempts generate the following error:

R version 4.1.1 (2021-08-10) -- "Kick Things"
Copyright (C) 2021 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

  Natural language support but running in an English locale

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

source("/mnt/md0/insshare/mkarikom/Active_Project_Backup/DURIAN/DURIAN/slurm/test.R", encoding = "UTF-8")
Error: `scale_id` must not be `NA`
Run `rlang::last_error()` to see where the error occurred.
> sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.3 LTS

Matrix products: default
BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libmkl_rt.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggh4x_0.2.1   ggpubr_0.4.0  ggplot2_3.3.5

loaded via a namespace (and not attached):
 [1] pillar_1.6.4     compiler_4.1.1   tools_4.1.1      jsonlite_1.7.2  
 [5] lifecycle_1.0.1  tibble_3.1.6     gtable_0.3.0     pkgconfig_2.0.3 
 [9] rlang_0.4.12     DBI_1.1.2        withr_2.4.3      dplyr_1.0.7     
[13] generics_0.1.1   vctrs_0.3.8      grid_4.1.1       tidyselect_1.1.1
[17] glue_1.6.0       R6_2.5.1         rstatix_0.7.0    fansi_1.0.0     
[21] carData_3.0-5    purrr_0.3.4      tidyr_1.1.4      farver_2.1.0    
[25] car_3.0-12       magrittr_2.0.1   scales_1.1.1     backports_1.4.1 
[29] ellipsis_0.3.2   assertthat_0.2.1 abind_1.4-5      colorspace_2.0-2
[33] ggsignif_0.6.3   utf8_1.2.2       munsell_0.5.0    broom_0.7.11    
[37] crayon_1.4.2

rlang::last_error()

As suggested in the error message:

> rlang::last_error()
<error/rlang_error>
`scale_id` must not be `NA`
Backtrace:
  1. ggplot2::ggsave(...)
  3. ggplot2:::grid.draw.ggplot(plot)
  5. ggplot2:::print.ggplot(x)
  7. ggplot2:::ggplot_build.ggplot(x)
  8. layout$train_position(data, scale_x(), scale_y())
  9. ggplot2:::f(..., self = self)
 10. self$facet$train_scales(...)
 11. ggplot2:::f(...)
 12. ggplot2:::scale_apply(layer_data, x_vars, "train", SCALE_X, x_scales)
Run `rlang::last_trace()` to see the full context.

rlang::last_trace()

As suggested in the output of rlang::last_error():

> rlang::last_trace()
<error/rlang_error>
`scale_id` must not be `NA`
Backtrace:
     █
  1. └─ggplot2::ggsave(...)
  2.   ├─grid::grid.draw(plot)
  3.   └─ggplot2:::grid.draw.ggplot(plot)
  4.     ├─base::print(x)
  5.     └─ggplot2:::print.ggplot(x)
  6.       ├─ggplot2::ggplot_build(x)
  7.       └─ggplot2:::ggplot_build.ggplot(x)
  8.         └─layout$train_position(data, scale_x(), scale_y())
  9.           └─ggplot2:::f(..., self = self)
 10.             └─self$facet$train_scales(...)
 11.               └─ggplot2:::f(...)
 12.                 └─ggplot2:::scale_apply(layer_data, x_vars, "train", SCALE_X, x_scales)

Update:

It's possible that this is caused by version 1.1.1 of the scales package.


Solution

  • The problem is related to libmkl.

    I can reproduce the failure mentioned above and avoid it by switching from MKL to OpenBLAS (update-alternatives --config libblas.so.3-x86_64-linux-gnu).

    Simple code to reproduce the problem

        # Minimal reproducer: a facet grid over 4 * n points.
        library(ggplot2)
        sessionInfo()

        # Build a faceted scatter plot from a data frame with 4 * n rows.
        #
        # n: multiplier for the row count; the four-element label vectors
        #    `v` and `p` are recycled by data.frame() to 4 * n rows.
        # Returns the ggplot object.
        f <- function(n) {
            n_rows <- 4 * n
            df <- data.frame(
                v = c("V1", "V2", "V1", "V2"),
                l = 1:n_rows,
                t = n_rows:1,
                p = c("P_1", "P2", "P3", "P_1")
            )

            ggplot(df, aes(x = l, y = t)) +
                geom_point() +
                facet_grid(rows = vars(p), cols = vars(v))
        }

        f(100)
        f(10000)
    

    This runs fine with OpenBLAS and fails when using MKL.

    > library(ggplot2)
    > sessionInfo()
    R version 4.3.1 (2023-06-16)
    Platform: x86_64-pc-linux-gnu (64-bit)
    Running under: Debian GNU/Linux 12 (bookworm)
    
    Matrix products: default
    BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/libmkl_rt.so;  LAPACK version 3.8.0
    
    locale:
     [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
     [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
     [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
     [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
     [9] LC_ADDRESS=C               LC_TELEPHONE=C            
    [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
    
    time zone: Europe/Berlin
    tzcode source: system (glibc)
    
    attached base packages:
    [1] stats     graphics  grDevices utils     datasets  methods   base     
    
    other attached packages:
    [1] ggplot2_3.4.3
    
    loaded via a namespace (and not attached):
     [1] utf8_1.2.3       R6_2.5.1         tidyselect_1.2.0 magrittr_2.0.3  
     [5] gtable_0.3.4     glue_1.6.2       tibble_3.2.1     pkgconfig_2.0.3 
     [9] generics_0.1.3   dplyr_1.1.3      lifecycle_1.0.3  cli_3.6.1       
    [13] fansi_1.0.4      scales_1.2.1     grid_4.3.1       vctrs_0.6.3     
    [17] withr_2.5.0      compiler_4.3.1   munsell_0.5.0    pillar_1.9.0    
    [21] colorspace_2.1-0 rlang_1.1.1     
    > 
    > f <- function(n) 
    + {
    +   df <- data.frame(v = c("V1","V2", "V1", "V2"),
    +                  l = seq(1, 4*n),
    +                  t = seq(4*n, 1), 
    +                  p = c("P_1", "P2", "P3", "P_1"))
    +   ggplot(data = df, aes(l, t)) + 
    +     geom_point() + 
    +     facet_grid(rows = vars(p), cols = vars(v))
    + }
    > 
    > f(100)
    > f(10000)
    Error in `scale_apply()`:
    ! `scale_id` must not contain any "NA"
    Backtrace:
         ▆
      1. ├─base (local) `<fn>`(x)
      2. └─ggplot2:::print.ggplot(x)
      3.   ├─ggplot2::ggplot_build(x)
      4.   └─ggplot2:::ggplot_build.ggplot(x)
      5.     └─layout$train_position(data, scale_x(), scale_y())
      6.       └─ggplot2 (local) train_position(..., self = self)
      7.         └─self$facet$train_scales(...)
      8.           └─ggplot2 (local) train_scales(...)
      9.             └─ggplot2:::scale_apply(layer_data, x_vars, "train", SCALE_X, x_scales)
     10.               └─cli::cli_abort("{.arg scale_id} must not contain any {.val NA}")
     11.                 └─rlang::abort(...)
    Execution halted