Search code examples
r ggplot2 format axis-labels

Good K/M/G abbreviation of y scale labels in ggplot2


Question

How can we easily have Kilo/Mega/Giga labels without "bits" or "Bytes" unit symbols?

Example

# Reproduces the problem: with number_bytes_format() the y-axis labels come
# out as "Kb" rather than plain "K".
ggplot(
  data = data.frame(x = LETTERS[1:5], n = c(0, 5000, 10000, 15000, 20000)),
  mapping = aes(x = x, y = n)
) +
  geom_point() +
  scale_y_continuous(labels = scales::number_bytes_format(units = "si"))

y scale with Kb instead of K

For the y scale I expect the labels 0K, 5K, 10K, 15K, 20K. No Kb!

Bonus question

Is there any available solution to get 0, 1K, 1M, 1G labels? I.e., the most appropriate abbreviation of the values?


Solution

  • Try gdata::humanReadable:

    # ggplot2 draws the plot; gdata provides humanReadable().
    library(ggplot2)
    library(gdata)

    # Five categories with values from 0 to 20000.
    myDat <- data.frame(
      x = LETTERS[1:5],
      n = c(0, 5000, 10000, 15000, 20000)
    )

    # Place a break at every observed value and label each break with the
    # abbreviation humanReadable() produces for it.
    ggplot(data = myDat, mapping = aes(x = x, y = n)) +
      geom_point() +
      scale_y_continuous(
        breaks = myDat$n,
        labels = humanReadable(myDat$n, standard = "Unix", sep = "")
      )
    

    enter image description here


    Edit:

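    We could customise the function: copy gdata::humanReadable and replace its SI suffixes ("B", "kB", "MB", ...) with unit-free ones ("", "K", "M", ...):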

    # Abbreviate non-negative numbers with magnitude suffixes (K, M, G, ...).
    #
    # A customised copy of gdata::humanReadable(): the "SI" suffix table is
    # replaced so that it carries no byte unit ("", "K", "M", ...) instead of
    # ("B", "kB", "MB", ...). This version is self-contained (base trimws() is
    # used, so gdata does not have to be attached), returns character(0) for
    # zero-length input, and ignores NA when checking for negative values.
    #
    # Arguments:
    #   x         Numeric vector of values to format; must not be negative.
    #   units     "auto" (default) chooses a suffix for each element
    #             separately; "bytes" leaves the values unscaled and appends
    #             the literal "bytes"; any other value is looked up
    #             case-insensitively in the suffix table selected by
    #             `standard` and applied to every element.
    #   standard  Suffix table and base: "SI" divides by powers of 1000,
    #             "IEC" (the default) and "Unix" by powers of 1024.
    #   digits    With units = "auto" and width = NULL, the number of decimal
    #             places. With explicit units it is passed to format.default()
    #             as both `digits` and `nsmall`.
    #   width     Optional width for units = "auto". When the scaled number
    #             has more characters than `width`, the number of decimals is
    #             recomputed from `width`; values that need no scaling are
    #             rounded to whole numbers.
    #   sep       String placed between the number and its suffix.
    #   justify   Justification passed to format() for the number and for the
    #             suffix; a single value is used for both. When every entry is
    #             "none" the two parts are pasted without padding.
    #
    # Returns a character vector with one element per element of `x`.
    # Raises an error if any non-missing element of `x` is negative.
    humanReadableCustom <- function(x, units = "auto",
                                    standard = c("IEC", "SI", "Unix"),
                                    digits = 1, width = NULL, sep = " ",
                                    justify = c("right", "left")) {
      standard <- match.arg(standard)
      if (length(justify) == 1) {
        justify <- c(justify, justify)
      }

      # na.rm = TRUE: a missing value must not turn the condition into NA,
      # which `if` cannot evaluate.
      if (any(x < 0, na.rm = TRUE)) {
        stop("'x' must be positive")
      }
      # Nothing to format; the matrix indexing further down needs at least
      # one column.
      if (length(x) == 0) {
        return(character(0))
      }

      suffix <- switch(standard,
        # custom: unit-free replacement for "B", "kB", "MB", ...
        SI = c("", "K", "M", "G", "T", "P", "E", "Z", "Y"),
        IEC = c("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"),
        Unix = c("B", "K", "M", "G", "T", "P", "E", "Z", "Y")
      )
      base <- if (standard == "SI") 10^3 else 2^10

      # Scale a single value and return c(<number as text>, <suffix>).
      scale_one <- function(value) {
        # Index of the suffix, clamped to the range of the suffix table.
        i <- pmax(pmin(floor(log(value, base)), length(suffix) - 1), 0)
        # A missing value yields a non-finite index: fall back to no scaling.
        if (!is.finite(i)) {
          i <- 0
        }
        scaled <- value / base^i
        if (is.null(width)) {
          scaled <- format(round(x = scaled, digits = digits), nsmall = digits)
        } else {
          # `digits` is reassigned locally only; the caller's value is kept
          # for the next element.
          if (nchar(scaled) > width) {
            digits <- pmax(width - nchar(round(scaled)) - 1, 0)
          }
          if (i == 0) {
            digits <- 0
          }
          scaled <- round(scaled, digits = digits)
        }
        c(scaled, suffix[i + 1])
      }

      # `retval` is a two-row character matrix: numbers on top, suffixes below.
      if (!missing(units) && units == "bytes") {
        retval <- rbind(x, "bytes")
      } else if (!missing(units) && units != "auto") {
        units <- suffix[match(toupper(units), toupper(suffix))]
        power <- match(units, suffix) - 1
        X <- x / (base^power)
        X <- format.default(x = X, digits = digits, nsmall = digits)
        retval <- rbind(X, rep(units, length(X)))
      } else {
        # vapply() guarantees the 2 x length(x) character shape.
        retval <- vapply(x, scale_one, character(2))
      }

      number <- trimws(retval[1, ])
      magnitude <- trimws(retval[2, ])
      if (all(justify == "none")) {
        paste(number, magnitude, sep = sep)
      } else {
        paste(format(number, justify = justify[1]),
              format(magnitude, justify = justify[2]), sep = sep)
      }
    }
    

    Then plot, using the customised function to build the y-axis labels:

    # Attach ggplot2 and gdata.
    library(ggplot2)
    library(gdata)

    # Same example data as before: five categories, values 0 to 20000.
    myDat <- data.frame(
      x = LETTERS[1:5],
      n = c(0, 5000, 10000, 15000, 20000)
    )

    # One break per observed value, labelled by humanReadableCustom() with
    # the SI table (powers of 1000, no byte unit).
    ggplot(data = myDat, mapping = aes(x = x, y = n)) +
      geom_point() +
      scale_y_continuous(
        breaks = myDat$n,
        labels = humanReadableCustom(myDat$n, standard = "SI", sep = "")
      )
    

    enter image description here