Search code examples
rcorrespondence-analysis

Correspondence analysis plot in R- adjust text color


I am trying to change the color of the text/labels inside the plot (not x or y axis). This is my example code

observed <- matrix(c(  53, 15, 16, 45, 21, 5, 46, 11, 54, 40, 87, 43, 4, 19, 22, 16,
  19, 1, 3, 20, 7, 3, 6, 5, 24, 12, 29, 13, 3, 3, 4, 5,
  21, 1, 5, 26, 7, 4, 16, 5, 22, 19, 23, 21, 2, 13, 16, 18,
  153, 15, 45, 150, 46, 16, 107, 49, 180, 174, 212, 112, 24, 44, 47, 77,
  3, 1, 3, 11, 2, 0, 4, 1, 3, 9, 5, 8, 1, 3, 0, 4,
  4, 1, 5, 16, 4, 0, 3, 3, 16, 9, 9, 5, 1, 1, 2, 10,
  72, 10, 12, 67, 25, 6, 69, 24, 90, 79, 88, 59, 5, 30, 25, 23,
  25, 4, 8, 14, 17, 1, 23, 4, 29, 26, 31, 7, 4, 12, 18, 10,
  68, 6, 16, 43, 31, 2, 51, 26, 66, 54, 70, 17, 12, 29, 51, 20,
  5, 0, 2, 6, 5, 0, 6, 0, 9, 9, 9, 5, 1, 2, 2, 2,
  98, 10, 36, 113, 38, 15, 42, 38, 99, 89, 162, 75, 15, 28, 44, 37
), nrow=11, byrow=TRUE)

rownames(observed) <- c("apple", "banana", "grape", "olive",
                         "strawberry", "watermelon", "apricot", "plum", "orange",
                         "kiwi", "pomegranate")

colnames(observed) <- c("sandwich", "water", "bread",
                         "lettuce", "potato", "tomato",
                         "onion", "garlic", "pepper",
                         "broccoli", "mushroom", "jalapeno",
                         "prune", "avokado", "melon",
                         "lemon")



library(ca)
ca_results <- ca(observed)
plot(ca_results)

Anyone can help me to change the color? I want to put yellow color for lemon, red for strawberry and so on.

I tried to do it with text labels but keep getting NULL when trying to get the coordinates.


Solution

  • I modified plot.ca a little to achieve your goal. The new function is called "pasta" and yes, pepper is green. Omit the color vectors row.colors & col.colors in the function call to default to red and blue just like before. Row (fruit) is still cirlce and col (vegetable) is triangle.

    out

    observed <- matrix(c(  53, 15, 16, 45, 21, 5, 46, 11, 54, 40, 87, 43, 4, 19, 22, 16,
                           19, 1, 3, 20, 7, 3, 6, 5, 24, 12, 29, 13, 3, 3, 4, 5,
                           21, 1, 5, 26, 7, 4, 16, 5, 22, 19, 23, 21, 2, 13, 16, 18,
                           153, 15, 45, 150, 46, 16, 107, 49, 180, 174, 212, 112, 24, 44, 47, 77,
                           3, 1, 3, 11, 2, 0, 4, 1, 3, 9, 5, 8, 1, 3, 0, 4,
                           4, 1, 5, 16, 4, 0, 3, 3, 16, 9, 9, 5, 1, 1, 2, 10,
                           72, 10, 12, 67, 25, 6, 69, 24, 90, 79, 88, 59, 5, 30, 25, 23,
                           25, 4, 8, 14, 17, 1, 23, 4, 29, 26, 31, 7, 4, 12, 18, 10,
                           68, 6, 16, 43, 31, 2, 51, 26, 66, 54, 70, 17, 12, 29, 51, 20,
                           5, 0, 2, 6, 5, 0, 6, 0, 9, 9, 9, 5, 1, 2, 2, 2,
                           98, 10, 36, 113, 38, 15, 42, 38, 99, 89, 162, 75, 15, 28, 44, 37
    ), nrow=11, byrow=TRUE)
    
    rownames(observed) <- c("apple", "banana", "grape", "olive",
                            "strawberry", "watermelon", "apricot", "plum", "orange",
                            "kiwi", "pomegranate")
    
    colnames(observed) <- c("sandwich", "water", "bread",
                            "lettuce", "potato", "tomato",
                            "onion", "garlic", "pepper",
                            "broccoli", "mushroom", "jalapeno",
                            "prune", "avokado", "melon",
                            "lemon")
    
    #install.packages("ca")
    library(ca)
    
    # Perform CA
    ca_results <- ca(observed)
    
    # Create a color mapping for fruits and vegetables
    fruit_colors <- c(
      apple = "red3",
      banana = "yellow3",
      grape = "purple",
      olive = "olivedrab",
      strawberry = "red",
      watermelon = "pink",
      apricot = "orange",
      plum = "purple4",
      orange = "orange2",
      kiwi = "green4",
      pomegranate = "red4"
    )
    
    veg_colors <- c(
      sandwich = "brown",
      water = "blue",
      bread = "tan",
      lettuce = "green3",
      potato = "tan4",
      tomato = "tomato",
      onion = "pink2",
      garlic = "gray90",
      pepper = "green",
      broccoli = "green4",
      mushroom = "tan3",
      jalapeno = "darkgreen",
      prune = "purple3",
      avokado = "darkgreen",
      melon = "lightgreen",
      lemon = "yellow2"
    )
    
    
    pasta <- function (x, dim = c(1, 2), map = "symmetric", what = c("all", "all"), 
                    mass = c(FALSE, FALSE), contrib = c("none", "none"), 
                    # Modified color parameters
                    row.colors = NULL, col.colors = NULL,
                    default.row.color = "blue", default.col.color = "red",
                    pch = c(16, 21, 17, 24), labels = c(2, 2), arrows = c(FALSE, FALSE), 
                    lines = c(FALSE, FALSE), lwd = 1, xlab = "_auto_", ylab = "_auto_", 
                     ...) 
    {
      
      obj <- x
      if (length(what) != 2) {
        what <- rep(what, length = 2)
      }
      if (length(mass) != 2) {
        mass <- rep(mass, length = 2)
      }
      if (length(contrib) != 2) {
        contrib <- rep(contrib, length = 2)
      }
      if (length(labels) != 2) {
        labels <- rep(labels, length = 2)
      }
      if (length(pch) != 4) {
        pch <- rep(pch, length = 4)
      }
      if (length(lines) != 2) {
        lines <- rep(lines, length = 2)
      }
      
      # Initialize color vectors if not provided
      if (is.null(row.colors)) {
        row.colors <- rep(default.row.color, dim(obj$rowcoord)[1])
      }
      if (is.null(col.colors)) {
        col.colors <- rep(default.col.color, dim(obj$colcoord)[1])
      }
      
      if (!is.numeric(x$suprow)) {
        if (map == "colgab" | map == "colgreen") {
          if (what[1] != "none") 
            what[1] <- "active"
        }
      }
      if (!is.numeric(x$supcol)) {
        if (map == "rowgab" | map == "rowgreen") {
          if (what[2] != "none") 
            what[2] <- "active"
        }
      }
      if (min(dim) < 0) {
        swisign <- ifelse(dim < 0, -1, 1)
        dim.c <- dim(obj$rowcoord)[2]
        signmat <- diag(rep(swisign, length = dim.c))
        obj$rowcoord <- obj$rowcoord %*% signmat
        obj$colcoord <- obj$colcoord %*% signmat
        dim <- abs(dim)
      }
      K <- dim(obj$rowcoord)[2]
      I <- dim(obj$rowcoord)[1]
      J <- dim(obj$colcoord)[1]
      svF <- matrix(rep(obj$sv[1:K], I), I, K, byrow = TRUE)
      svG <- matrix(rep(obj$sv[1:K], J), J, K, byrow = TRUE)
      rpc <- obj$rowcoord * svF
      cpc <- obj$colcoord * svG
      symrpc <- obj$rowcoord * sqrt(svF)
      symcpc <- obj$colcoord * sqrt(svG)
      mt <- c("symmetric", "rowprincipal", "colprincipal", "symbiplot", 
              "rowgab", "colgab", "rowgreen", "colgreen")
      mti <- 1:length(mt)
      mtlut <- list(symmetric = list(x = rpc, y = cpc), rowprincipal = list(x = rpc, 
                                                                            y = obj$colcoord), colprincipal = list(x = obj$rowcoord, 
                                                                                                                   y = cpc), symbiplot = list(x = symrpc, y = symcpc), rowgab = list(x = rpc, 
                                                                                                                                                                                     y = obj$colcoord * obj$colmass), colgab = list(x = obj$rowcoord * 
                                                                                                                                                                                                                                      obj$rowmass, y = cpc), rowgreen = list(x = rpc, y = obj$colcoord * 
                                                                                                                                                                                                                                                                               sqrt(obj$colmass)), rowgreen = list(x = obj$rowcoord * 
                                                                                                                                                                                                                                                                                                                     sqrt(obj$rowmass), y = cpc))
      x <- mtlut[[mti[mt == map]]][[1]]
      y <- mtlut[[mti[mt == map]]][[2]]
      x.names <- obj$rownames
      y.names <- obj$colnames
      indx <- dim(x)[1]
      indy <- dim(y)[1]
      pch.x <- rep(pch[1], dim(x)[1])
      pch.y <- rep(pch[3], dim(y)[1])
      pr <- c("none", "active", "passive", "all")
      pri <- 1:4
      if (is.na(obj$rowsup[1])) {
        sup.x <- NA
        act.x <- x
        xn.sup <- NA
        xn.act <- x.names
      }
      else {
        sup.x <- x[obj$rowsup, ]
        act.x <- x[-obj$rowsup, ]
        pch.x[obj$rowsup] <- pch[2]
        xn.sup <- x.names[obj$rowsup]
        xn.act <- x.names[-obj$rowsup]
      }
      if (is.na(obj$colsup[1])) {
        sup.y <- NA
        act.y <- y
        yn.sup <- NA
        yn.act <- y.names
      }
      else {
        sup.y <- y[obj$colsup, ]
        act.y <- y[-obj$colsup, ]
        pch.y[obj$colsup] <- pch[4]
        yn.sup <- y.names[obj$colsup]
        yn.act <- y.names[-obj$colsup]
      }
      prlut <- list(none = list(x = NA, y = NA), active = list(x = act.x, 
                                                               y = act.y), supplementary = list(x = sup.x, y = sup.y), 
                    all = list(x = x, y = y))
      nameslut <- list(none = list(x.names = NA, y.names = NA), 
                       active = list(x.names = xn.act, y.names = yn.act), supplementary = list(x.names = xn.sup, 
                                                                                               y.names = yn.sup), all = list(x.names = x.names, 
                                                                                                                             y.names = y.names))
      pchlut <- list(none = list(x.pch = NA, y.pch = NA), active = list(x.pch = rep(pch[1], 
                                                                                    dim(x)[1]), y.pch = rep(pch[3], dim(y)[1])), supplementary = list(x.pch = rep(pch[2], 
                                                                                                                                                                  dim(x)[1]), y.pch = rep(pch[4], dim(y)[1])), all = list(x.pch = pch.x, 
                                                                                                                                                                                                                          y.pch = pch.y))
      x <- prlut[[pri[pr == what[1]]]][[1]]
      y <- prlut[[pri[pr == what[2]]]][[2]]
      x.names <- nameslut[[pri[pr == what[1]]]][[1]]
      y.names <- nameslut[[pri[pr == what[2]]]][[2]]
      x.pch <- pchlut[[pri[pr == what[1]]]][[1]]
      y.pch <- pchlut[[pri[pr == what[2]]]][[2]]
      if (is.matrix(x)) {
        x <- x[, dim]
      }
      else {
        x <- matrix(x[dim], ncol = length(dim), nrow = 1)
      }
      if (is.matrix(y)) {
        y <- y[, dim]
      }
      else {
        y <- matrix(y[dim], ncol = length(dim), nrow = 1)
      }
      if (mass[1]) {
        cex.x <- 0.5 + obj$rowmass^(1/3)/max(obj$rowmass^(1/3))
      }
      else {
        cex.x <- 1
      }
      if (mass[2]) {
        cex.y <- 0.5 + obj$colmass^(1/3)/max(obj$colmass^(1/3))
      }
      else {
        cex.y <- 1
      }
      nc0 <- 50
      cst <- 230
      
      q1 <- (1:dim(x)[1])
      q2 <- (1:dim(y)[1])
      l1 <- c(x[q1, 1], y[q2, 1])
      l1 <- l1[!is.na(l1)]
      l2 <- c(x[q1, 2], y[q2, 2])
      l2 <- l2[!is.na(l2)]
      if (length(l1) == 0) 
        l1 <- c(-0.1, 0.1)
      if (length(l2) == 0) 
        l2 <- c(-0.1, 0.1)
      lim1 <- range(l1) + c(-0.05, 0.05) * diff(range(l1))
      lim2 <- range(l2) + c(-0.05, 0.05) * diff(range(l2))
      pct <- round(100 * (obj$sv^2)/sum(obj$sv^2), 1)
      pct <- paste0(" (", pct[dim], "%)")
      if (xlab == "_auto_") {
        xlab = paste0("Dimension ", dim[1], pct[1])
      }
      if (ylab == "_auto_") {
        ylab = paste0("Dimension ", dim[2], pct[2])
      }
      pty.backup <- par()$pty
      plot(c(x[, 1], y[, 1]), c(x[, 2], y[, 2]), xlab = xlab, ylab = ylab, 
           type = "n", axes = FALSE, asp = 1, ...)
      box()
      abline(h = 0, v = 0, lty = 3)
      axis(1)
      axis(2)
      if (!is.na(x[1]) & labels[1] != 1) {
        if (arrows[1]) {
          for (i in 1:length(x[,1])) {
            .arrows(0, 0, x[i,1], x[i,2], 
                    col = row.colors[i], 
                    lwd = lwd, length = 0.1)
          }
        } else {
          points(x[,1], x[,2], cex = cex.x, 
                 col = row.colors[1:nrow(x)], 
                 pch = x.pch)
        }
      }
      
      if (labels[1] > 0) {
        xoff1 <- if (labels[1] > 1) 
          0.5 * strwidth(x.names, cex = 0.75) + 0.5 * strwidth("o", cex = 0.75)
        else 0
        xoff2 <- if (labels[1] > 1) 
          0.5 * strheight(x.names, cex = 0.75) + 0.5 * strheight("o", cex = 0.75)
        else 0
        text(x[,1] + xoff1, x[,2] + xoff2, x.names, 
             cex = 0.75, xpd = TRUE, 
             col = row.colors[1:nrow(x)])
      }
      
      if (!is.na(y[1]) & labels[2] != 1) {
        if (arrows[2]) {
          for (i in 1:length(y[,1])) {
            .arrows(0, 0, y[i,1], y[i,2], 
                    col = col.colors[i], 
                    lwd = lwd, length = 0.1)
          }
        } else {
          points(y[,1], y[,2], cex = cex.y, 
                 col = col.colors[1:nrow(y)], 
                 pch = y.pch)
        }
      }
      
      if (labels[2] > 0) {
        yoff1 <- if (labels[2] > 1) 
          0.5 * strwidth(y.names, cex = 0.75) + 0.5 * strwidth("o", cex = 0.75)
        else 0
        yoff2 <- if (labels[2] > 1) 
          0.5 * strheight(y.names, cex = 0.75) + 0.5 * strheight("o", cex = 0.75)
        else 0
        text(y[,1] + yoff1, y[,2] + yoff2, y.names, 
             cex = 0.75, xpd = TRUE, 
             col = col.colors[1:nrow(y)])
      }
      
      if (lines[1]) 
        lines(x[order(x[,1]),], col = row.colors[1], lwd = lwd)
      if (lines[2]) 
        lines(y[order(y[,1]),], col = col.colors[1], lwd = lwd)
      
      par(pty = pty.backup)
      rownames(x) <- x.names
      colnames(x) <- paste0("Dim", dim)
      rownames(y) <- y.names
      colnames(y) <- paste0("Dim", dim)
      result <- list(rows = x, cols = y)
      invisible(result)
    }
    
    pasta(ca_results, row.colors = fruit_colors, col.colors = veg_colors)