Search code examples
rcolorslegendpcaconfidence-interval

How to add colors and legend to ordiellipse in base package in R?


I have a dataframe (site by species matrix)that looks like this:

            SP1      SP2     SP3     SP4
    US       5       6       2       5
    US       5       6       2       5
    UK       5       6       2       5
   AUS       5       6       2       5

I'm trying to create a PCoA plot (Principal Coordinate Analysis) with 95% confidence polygons/ellipses. I need to uniquely color code each country(the points) along with each ellipse having the corresponding color code for the country and the legends.

#My current code
#If you  need a dataframe
df <- t(data.frame(matrix(rexp(10000, rate=10),nrow=100,ncol=100)))
rownames(df) <- rep(c("UK", "US", "Aus","Spain"),length.out=100)#I dont know how to loop this over 100 times to make it the rownames


df <- as.matrix(df[,-1]) #Use this to convert dataframe to matrix
row.names(df) <- df[,1]#Use this to convert dataframe to matrix
dat <- df
dat.db <- vegdist(dat, method = "bray")
dat.pcoa <- cmdscale(dat.db, eig = TRUE, k = 3)
explainvar1 <- round(dat.pcoa$eig[1] / sum(dat.pcoa$eig), 3) * 100
explainvar2 <- round(dat.pcoa$eig[2] / sum(dat.pcoa$eig), 3) * 100
explainvar3 <- round(dat.pcoa$eig[3] / sum(dat.pcoa$eig), 3) * 100
sum.eig <- sum(explainvar1, explainvar2, explainvar3)

# Define Plot Parameters
par(mar = c(5, 5, 1, 2) + 0.1)
# Initiate Plot
plot(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
     xlab = paste("PCoA 1 (", explainvar1, "%)", sep = ""),
     ylab = paste("PCoA 2 (", explainvar2, "%)", sep = ""),
     pch = 16, cex = 2.0, type = "n", cex.lab = 1.5, cex.axis = 1.2, axes = FALSE)
axis(side = 1, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
axis(side = 2, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
abline(h = 0, v = 0, lty = 3)
box(lwd = 2)
points(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
       pch = 19, cex = 1, bg = "gray", col = "grey")
ordiellipse(ord = dat.pcoa, groups = rownames(dat), kind = "se",conf = .95,col = NULL)

Note: This is not the same question as posted here. This question only asks how to perform the ordiplot in base package (since I have hit a wall for ggplot2)


Solution

  • For base plot, you can provide a vector of colors.

    Therefore, for each point you should assign a color, and use this vector of colors of length n points as input to col parameter in points. The easiest way to do this is factor the countries rownames of the PCA data and use the factor integer values to index a defined color palette.

    Similarly you can provide a vector of colors of length n groups to ordiellipse

    Here is the code based on your sample df:

    library(vegan)
    df <- t(data.frame(matrix(rexp(10000, rate=10),nrow=100,ncol=100)))
    rownames(df) <- rep(c("UK", "US", "Aus","Spain"), length.out=100)
    colnames(df) <- paste0("SP", 1:ncol(df))
    

    Now we factor the countries rownames

    # factor country and set levels
    col_vector <- factor(rownames(dat.pcoa$points),
                         levels=unique(rownames(dat.pcoa$points)))
    col_vector
    
    # see integer values of your factored countries and the given order
    str(col_vector) 
    # Factor w/ 4 levels "UK","US","Aus",..: 1 2 3 4 1 2 3 4 1 2 ...
    

    Create a color palette and index using the factored country names

    # palette() is the default R color palette or 
    # you can specify a vector of 4 colors
    col_palette <- palette()[col_vector]
    col_palette
    

    Then use these values in your plot

    par(mar = c(5, 5, 1, 2) + 0.1)
    # Initiate Plot
    plot(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
         xlab = paste("PCoA 1 (", explainvar1, "%)", sep = ""),
         ylab = paste("PCoA 2 (", explainvar2, "%)", sep = ""),
         pch = 16, cex = 2.0, type = "n", 
         cex.lab = 1.5, cex.axis = 1.2, axes = FALSE)
    axis(side = 1, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
    axis(side = 2, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
    abline(h = 0, v = 0, lty = 3)
    box(lwd = 2)
    points(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
           pch = 19, cex = 1, bg = "gray", col = col_palette)
    ordiellipse(ord = dat.pcoa, groups = col_vector, 
                kind = "se", conf = .95, col = unique(col_palette)) 
    

    Here we check that the labels match the assigned color

    # sanity check that point colors match label
    text(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2], 
         labels=rownames(dat.pcoa$points), col = col_palette) 
    # sanity check that ellipse color match labels
    ordiellipse(ord = dat.pcoa, groups = col_vector, 
                kind = "se", conf = .95, col = unique(col_palette), label=TRUE) 
    

    enter image description here enter image description here

    Finally add a legend

    legend('topright', legend=unique(col_vector), col=unique(col_palette), pch = 16)