I have a dataframe (site by species matrix)that looks like this:
SP1 SP2 SP3 SP4
US 5 6 2 5
US 5 6 2 5
UK 5 6 2 5
AUS 5 6 2 5
I'm trying to create a PCoA plot (Principal Coordinate Analysis) with 95% confidence polygons/ellipses. I need to uniquely color code each country(the points) along with each ellipse having the corresponding color code for the country and the legends.
#My current code
#If you need a dataframe
df <- t(data.frame(matrix(rexp(10000, rate=10),nrow=100,ncol=100)))
rownames(df) <- rep(c("UK", "US", "Aus","Spain"),length.out=100)#I dont know how to loop this over 100 times to make it the rownames
df <- as.matrix(df[,-1]) #Use this to convert dataframe to matrix
row.names(df) <- df[,1]#Use this to convert dataframe to matrix
dat <- df
dat.db <- vegdist(dat, method = "bray")
dat.pcoa <- cmdscale(dat.db, eig = TRUE, k = 3)
explainvar1 <- round(dat.pcoa$eig[1] / sum(dat.pcoa$eig), 3) * 100
explainvar2 <- round(dat.pcoa$eig[2] / sum(dat.pcoa$eig), 3) * 100
explainvar3 <- round(dat.pcoa$eig[3] / sum(dat.pcoa$eig), 3) * 100
sum.eig <- sum(explainvar1, explainvar2, explainvar3)
# Define Plot Parameters
par(mar = c(5, 5, 1, 2) + 0.1)
# Initiate Plot
plot(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
xlab = paste("PCoA 1 (", explainvar1, "%)", sep = ""),
ylab = paste("PCoA 2 (", explainvar2, "%)", sep = ""),
pch = 16, cex = 2.0, type = "n", cex.lab = 1.5, cex.axis = 1.2, axes = FALSE)
axis(side = 1, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
axis(side = 2, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
abline(h = 0, v = 0, lty = 3)
box(lwd = 2)
points(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
pch = 19, cex = 1, bg = "gray", col = "grey")
ordiellipse(ord = dat.pcoa, groups = rownames(dat), kind = "se",conf = .95,col = NULL)
Note: This is not the same question as posted here. This question only asks how to perform the ordiplot in base package (since I have hit a wall for ggplot2)
For base plot, you can provide a vector of colors.
Therefore, for each point you should assign a color, and use this vector of colors of length n points as input to col parameter in points
. The easiest way to do this is factor the countries rownames of the PCA data and use the factor integer values to index a defined color palette.
Similarly you can provide a vector of colors of length n groups to ordiellipse
Here is the code based on your sample df:
library(vegan)
df <- t(data.frame(matrix(rexp(10000, rate=10),nrow=100,ncol=100)))
rownames(df) <- rep(c("UK", "US", "Aus","Spain"), length.out=100)
colnames(df) <- paste0("SP", 1:ncol(df))
Now we factor the countries rownames
# factor country and set levels
col_vector <- factor(rownames(dat.pcoa$points),
levels=unique(rownames(dat.pcoa$points)))
col_vector
# see integer values of your factored countries and the given order
str(col_vector)
# Factor w/ 4 levels "UK","US","Aus",..: 1 2 3 4 1 2 3 4 1 2 ...
Create a color palette and index using the factored country names
# palette() is the default R color palette or
# you can specify a vector of 4 colors
col_palette <- palette()[col_vector]
col_palette
Then use these values in your plot
par(mar = c(5, 5, 1, 2) + 0.1)
# Initiate Plot
plot(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
xlab = paste("PCoA 1 (", explainvar1, "%)", sep = ""),
ylab = paste("PCoA 2 (", explainvar2, "%)", sep = ""),
pch = 16, cex = 2.0, type = "n",
cex.lab = 1.5, cex.axis = 1.2, axes = FALSE)
axis(side = 1, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
axis(side = 2, labels = T, lwd.ticks = 2, cex.axis = 1.2, las = 1)
abline(h = 0, v = 0, lty = 3)
box(lwd = 2)
points(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
pch = 19, cex = 1, bg = "gray", col = col_palette)
ordiellipse(ord = dat.pcoa, groups = col_vector,
kind = "se", conf = .95, col = unique(col_palette))
Here we check that the labels match the assigned color
# sanity check that point colors match label
text(dat.pcoa$points[ ,1], dat.pcoa$points[ ,2],
labels=rownames(dat.pcoa$points), col = col_palette)
# sanity check that ellipse color match labels
ordiellipse(ord = dat.pcoa, groups = col_vector,
kind = "se", conf = .95, col = unique(col_palette), label=TRUE)
Finally add a legend
legend('topright', legend=unique(col_vector), col=unique(col_palette), pch = 16)