Search code examples
rpca

How to colour code a PCA plot based on the data frame cell names?


# Simulate a genes-by-samples count matrix: 100 genes (rows) measured in
# 5 "wt" and 5 "ko" samples (columns). Cells start as NA and are filled in
# one gene at a time below.
sample.names <- c(paste0("wt", seq_len(5)), paste0("ko", seq_len(5)))
gene.names <- paste0("gene", seq_len(100))
data.matrix <- matrix(
  nrow = length(gene.names),
  ncol = length(sample.names),
  dimnames = list(gene.names, sample.names)
)

# For every gene, draw one Poisson mean per condition (uniformly from
# 10..1000) and then 5 replicate counts around that mean. The wt draws are
# made before the ko draws so the random-number stream is consumed in a
# fixed order.
for (gene in seq_len(nrow(data.matrix))) {
  wt.lambda <- sample(x = 10:1000, size = 1)
  wt.counts <- rpois(5, lambda = wt.lambda)
  ko.lambda <- sample(x = 10:1000, size = 1)
  ko.counts <- rpois(5, lambda = ko.lambda)

  data.matrix[gene, ] <- c(wt.counts, ko.counts)
}

# Quick sanity check of the simulated data.
head(data.matrix)
dim(data.matrix)

# Run PCA on the samples. prcomp() expects observations in rows, so the
# genes-by-samples matrix is transposed first. `scale.` is written out in
# full rather than relying on partial argument matching of `scale`.
pca <- prcomp(t(data.matrix), scale. = TRUE)

# Percentage of the total variance captured by each principal component;
# used for the axis labels of the plot below.
pca.var <- pca$sdev^2
pca.var.per <- round(pca.var / sum(pca.var) * 100, 1)

# Install ggplot2 only if it is not already available, then attach it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# One row per sample: its name and its coordinates on PC1 (X) and PC2 (Y).
pca.data <- data.frame(
  Sample = rownames(pca$x),
  X = pca$x[, 1],
  Y = pca$x[, 2]
)
pca.data

# Scatter of the samples on the first two components, drawn as text labels.
ggplot(data = pca.data, aes(x = X, y = Y, label = Sample)) +
  geom_text() +
  xlab(paste0("PC1 - ", pca.var.per[1], "%")) +
  ylab(paste0("PC2 - ", pca.var.per[2], "%")) +
  theme_bw() +
  ggtitle("My PCA Graph")

The above code gives me this final pca plot: enter image description here

How can I change the wt and ko points into coloured dots? (i.e. change all "wt" points to blue dots and all "ko" points to red dots)


Solution

  • EDIT: The question was changed after my initial answer, see the bottom for updated answer.

    You can get the second character of Sample with substr(), and then pass that to col. Here is an example:

    library(ggplot2)
    library(dplyr)
    
    # Toy data set: eight named samples with random X/Y coordinates.
    # The second character of every Sample name is either "1" or "2".
    example_data <- data.frame(
      Sample = c(
        "A1.1", "H2.1", "F2.1", "B1.1",
        "C1.1", "S2.1", "J2.1", "K1.1"
      ),
      X = stats::rnorm(8),
      Y = stats::rnorm(8)
    )
    
    
    # Plot every sample as a text label, coloured by the group encoded in the
    # second character of its name ("1" -> blue, "2" -> red).
    example_data %>%
      mutate(prop = substr(Sample, 2, 2)) %>% # Make a new column with the values
      ggplot(aes(x = X, y = Y, label = Sample, col = prop)) +
      geom_text() +
      xlab("PC1 - ") +
      # ylab(), not a second xlab(): a repeated xlab() would replace the x-axis
      # label with "PC2 - " and leave the y axis with its default name.
      ylab("PC2 - ") +
      scale_color_manual(values = c("1" = "blue", "2" = "red")) + # Assigns colour to values
      theme_bw() +
      theme(legend.position = "none") # Removes legend
    

    Created on 2021-06-07 by the reprex package (v0.3.0)

    With the edited question, here is how to get points instead of text. Simply swap out geom_text() for geom_point().

    
    # Same plot as before, but each sample is drawn as a point instead of a
    # text label.
    # Derive the grouping key from the second character of the sample name.
    grouped_data <- mutate(example_data, prop = substr(Sample, 2, 2))

    ggplot(grouped_data, aes(x = X, y = Y, label = Sample, col = prop)) +
      geom_point() + # points rather than geom_text()
      labs(x = "PC1 - ", y = "PC2 - ") +
      scale_color_manual(values = c("1" = "blue", "2" = "red")) +
      theme_bw() +
      theme(legend.position = "none")
    

    Created on 2021-06-07 by the reprex package (v0.3.0)