Search code examples
rggplot2pcaggbiplot

Change legend and shape in ggbiplot pca


can you please help me with my pca? I would like to change the shapes in that way that each species has a different color and all 2-3 organisms for each species have 2-3 different symbols. It should look like this: enter image description here

So far I tried the following code:

setwd("~/Schwarze Johannisbeeren/SJ Wein mit nicht Sc/PCA/stackoverflow frage")
results = read.csv("results.csv", sep = ";", encoding = "UTF-8", header=TRUE, check.names=FALSE)


results.pca <- prcomp(results[,c(3:7)],       
                    center = TRUE,
                    scale. = TRUE)

#grouping by organism
results.organism <- results[, 1]

#by species
results.species <- results[, 2]


summary(results.pca)


library(ggplot2)
library(ggbiplot)


ggbiplot(results.pca, alpha=0, obs.scale = 1, var.scale = 1 ,ellipse = TRUE,ellipse.prob=0.68, circle = F, varname.size=0, 
         var.axes = F, groups=results$species) +
  theme_bw()+
  geom_point(aes( colour=factor(results.species)), size=2)+
scale_shape_manual(values= c("Mt1"= 1,  "Mt2" =2, "Al1"= 1, "Al2" =2, "Bg1" =1, "Bg2"=2, "Bg3" =3, "Cs1"= 1, "Cs2" =2, "Cs3" =3, "Df1"= 1, "Df2" =2, "Df3" =3))+
                     
  #scale_color_brewer(name= "organism", type = "qual", palette = 2)+
  #scale_x_continuous (limits = c (-1, 9))+   
  theme(axis.text.x = element_text(size = 12, colour = "black", vjust = 0.5, hjust = 1, face= "bold"), 
        axis.title.y = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"),
        axis.text.y = element_text(colour = "black", size = 12, face = "bold"))

and that´s my data

> results
   organism species  lactones cyanides alcohols  ethers   acids
1       Mt1      Mt 23435.167    166.4 137653.9  4040.1 1131.52
2       Mt1      Mt 23303.111    168.9 153511.0  4529.1 1148.52
3       Mt1      Mt 22340.556    176.6 150719.9  3255.8 1200.88
4       Mt2      Mt 51519.222    175.9 173401.1  3890.1 1196.12
5       Mt2      Mt 48824.500    166.5 171614.4  3694.1 1132.20
6       Mt2      Mt 50427.278    165.4 168865.1  3693.2 1124.72
7       Al1      Al 25260.222    162.0 211737.4  9563.9 1101.60
8       Al1      Al 23177.556    161.5 199886.7 10403.3 1098.20
9       Al1      Al 27903.000    156.2 240088.4 11897.1 1062.16
10      Al2      Al  5993.722    180.4 289334.9  6673.3 1226.72
11      Al2      Al  7307.389    169.7 275631.1  8333.4 1153.96
12      Al2      Al  9419.167    147.5 277924.5  9622.2 1003.00
13      Bg1      Bg 58216.944    132.4  92275.3  4099.5  900.32
14      Bg1      Bg 69860.222    147.4 105654.9  4080.6 1002.32
15      Bg1      Bg 72809.333    145.8 111731.3  4014.6  991.44
16      Bg2      Bg 51584.611    142.9 105548.2  6450.1  971.72
17      Bg2      Bg 57738.056    141.2 117728.9  6332.4  960.16
18      Bg2      Bg 53356.056    142.7 110260.2  6506.2  970.36
19      Bg3      Bg 41983.389    130.8 103799.4  4781.8  889.44
20      Bg3      Bg 46930.722    148.3 113944.6  5151.6 1008.44
21      Bg3      Bg 49487.611    139.4 121976.5  5318.3  947.92
22      Cs1      Cs  7155.056    161.6 221538.8  8356.0 1098.88
23      Cs1      Cs  8153.611    151.0 179823.0  7961.2 1026.80
24      Cs1      Cs  7445.722    168.6 176978.0  8196.5 1146.48
25      Cs2      Cs 10771.556    126.4 144314.1  8634.6  859.52
26      Cs2      Cs 12239.556    142.6 142913.7  9471.9  969.68
27      Cs2      Cs 13788.611    136.1 131506.7  9390.4  925.48
28      Cs3      Cs 12082.111    152.0 171730.0  6259.6 1033.60
29      Cs3      Cs 14331.556    143.3 141748.7  7532.8  974.44
30      Cs3      Cs 14123.056    158.2 150303.0  7755.8 1075.76
31      Df1      Df 26906.778    156.2 310203.9  5505.5 1062.16
32      Df1      Df 20689.111    163.5 214322.9  5315.6 1111.80
33      Df1      Df 22872.722    154.1 197572.9  4627.7 1047.88
34      Df2      Df 18838.222    159.2 125167.6 12372.9 1082.56
35      Df2      Df 18218.667    155.8 127077.2 11182.0 1059.44
36      Df2      Df 18545.389    156.2 154400.4 10543.6 1062.16
37      Df3      Df 19924.111    156.4 199472.6  4452.3 1063.52
38      Df3      Df 22504.056    158.0 196343.0  3994.1 1074.40
39      Df3      Df 16907.278    151.5 185052.9  4084.6 1030.20
>

By the way, is it possible to have only PC1( x %) instead of PC1(x % explained var.) for the axis labeling?


Solution

  • One approach to achieve your desired result would be to first create shape and color palettes which map organism names to shapes and colors. Second, inside your geom_point extend the data by adding a column with the organism for which I use dplyr::bind_cols. Doing so allows to map the organism on the shape and the color aes. Finally, get rid of the color legend for the groups using scale_color_discrete(guide = "none") and add a second color scale via ggnewscale::new_scale_color and a scale_color_manual:

    Note: Easy fix for the axis titles would be to set them manually using +labs(x = ..., y = ...).

    library(ggplot2)
    library(ggbiplot)
    
    pal_shape <- gsub("^.*?(.)$", "\\1", results$organism)
    pal_shape <- scales::shape_pal()(3)[as.integer(pal_shape)]
    names(pal_shape) <- results$organism
    
    pal_color <- gsub("^(.*?).$", "\\1", results$organism)
    pal_color <- setNames(scales::hue_pal()(5), sort(unique(results$species)))[pal_color]
    names(pal_color) <- results$organism
    
    ggbiplot(results.pca,
      alpha = 0, obs.scale = 1, var.scale = 1, ellipse = TRUE, ellipse.prob = 0.68, circle = F, varname.size = 0,
      var.axes = F, groups = results$species
    ) +
      scale_color_discrete(guide = "none") +
      ggnewscale::new_scale_color() +
      geom_point(data = ~ dplyr::bind_cols(.x, organism = results$organism), 
                 aes(shape = organism, colour = organism), 
                 size = 2) +
      scale_shape_manual(values = pal_shape) +
      scale_color_manual(values = pal_color) +
      theme_bw() +
      theme(
        axis.text.x = element_text(size = 12, colour = "black", vjust = 0.5, hjust = 1, face = "bold"),
        axis.title.y = element_text(size = 12, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"),
        axis.text.y = element_text(colour = "black", size = 12, face = "bold")
      )
    

    enter image description here

    DATA

    results <- structure(list(organism = c(
      "Mt1", "Mt1", "Mt1", "Mt2", "Mt2",
      "Mt2", "Al1", "Al1", "Al1", "Al2", "Al2", "Al2", "Bg1", "Bg1",
      "Bg1", "Bg2", "Bg2", "Bg2", "Bg3", "Bg3", "Bg3", "Cs1", "Cs1",
      "Cs1", "Cs2", "Cs2", "Cs2", "Cs3", "Cs3", "Cs3", "Df1", "Df1",
      "Df1", "Df2", "Df2", "Df2", "Df3", "Df3", "Df3"
    ), species = c(
      "Mt",
      "Mt", "Mt", "Mt", "Mt", "Mt", "Al", "Al", "Al", "Al", "Al", "Al",
      "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Bg", "Cs", "Cs",
      "Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Cs", "Df", "Df", "Df", "Df",
      "Df", "Df", "Df", "Df", "Df"
    ), lactones = c(
      23435.167, 23303.111,
      22340.556, 51519.222, 48824.5, 50427.278, 25260.222, 23177.556,
      27903, 5993.722, 7307.389, 9419.167, 58216.944, 69860.222, 72809.333,
      51584.611, 57738.056, 53356.056, 41983.389, 46930.722, 49487.611,
      7155.056, 8153.611, 7445.722, 10771.556, 12239.556, 13788.611,
      12082.111, 14331.556, 14123.056, 26906.778, 20689.111, 22872.722,
      18838.222, 18218.667, 18545.389, 19924.111, 22504.056, 16907.278
    ), cyanides = c(
      166.4, 168.9, 176.6, 175.9, 166.5, 165.4, 162,
      161.5, 156.2, 180.4, 169.7, 147.5, 132.4, 147.4, 145.8, 142.9,
      141.2, 142.7, 130.8, 148.3, 139.4, 161.6, 151, 168.6, 126.4,
      142.6, 136.1, 152, 143.3, 158.2, 156.2, 163.5, 154.1, 159.2,
      155.8, 156.2, 156.4, 158, 151.5
    ), alcohols = c(
      137653.9, 153511,
      150719.9, 173401.1, 171614.4, 168865.1, 211737.4, 199886.7, 240088.4,
      289334.9, 275631.1, 277924.5, 92275.3, 105654.9, 111731.3, 105548.2,
      117728.9, 110260.2, 103799.4, 113944.6, 121976.5, 221538.8, 179823,
      176978, 144314.1, 142913.7, 131506.7, 171730, 141748.7, 150303,
      310203.9, 214322.9, 197572.9, 125167.6, 127077.2, 154400.4, 199472.6,
      196343, 185052.9
    ), ethers = c(
      4040.1, 4529.1, 3255.8, 3890.1,
      3694.1, 3693.2, 9563.9, 10403.3, 11897.1, 6673.3, 8333.4, 9622.2,
      4099.5, 4080.6, 4014.6, 6450.1, 6332.4, 6506.2, 4781.8, 5151.6,
      5318.3, 8356, 7961.2, 8196.5, 8634.6, 9471.9, 9390.4, 6259.6,
      7532.8, 7755.8, 5505.5, 5315.6, 4627.7, 12372.9, 11182, 10543.6,
      4452.3, 3994.1, 4084.6
    ), acids = c(
      1131.52, 1148.52, 1200.88,
      1196.12, 1132.2, 1124.72, 1101.6, 1098.2, 1062.16, 1226.72, 1153.96,
      1003, 900.32, 1002.32, 991.44, 971.72, 960.16, 970.36, 889.44,
      1008.44, 947.92, 1098.88, 1026.8, 1146.48, 859.52, 969.68, 925.48,
      1033.6, 974.44, 1075.76, 1062.16, 1111.8, 1047.88, 1082.56, 1059.44,
      1062.16, 1063.52, 1074.4, 1030.2
    )), class = "data.frame", row.names = c(
      "1",
      "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
      "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
      "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35",
      "36", "37", "38", "39"
    ))