Search code examples
rplotpca

How to make a profile plot (principal component analysis) in R?


I'm currently running principal component analysis. For the interpretation I want to create a profile (pattern) plot to visualize the correlation between each principal component and the original variables. Is anyone familiar with a package or code to create this in R? I'm using the prcomp() function in R.

See examples:

enter image description here

profile pattern plot (PCA)

https://canadianaudiologist.ca/predicting-speech-perception-from-the-audiogram-and-vice-versa/ https://blogs.sas.com/content/iml/2019/11/04/interpret-graphs-principal-components.html

This is similar data to my db:

db <- structure(list(T025 = c(20, 60, 20, 10, 85, 5, 15, 10, 10, 25, 
15, 5, 15, 30, 15, 15, 10, 25, 45, 25, 55, 20, 65, 20, 10, 10, 
15, 15, 30, 35, 10, 50, 20, 15, 30, 15, 20, 35, 30, 20, 10, 20, 
30, 15, 40, 15, 10, 10, 20, 25, -5, 10, 40, 0, 15, 5, 15, 30, 
15, 80, 15, 35, 10, 50, 25, 10, 15, 20, 20, 20, 25, 20, 30, 10, 
20, 50, 25, 25, 55, 30, 20, 30, 15, 10, 15, 15, 35, 20, 30, 15, 
40, 20, 25, 15, 20, 35, 15, 25, 20, 40, 0, 20, 10, 10, 15, 10, 
20, 10, 35, 35, 25, 30, 20, 25, 15, 30, 35, 25, 30, 5, 20, 30, 
15, 25, 10), T05 = c(0, 25, 0, 5, 25, 5, 0, 0, 5, 5, 5, -5, 5, 
15, 15, 5, 0, 15, 25, 15, 50, 20, 45, 5, 5, 5, 0, 10, 10, 10, 
5, 20, 15, 10, 20, 10, -5, 10, 30, -5, 0, 10, 35, 5, 40, 0, 0, 
-5, 15, 25, 0, 5, 35, -5, 5, 0, 5, 5, 10, 70, 0, 20, 5, 30, 10, 
10, 5, 5, 25, 10, 20, 5, 25, 5, 10, 35, 15, 10, 45, 15, 15, 25, 
10, 5, 10, 5, 20, 15, 15, 5, 10, 10, 20, 5, 15, 25, 5, 20, 10, 
35, -10, 5, 0, -5, 0, 5, 15, 5, 15, 35, 20, 25, 10, 15, 15, 25, 
45, 0, 25, 0, 5, 25, 0, 20, 5), T1 = c(25, 20, 25, 20, 50, 10, 
15, 20, 25, 25, 25, 25, 15, 45, 25, 25, 20, 35, 40, 35, 65, 45, 
45, 30, 25, 20, 5, 20, 30, 25, 20, 35, 25, 25, 35, 15, 15, 25, 
45, 20, 25, 35, 40, 25, 60, 15, 15, 15, 25, 45, 20, 20, 60, 15, 
20, 25, 45, 45, 25, 75, 10, 45, 15, 50, 20, 25, 20, 15, 40, 30, 
50, 20, 40, 20, 35, 50, 35, 15, 50, 30, 20, 45, 25, 25, 20, 45, 
30, 35, 30, 30, 15, 15, 30, 25, 25, 25, 15, 40, 25, 55, 20, 30, 
10, 15, 50, 15, 40, 20, 20, 55, 35, 45, 20, 50, 35, 20, 65, 10, 
35, 15, 30, 55, 25, 15, 25), T2 = c(20, 20, 15, 25, 70, 10, 15, 
45, 50, 30, 20, 25, 10, 40, 20, 40, 30, 40, 25, 30, 45, 25, 50, 
20, 20, 20, 10, 10, 45, 10, 5, 40, 20, 15, 50, 25, 15, 20, 25, 
30, 20, 30, 35, 15, 65, 20, 25, 10, 10, 60, 25, 20, 70, 5, 15, 
15, 15, 25, 15, 60, 25, 55, 5, 50, 30, 35, 5, 10, 30, 10, 55, 
25, 40, 35, 40, 45, 25, 20, 35, 40, 5, 40, 10, 25, 10, 40, 30, 
20, 25, 25, 10, 25, 30, 45, 20, 25, 10, 55, 40, 60, 5, 10, 10, 
5, 20, 0, 40, 20, 35, 80, 25, 40, 15, 55, 25, 15, 65, 5, 25, 
5, 35, 45, 10, 5, 10), T4 = c(10, 25, 35, 35, 70, 20, 15, 70, 
55, 30, 50, 35, 40, 40, 35, 45, 60, 50, 15, 25, 70, 10, 60, 40, 
30, 15, 15, 15, 50, 5, 20, 70, 5, 35, 65, 40, 20, 65, 50, 30, 
45, 55, 65, 35, 45, 35, 40, 20, 5, 65, 20, 25, 75, 10, 25, 25, 
10, 25, 20, 55, 20, 65, 5, 60, 70, 45, 15, 25, 35, 5, 70, 55, 
65, 40, 35, 55, 35, 45, 45, 45, 20, 40, 25, 50, 15, 55, 55, 40, 
30, 60, 10, 60, 40, 35, 30, 65, 5, 75, 55, 80, 15, 30, 55, 15, 
50, 25, 45, 30, 45, 90, 20, 45, 20, 40, 35, 20, 70, 20, 30, 45, 
50, 55, 45, 5, 45), T8 = c(5, 55, 55, 40, 75, 40, 5, 70, 25, 
10, 50, 55, 5, 35, 10, 30, 40, 55, 20, 20, 65, -5, 55, 50, -10, 
45, 5, 50, 65, 20, 0, 75, 15, 30, 50, 50, 30, 70, 45, 25, 35, 
40, 85, 30, 60, 50, 55, 15, 10, 75, 60, 20, 90, 0, 20, 55, -10, 
20, 10, 45, 20, 65, 0, 70, 85, 0, -5, 30, 35, 5, 80, 45, 60, 
25, 35, 55, 30, 45, 65, 45, -5, 35, 35, 40, 50, 55, 50, 70, 45, 
40, 0, 55, 45, 30, 0, 56, 0, 45, 50, 70, 15, 20, 45, -10, 45, 
55, 45, 20, 50, 85, 5, 50, 10, 20, 25, 0, 70, 0, 25, 5, 45, 35, 
40, -5, 25)), row.names = c("1", "2", "3", "4", "5", "6", 
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", 
"18", "19", "20", "21", "22", "23", "24", "25", "26", 
"177", "191", "200", "205", "208", "212", "231", "236", "240", 
"246", "250", "259", "263", "264", "275", "276", "282", "293", 
"303", "304", "307", "309", "315", "316", "320", "322", "324", 
"327", "333", "338", "343", "356", "365", "377", "379", "393", 
"395", "399", "405", "411", "426", "428", "439", "448", "451", 
"459", "490", "495", "498", "513", "515", "521", "524", "528", 
"532", "550", "552", "559", "566", "570", "577", "583", "587", 
"595", "624", "638", "641", "645", "647", "650", "660", "668", 
"677", "683", "688", "691", "702", "704", "710", "719", "730", 
"732", "748", "752", "758", "766", "772", "780", "782", "790", 
"810", "828", "830", "836", "853", "862", "880", "889", "896"
), class = "data.frame")

db.pca <- prcomp(db, center= TRUE, scale.=TRUE)
summary(db.pca)
str(db.pca)
ggbiplot(db.pca)
screeplot(db.pca, type="line")

Solution

  • Here is a way with package FactoMineR to get the correlations. The plot is a base R plot.

    library(FactoMineR)
    
    res.pca <- PCA(iris[-5], graph = FALSE)
    cos2 <- res.pca$var$cos2
    
    old_par <- par(xpd = TRUE)
    matplot(
      cos2, 
      type = "l", 
      xlab = "variable", 
      ylab = "correlation",
      main = "Component Pattern Profiles",
      xaxt = "n"
    )
    axis(1, at = 1:nrow(cos2), labels = rownames(cos2))
    legend(
      x = "bottom",
      inset = c(0, -0.2),
      legend = colnames(cos2),
      col = 1:ncol(cos2),
      lty = 1:ncol(cos2),
      bty = "n",
      horiz = TRUE
      )
    par(old_par)
    

    enter image description here