I want to plot all of the principal components and group the points by the clusterNum column. However, from what I have learned so far, we can only plot in 2D or 3D. So I would like to know how one normally plots or represents all of these PCs together in a single graph.
This is what the sample looks like:
PC1 PC2 PC3 PC4 PC5 clusterNum
1 -2.3779857 0.85818117 0.77918535 0.1967346 0.9826036 2
2 -1.4290545 0.09556012 -0.06358622 1.3468587 -0.1525576 2
3 1.5621954 2.53686714 -2.57818785 0.0111326 0.2755555 1
4 1.9915498 0.19282116 1.34349507 0.0578331 -1.0146932 3
5 -0.4431131 1.38970833 2.34020611 -0.8359105 0.1402896 2
6 -1.9892596 0.37200044 2.00614197 0.5013188 -1.5358936 2
7 -2.6111509 -0.31112666 1.44419058 0.7313908 0.2597182 2
1977 -1.8662394 1.74827166 -2.37657231 0.4158580 1.1100287 4
1978 2.3477702 -2.24859797 -0.47305695 0.3690514 -1.1667859 3
1979 2.0797897 -0.14012792 -0.82941643 -1.2233560 -0.4523913 1
# Reproducible sample: ten rows of PCA scores (PC1..PC5) together with the
# cluster label of each row. The row names are the original row indices.
data <- data.frame(
  PC1 = c(
    -2.37798570730988, -1.42905447617503, 1.56219540602943,
    1.99154980323715, -0.443113128962729, -1.98925960995357,
    -2.6111508923784, -1.86623936673779, 2.3477701685881,
    2.07978972115199
  ),
  PC2 = c(
    0.858181174741465, 0.0955601160006672, 2.53686714046073,
    0.192821161122631, 1.38970832915268, 0.372000440681993,
    -0.31112665937417, 1.74827166020088, -2.24859797467832,
    -0.140127921225955
  ),
  PC3 = c(
    0.779185345162976, -0.0635862220559265, -2.57818784684844,
    1.34349506727208, 2.34020610639405, 2.00614196687586,
    1.44419058101111, -2.37657231100357, -0.473056945068265,
    -0.829416434702976
  ),
  PC4 = c(
    0.196734648339808, 1.34685865526758, 0.0111326028445577,
    0.0578331048357187, -0.83591054343165, 0.501318761327088,
    0.731390845165095, 0.415858033401411, 0.369051391354487,
    -1.22335597705293
  ),
  PC5 = c(
    0.982603621624129, -0.152557597364265, 0.275555453841701,
    -1.01469324992585, 0.140289629133083, -1.53589363488684,
    0.259718249982426, 1.11002871887763, -1.16678589524657,
    -0.452391297542505
  ),
  clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L),
  row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 1977L, 1978L, 1979L)
)
Thank you in advance.
You could do a line plot with multiple lines, like this:
# Sample data from the question: ten rows of PCA scores (PC1..PC5) plus the
# cluster label of each row. The row names are the original row indices.
data <- data.frame(
  PC1 = c(
    -2.37798570730988, -1.42905447617503, 1.56219540602943,
    1.99154980323715, -0.443113128962729, -1.98925960995357,
    -2.6111508923784, -1.86623936673779, 2.3477701685881,
    2.07978972115199
  ),
  PC2 = c(
    0.858181174741465, 0.0955601160006672, 2.53686714046073,
    0.192821161122631, 1.38970832915268, 0.372000440681993,
    -0.31112665937417, 1.74827166020088, -2.24859797467832,
    -0.140127921225955
  ),
  PC3 = c(
    0.779185345162976, -0.0635862220559265, -2.57818784684844,
    1.34349506727208, 2.34020610639405, 2.00614196687586,
    1.44419058101111, -2.37657231100357, -0.473056945068265,
    -0.829416434702976
  ),
  PC4 = c(
    0.196734648339808, 1.34685865526758, 0.0111326028445577,
    0.0578331048357187, -0.83591054343165, 0.501318761327088,
    0.731390845165095, 0.415858033401411, 0.369051391354487,
    -1.22335597705293
  ),
  PC5 = c(
    0.982603621624129, -0.152557597364265, 0.275555453841701,
    -1.01469324992585, 0.140289629133083, -1.53589363488684,
    0.259718249982426, 1.11002871887763, -1.16678589524657,
    -0.452391297542505
  ),
  clusterNum = c(2L, 2L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 1L),
  row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 1977L, 1978L, 1979L)
)
library(tidyverse)
# Draw every principal component as its own line across the observations.
# The pipeline is left unassigned so the plot prints when run at top level.
data %>%
  # Keep the original row labels as a regular column named "rowname".
  rownames_to_column(var = "rowname") %>%
  # The cluster label is not drawn in this plot, so drop it before reshaping.
  select(!clusterNum) %>%
  # Fix the factor levels in order of appearance so the x-axis follows the
  # row order of the data instead of sorting the labels alphabetically.
  mutate(rowname = fct_inorder(rowname)) %>%
  # Long format: one row per (observation, component) pair.
  pivot_longer(cols = !rowname, names_to = "name", values_to = "value") %>%
  ggplot(mapping = aes(x = rowname, y = value, colour = name, group = name)) +
  geom_line()
I don't think that is a common thing to do with PCA results though. Look here, here and here to learn what common practices for visualizing PCA results are.