Search code examples
r plot lattice parallel-coordinates

(Search path?) oddities in lattice::parallelplot


I need to construct a parallel coordinate plot, and I am trying to use lattice::parallelplot for that. However, it does not seem to be able to find variables in the data.frame.

df <- structure(list(Patient = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("K1", 
"K10", "K11", "K12", "K13", "K14", "K3", "K5", "K6", "K7", "K8", 
"K9", "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", 
"M17", "M18", "M3", "M4", "M5", "M6", "M7", "M8", "M9", "P11", 
"P14", "P15", "P17", "P19", "P20", "P21", "P22", "P24", "P25zi", 
"P26zi", "P27zi", "P28zi", "P29zi", "P30zi", "P31zi", "P32zi", 
"P33zi", "P34zi", "P35zi", "P36zi", "P37zi", "P38zi", "P39zi", 
"P40zi", "P42zi", "P43zi", "P44zi", "P45zi", "P47zi", "P48zi", 
"P49zi", "P6"), class = "factor"), Session = c("Control", "Control", 
"Control", "Control", "Control", "Control"), Stimulation = structure(c(1L, 
1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Pre-op", "Stim OFF", 
"Stim ON"), class = "factor"), Medication = structure(c(1L, 1L, 
1L, 1L, 1L, 1L), .Label = c("Control", "Med OFF", "Med ON"), class = "factor"), 
    Pace = c("Fast", "Self paced", "Fast", "Self paced", "Fast", 
    "Self paced"), Task = c("AMR", "AMR", "AMR", "AMR", "AMR", 
    "AMR"), rpvi = c(0.0182795199284812, 0.0663539828711965, 
    0.0341240824873636, 0.12187724470919, 0.072604166062397, 
    0.173523783116511), npvi = c(13.3513362238, 18.2878243201769, 
    14.2017869466867, 30.4704307031096, 27.7354960871263, 47.2650462416463
    ), cov = c(0.149873573597108, 0.161675840793806, 0.128829768483374, 
    0.248221179697023, 0.263336212564805, 0.411141115364702), 
    PatientGroup = c("Normal control", "Normal control", "Normal control", 
    "Normal control", "Normal control", "Normal control")), .Names = c("Patient", 
"Session", "Stimulation", "Medication", "Pace", "Task", "rpvi", 
"npvi", "cov", "PatientGroup"), vars = list(Patient, Session, 
    Stimulation, Medication, Pace), drop = TRUE, row.names = c(NA, 
6L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
> latice:parallelplot
> head(df)
Patient Session Stimulation Medication       Pace Task       rpvi     npvi       cov   PatientGroup
1      K1 Control     Control    Control       Fast  AMR 0.01827952 13.35134 0.1498736 Normal control
2      K1 Control     Control    Control Self paced  AMR 0.06635398 18.28782 0.1616758 Normal control
3     K10 Control     Control    Control       Fast  AMR 0.03412408 14.20179 0.1288298 Normal control
4     K10 Control     Control    Control Self paced  AMR 0.12187724 30.47043 0.2482212 Normal control
5     K11 Control     Control    Control       Fast  AMR 0.07260417 27.73550 0.2633362 Normal control
6     K11 Control     Control    Control Self paced  AMR 0.17352378 47.26505 0.4111411 Normal control
> parallelplot(npvi | PatientGroup,data=df)
Error in parallelplot(npvi | PatientGroup, data = df) : 
  object 'npvi' not found

How come the parallelplot function does not find the columns in the data frame?

Some info about the session:

> sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-apple-darwin13.1.0 (64-bit)

locale:
[1] sv_SE.UTF-8/sv_SE.UTF-8/sv_SE.UTF-8/C/sv_SE.UTF-8/sv_SE.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] ggplot2_1.0.0   GGally_0.4.8    lattice_0.20-29

loaded via a namespace (and not attached):
 [1] colorspace_1.2-4 digest_0.6.4     grid_3.1.0       gtable_0.1.2     MASS_7.3-34      munsell_0.4.2    plyr_1.8.1       proto_0.3-10     Rcpp_0.11.2      reshape_0.8.5   
[11] reshape2_1.4     scales_0.2.4     stringr_0.6.2    tools_3.1.0     
> searchpaths()
 [1] ".GlobalEnv"                                                               "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/ggplot2"  
 [3] "/Users/frkkan96/Library/R/3.1/library/GGally"                             "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/lattice"  
 [5] "tools:rstudio"                                                            "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/stats"    
 [7] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/graphics"  "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/grDevices"
 [9] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/utils"     "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/datasets" 
[11] "/Library/Frameworks/R.framework/Versions/3.1/Resources/library/methods"   "Autoloads"                                                               
[13] "/Library/Frameworks/R.framework/Resources/library/base" 

Solution

  • For some reason, parallelplot has a very bizarre formula syntax. It actually needs a matrix/data.frame passed as the right-hand side of the formula. Also, this matrix must have at least two columns. The lines of the parallel plot are drawn between these values.

    But first of all, what you posted for df seems to be a non-standard data.frame and somehow got the class "grouped_df", so I'm not sure how you named it. Here I've simplified it to a more standard data.frame:

    df <- structure(list(Patient = structure(c(1L, 1L, 2L, 2L, 3L, 3L), .Label = c("K1", 
    "K10", "K11", "K12", "K13", "K14", "K3", "K5", "K6", "K7", "K8", 
    "K9", "M1", "M10", "M11", "M12", "M13", "M14", "M15", "M16", 
    "M17", "M18", "M3", "M4", "M5", "M6", "M7", "M8", "M9", "P11", 
    "P14", "P15", "P17", "P19", "P20", "P21", "P22", "P24", "P25zi", 
    "P26zi", "P27zi", "P28zi", "P29zi", "P30zi", "P31zi", "P32zi", 
    "P33zi", "P34zi", "P35zi", "P36zi", "P37zi", "P38zi", "P39zi", 
    "P40zi", "P42zi", "P43zi", "P44zi", "P45zi", "P47zi", "P48zi", 
    "P49zi", "P6"), class = "factor"), Session = c("Control", "Control", 
    "Control", "Control", "Control", "Control"), Stimulation = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("Control", "Pre-op", "Stim OFF", 
    "Stim ON"), class = "factor"), Medication = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("Control", "Med OFF", "Med ON"), class = "factor"), 
        Pace = c("Fast", "Self paced", "Fast", "Self paced", "Fast", 
        "Self paced"), Task = c("AMR", "AMR", "AMR", "AMR", "AMR", 
        "AMR"), rpvi = c(0.0182795199284812, 0.0663539828711965, 
        0.0341240824873636, 0.12187724470919, 0.072604166062397, 
        0.173523783116511), npvi = c(13.3513362238, 18.2878243201769, 
        14.2017869466867, 30.4704307031096, 27.7354960871263, 47.2650462416463
        ), cov = c(0.149873573597108, 0.161675840793806, 0.128829768483374, 
        0.248221179697023, 0.263336212564805, 0.411141115364702), 
        PatientGroup = c("Normal control", "Normal control", "Normal control", 
        "Normal control", "Normal control", "Normal control")), .Names = c("Patient", 
    "Session", "Stimulation", "Medication", "Pace", "Task", "rpvi", 
    "npvi", "cov", "PatientGroup"), row.names = c(NA, 
    6L), class = "data.frame")
    

    OK, now we are ready to call parallelplot. Again, we need a data.frame and we need at least two columns (one for line start, one for line end). Therefore

    parallelplot(~df[,c("npvi","rpvi")] | PatientGroup,data=df)
    

    should work and it produces

    enter image description here

    given the sample data. (Hopefully it's clear from the output why at least two variables are required.)