Search code examples
r runtime-error

How to get the function multivari (plotrix) running with a data import from Excel


I'm relatively new to R and wrote the script below in RStudio to create a multi-vari chart. When I import the data from an Excel sheet (dataset MVData), I get the following error message:

Error in dat_split[[i]][fac1][!is.na(data[[var]]), ]: ! Can't subset rows with !is.na(data[[var]]). ✖ Logical subscript !is.na(data[[var]]) must be size 1 or 24, not 48.

However, when I build the data set with data.frame (= dataset RRData) the script runs without issues.


library(SixSigma)
library(dplyr)
library(plotrix)
library(readxl)

# Build the reference data set by hand ----
# 48 measurements, written 12 per row so that each row of numbers lines up
# with one proces/type combination.
waarde <- c(
  0.68, -2.09, 1.74, 1.84, -1.7, 0.24, -0.6, -0.23, -0.63, -0.3, 0.75, -0.34,
  0.1, 0.32, 0.35, 0.11, 0.31, 0.19, 0.05, -0.01, -0.06, 0.3, 0.33, 0.16,
  5.06, 4.18, 5.14, 4.34, 4.29, 4.06, 6.07, 3.7, 6.31, 5.45, 4.72, 5.26,
  3.31, 3.47, 3.12, 3.34, 3.27, 2.78, 3.59, 3.57, 3.88, 3.54, 3.67, 3.82
)

# The grouping columns follow a fully nested, regular layout, so they are
# generated with rep() instead of being typed out element by element.
proces <- rep(c("Blank", "PSA"), each = 24)
type <- rep(rep(c("Vierkant", "Plaat"), each = 12), times = 2)
operator <- rep(rep(c("BL", "JM"), each = 6), times = 4)
run <- rep(rep(c("R1", "R2"), each = 3), times = 8)
ID <- rep(c("1", "2", "3"), times = 16)
meting <- rep("1", times = 48)

# Column names are taken from the variable names passed in.
RRData <- data.frame(waarde, proces, type, operator, run, ID, meting)
print(RRData)

# Import the same measurements from the Excel workbook ----
# The first column is read as numeric, the remaining six columns as text.
MVData <- read_excel(
  path = "InputMultiVari.xlsx",
  col_types = c("numeric", rep("text", times = 6))
)
print(MVData)

# Multi Vari chart
# Draw the chart from the imported MVData: "waarde" is passed first, followed
# by the columns "ID", "operator", "run" and "type" as positional arguments
# (presumably the response followed by the grouping factors -- confirm
# against ?multivari). fac.cex and main are passed as named options.
multivari("waarde", "ID", "operator", "run", "type",
          data = MVData,
          fac.cex = 1.5,
          main = "Multi-Vari Chart"
)

Does somebody have a clue how I can resolve this and use the Excel file as import?

I tried changing the variable types of the different columns, but that didn't change the outcome.

Thanks

The result from dput(MVData)

> dput(MVData)
structure(list(waarde = c(0.68, -2.09, 1.74, 1.84, -1.7, 0.24, 
-0.6, -0.23, -0.63, -0.3, 0.75, -0.34, 0.1, 0.32, 0.35, 0.11, 
0.31, 0.19, 0.05, -0.01, -0.06, 0.3, 0.33, 0.16, 5.06, 4.18, 
5.14, 4.34, 4.29, 4.06, 6.07, 3.7, 6.31, 5.45, 4.72, 5.26, 3.31, 
3.47, 3.12, 3.34, 3.27, 2.78, 3.59, 3.57, 3.88, 3.54, 3.67, 3.82
), proces = c("Blank", "Blank", "Blank", "Blank", "Blank", "Blank", 
"Blank", "Blank", "Blank", "Blank", "Blank", "Blank", "Blank", 
"Blank", "Blank", "Blank", "Blank", "Blank", "Blank", "Blank", 
"Blank", "Blank", "Blank", "Blank", "PSA", "PSA", "PSA", "PSA", 
"PSA", "PSA", "PSA", "PSA", "PSA", "PSA", "PSA", "PSA", "PSA", 
"PSA", "PSA", "PSA", "PSA", "PSA", "PSA", "PSA", "PSA", "PSA", 
"PSA", "PSA"), type = c("Vierkant", "Vierkant", "Vierkant", "Vierkant", 
"Vierkant", "Vierkant", "Vierkant", "Vierkant", "Vierkant", "Vierkant", 
"Vierkant", "Vierkant", "Plaat", "Plaat", "Plaat", "Plaat", "Plaat", 
"Plaat", "Plaat", "Plaat", "Plaat", "Plaat", "Plaat", "Plaat", 
"Vierkant", "Vierkant", "Vierkant", "Vierkant", "Vierkant", "Vierkant", 
"Vierkant", "Vierkant", "Vierkant", "Vierkant", "Vierkant", "Vierkant", 
"Plaat", "Plaat", "Plaat", "Plaat", "Plaat", "Plaat", "Plaat", 
"Plaat", "Plaat", "Plaat", "Plaat", "Plaat"), operator = c("BL", 
"BL", "BL", "BL", "BL", "BL", "JM", "JM", "JM", "JM", "JM", "JM", 
"BL", "BL", "BL", "BL", "BL", "BL", "JM", "JM", "JM", "JM", "JM", 
"JM", "BL", "BL", "BL", "BL", "BL", "BL", "JM", "JM", "JM", "JM", 
"JM", "JM", "BL", "BL", "BL", "BL", "BL", "BL", "JM", "JM", "JM", 
"JM", "JM", "JM"), run = c("R1", "R1", "R1", "R2", "R2", "R2", 
"R1", "R1", "R1", "R2", "R2", "R2", "R1", "R1", "R1", "R2", "R2", 
"R2", "R1", "R1", "R1", "R2", "R2", "R2", "R1", "R1", "R1", "R2", 
"R2", "R2", "R1", "R1", "R1", "R2", "R2", "R2", "R1", "R1", "R1", 
"R2", "R2", "R2", "R1", "R1", "R1", "R2", "R2", "R2"), ID = c("1", 
"2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", 
"3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", 
"1", "2", "3", "1", "2", "3", "1", "2", "3", "1", "2", "3", "1", 
"2", "3", "1", "2", "3", "1", "2", "3"), meting = c("1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
"1", "1", "1", "1", "1", "1", "1")), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -48L))

And the result of identical(MVData, RRData)

> identical(MVData, RRData)
[1] FALSE

Solution

  • The issue is caused by RRData and MVData having different classes. RRData is a plain data frame, whereas read_excel() returns a tibble:

    # Inspect the class vector of the object that read_excel() returns.
    class(readxl::read_excel(path = "~/abc.xlsx"))
    #> [1] "tbl_df"     "tbl"        "data.frame" 
    

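    However, the multivari() function expects a plain data frame. Tibbles are stricter than base data frames when rows are subset (a logical index must have length 1 or match the number of rows), which is exactly what the error message complains about. The help page for multivari() says: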

    [...]
    data    a data frame, required
    

    Back to your data:

    [...]
    
    # Compare the classes of the hand-built and the imported data set.
    class(RRData)
    #> [1] "data.frame"
    class(MVData)
    #> [1] "tbl_df"     "tbl"        "data.frame"
    
    # Convert the imported tibble to a plain data frame before plotting.
    MVData <- as.data.frame(MVData)
    
    multivari(
      "waarde", "ID", "operator", "run", "type",
      data = MVData,
      fac.cex = 1.5,
      main = "Multi-Vari Chart"
    )
    

    Created on 2024-10-26 with reprex v2.1.0