Search code examples
r, r-markdown, boxplot, t-test

Compute t-test in R via reading csv data and visualize it in Boxplot


I am trying to import some data from a CSV file and visualize it in a boxplot, so that I can display it online using a platform. I am using R to compute a t-test and then display the results in a plot. I am getting this error when I try to compute the t-test: Error

This is the code I wrote:

# Question script: read the measurements, run a two-sample t-test, and draw
# a boxplot of the two groups.

# Long display names that are later applied as the column names of `df`.
labels <- list('non-failing heart (NF)', 'failing heart (F)')

# Read the CSV (first row is the header) and replace the literal text
# 'NA_integer_' with a real NA.
data <- read.csv("data.csv", header=T)
data[data == 'NA_integer_'] <- NA
# Two-sample t-test on the NF and F columns.
# NOTE(review): in the sample data NF is stored as character strings and
# nothing here converts it to numeric - possibly the cause of the reported
# error; confirm against the actual message.
t.test(data$NF, data$F)

# For each column of `data`, build a one-row data frame from its first two
# elements, stack those rows, and name the two resulting columns with `labels`.
# NOTE(review): only elements 1 and 2 of each column survive, and each
# original column ends up as a row rather than a column.
df <- setNames(do.call(rbind.data.frame, 
                       lapply(data, function(d) data.frame(d[1], d[2]))),
              labels)    

                           
                           
# NOTE(review): the long labels were assigned to `df`, not to `data`; the
# sample data only shows columns named NF and F.
results <- t.test(data$`non-failing heart (NF)`, data$`failing heart (F)`)


                           
# Test statistic, estimates, and p-value of the stored test result.
results$statistic
results$estimate
results$p.value
                           

# Copy the labelled columns under short names; they become columns 3 and 4,
# which df[3:4] selects for plotting.
df$NF <- df$`non-failing heart (NF)`
df$F <- df$`failing heart (F)`
boxplot(df[3:4],
        data=df,
        cex.lab=0.65,
        xlab="Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
        ylab="IRE binding activity (%)",
        col="orange",
        border="brown",
        ylim = c(0, 120)
)

Sample Data

# dput() of the input data frame (10 rows): NF is a character column in which
# missing values are the literal string "NA_integer_"; F is an integer column.
structure(list(NF = c("99", "96", "100", "105", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_", "NA_integer_" ), F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L)), row.names = c(NA, 10L), class = "data.frame")

Update

After converting the data to numeric as mentioned in the answer, I am getting this error: Error 2


Solution

  • First you need to turn all the columns to numeric type:

    # Insert right after the `data[data == 'NA_integer_'] <- NA` line:
    # coerce every column of `data` to numeric and store the result back.
    library(tidyverse)
    data <- mutate_all(data, as.numeric)
    

    Then change the data frame used to compute `results` from `data` to `df`:

    # Use `df` instead of `data` here: the backtick-quoted long column names
    # are the ones assigned to `df` via setNames(), not the names in `data`.
    results <- t.test(df$`non-failing heart (NF)`, df$`failing heart (F)`)
    

    full code:

    # Reproducible example: Welch two-sample t-test and boxplot of IRE binding
    # activity for non-failing (NF) versus failing (F) hearts. Base R only.
    labels <- c("non-failing heart (NF)", "failing heart (F)")

    # Stand-in for read.csv("data.csv"): NF is character because its missing
    # values were written as the literal string "NA_integer_".
    data <- data.frame(
      NF = c("99", "96", "100", "105", rep("NA_integer_", 6)),
      F = c(52L, 40L, 38L, 18L, 11L, 5L, 42L, 55L, 53L, 39L),
      stringsAsFactors = FALSE
    )

    # Replace the placeholder with a real NA, then make every column numeric.
    # Going through as.character() keeps this correct if a column is a factor
    # (as.numeric() on a factor would return its level codes, not its values).
    data[data == "NA_integer_"] <- NA
    data[] <- lapply(data, function(column) as.numeric(as.character(column)))

    t.test(data$NF, data$F)
    #> 
    #>  Welch Two Sample t-test
    #> 
    #> data:  data$NF and data$F
    #> t = 10.866, df = 10.695, p-value = 4.118e-07
    #> alternative hypothesis: true difference in means is not equal to 0
    #> 95 percent confidence interval:
    #>  51.54831 77.85169
    #> sample estimates:
    #> mean of x mean of y 
    #>     100.0      35.3

    # Keep every row and only relabel the columns. Stacking the first two
    # elements of each column instead produces a transposed 2 x 2 table that
    # mixes NF and F values (means 75.5 vs 68.0, p = 0.857), which is not the
    # data tested above.
    df <- setNames(data, labels)

    results <- t.test(df$`non-failing heart (NF)`, df$`failing heart (F)`)

    # Same test as above, now on the relabelled columns.
    results$statistic  # t = 10.866
    results$estimate   # mean of x = 100.0, mean of y = 35.3
    results$p.value    # 4.118e-07

    # Short-named copies give compact tick labels; select them by name rather
    # than by position so extra columns cannot shift the selection.
    df$NF <- df$`non-failing heart (NF)`
    df$F <- df$`failing heart (F)`
    boxplot(df[c("NF", "F")],
            cex.lab = 0.65,
            xlab = "Fig. 1 IRE binding activity for non-failing (NF) and failing (F) hearts.",
            ylab = "IRE binding activity (%)",
            col = "orange",
            border = "brown",
            ylim = c(0, 120)
    )