Search code examples
rscatter-plotabline

Error plotting lines on a graph made in R


Could you help me solve following issue:

I have two codes that were made to generate the same scatter plot. The first one works normally, generates the graph and the lines without any problems. It is a code that requires vector i to generate the mean and standard deviation(sd).

Code 2, on the other hand, does not require vector i, but the result is not the desired one regarding the construction of lines in relation to the mean and sd. In my opinion it was to work.

Could the problem be with the ylim?

I hope someone helps me with this! =)

Thank you so much!

First code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))

graph <- function(dt, dta = data) {                                        
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                    date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

DS=c("Thursday","Friday","Saturday") 

i<-2

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week==DS[i])

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))
    

  
  dta %>%
    filter(date2 == ymd(dt)) %>%
    summarize(across(starts_with("DR"), sum)) %>%
    pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
    mutate(name = as.numeric(name)) %>%
    plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
         cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
  abline(h=mean, col='blue') +
    abline(h=(mean + sd), col='green',lty=2) 
  abline(h=(mean - sd), col='orange',lty=2)
  
}  
graph("2021-04-09",data)

enter image description here

Second code

library(dplyr)
library(tidyr)
library(lubridate)


data <- structure(
  list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
       date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                 "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
       date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                 "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                 "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                 "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
       Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
               "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
               "Thursday","Friday","Friday","Saturday","Saturday"),
       DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
       D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
       DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
       DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
  class = "data.frame", row.names = c(NA, -21L))


graph <- function(dt, dta = data) {
  
dim_data<-dim(data)

day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                length = dim_data[1]
)) 

data_grouped <- data %>%
  mutate(across(starts_with("date"), as.Date)) %>%
  group_by(date2) %>%
  summarise(Id = first(Id),
            date1 = first(date1),
            Week = first(Week),
            DTPE = first(DTPE),
            D1 = sum(D1)) %>%
  select(Id,date1,date2,Week,DTPE,D1)

data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
                                        date2=format(date2,"%d/%m/%Y"))
data_grouped<-data.frame(data_grouped)
data_grouped %>% 
  mutate(DTPE = na_if(DTPE, ""))

# get the week day
  
my_day <- weekdays(as.Date(dt))

df_OC<-subset(data_grouped, DTPE == "")

ds_CO<-subset(df_OC,df_OC$Week == my_day)

mean<-mean(as.numeric(ds_CO[,"D1"]) )
sd<-sd(as.numeric(ds_CO[,"D1"]))


dta %>%
  filter(date2 == ymd(dt)) %>%
  summarize(across(starts_with("DR"), sum)) %>%
  pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
  mutate(name = as.numeric(name)) %>%
  plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
       cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
abline(h=mean, col='blue') +
  abline(h=(mean + sd), col='green',lty=2) 
abline(h=(mean - sd), col='orange',lty=2)

}  
graph("2021-04-09",data)

enter image description here


Solution

  • You've messed up a lot with these data transformations. Below, however, I present my code that works according to your expectations.

    The main problem here was my_day <- weekdays (as.Date (dt)), In my system I was getting "piątek" and you didn't have such a day in your data, right?

    library(dplyr)
    library(tidyr)
    library(lubridate)
    
    
    data <- structure(
      list(Id=c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
           date1 = c("2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                     "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                     "2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20","2021-06-20",
                     "2021-06-20","2021-06-20","2021-06-20","2021-06-20"),
           date2 = c("2021-07-01","2021-07-01","2021-07-01","2021-07-01","2021-04-02",
                     "2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-02","2021-04-03",
                     "2021-04-03","2021-04-03","2021-04-03","2021-04-03","2021-04-08","2021-04-08",
                     "2021-04-09","2021-04-09","2021-04-10","2021-04-10"),
           Week= c("Thursday","Thursday","Thursday","Thursday","Friday","Friday","Friday","Friday",
                   "Friday","Friday","Saturday","Saturday","Saturday","Saturday","Saturday","Thursday",
                   "Thursday","Friday","Friday","Saturday","Saturday"),
           DTPE = c("Ho","Ho","Ho","Ho","","","","","","","","","","","","","","","","Ho","Ho"),
           D1 = c(8,1,9, 3,5,4,7,6,3,8,2,3,4,6,7,8,4,2,6,2,3), DR01 = c(4,1,4,3,3,4,3,6,3,7,2,3,4,6,7,8,4,2,6,7,3),
           DR02 = c(8,1,4,3,3,4,1,6,3,7,2,3,4,6,7,8,4,2,6,2,3), DR03 = c(7,5,4,3,3,4,1,5,3,3,2,3,4,6,7,8,4,2,6,4,3),
           DR04= c(4,5,6,7,3,2,7,4,2,1,2,3,4,6,7,8,4,2,6,4,3),DR05 = c(9,5,4,3,3,2,1,5,3,7,2,3,4,7,7,8,4,2,6,4,3)),
      class = "data.frame", row.names = c(NA, -21L))
    
    
    graph <- function(dt, dta = data) {
      
      dim_data<-dim(data)
      
      day<-c(seq.Date(from = as.Date(data$date2[1]), by = "days",
                      length = dim_data[1]
      )) 
      
      data_grouped <- data %>%
        mutate(across(starts_with("date"), as.Date)) %>%
        group_by(date2) %>%
        summarise(Id = first(Id),
                  date1 = first(date1),
                  Week = first(Week),
                  DTPE = first(DTPE),
                  D1 = sum(D1)) %>%
        select(Id,date1,date2,Week,DTPE,D1)
      
      #data_grouped <- data_grouped %>% mutate(date1=format(date1,"%d/%m/%Y"),
      #                                        date2=format(date2,"%d/%m/%Y"))
      #data_grouped<-data.frame(data_grouped)
      data_grouped %>% 
        mutate(DTPE = na_if(DTPE, ""))
      
      # get the week day
      
      #my_day <- weekdays(as.Date(dt))
      
      df_OC<-subset(data_grouped, DTPE == "")
      ds_CO = df_OC %>% filter(weekdays(date2) %in% weekdays(as.Date(dt)))
      #ds_CO<-subset(df_OC,df_OC$Week == my_day)
      
      mean<-mean(ds_CO$D1)
      sd<-sd(ds_CO$D1)
      
      
      dta %>%
        filter(date2 == ymd(dt)) %>%
        summarize(across(starts_with("DR"), sum)) %>%
        pivot_longer(everything(), names_pattern = "DR(.+)", values_to = "val") %>%
        mutate(name = as.numeric(name)) %>%
        plot(xlab = "Days", ylab = "Number", xlim = c(0, 45),cex=1.5,cex.lab=1.5, 
             cex.axis=1.5, cex.main=2, cex.sub=2, lwd=2.5, ylim = c((min(.$val) %/% 10) * 15, (max(.$val) %/% 10 + 1) * 100))
      abline(h=mean, col='blue') +
        abline(h=(mean + sd), col='green',lty=2) 
      abline(h=(mean - sd), col='orange',lty=2)
      
    }  
    graph("2021-04-09",data)
    
    

    Finally, I recommend:

    1. Keep your data in tibble,
    2. do not unnecessarily transform the date into a string several times and vice versa,
    3. use ggplo2. The charts will be much nicer.