Search code examples
rfunctionfor-loopstat

How to set the stat_function in for loop to plot two graphs with normal distribution, central and variance parameters


I would like to create the following plots in parallel

enter image description here

I have used the following code using the wide format dataset:

sumstatz_1 <- data.frame(whichstat = c("mean",
                                     "sd upr", 
                                     "sd lwr", 
                                     "median"),
                       value     = c(mean(data$score),
                                     mean(data$score)+sd(data$score),
                                     mean(data$score)-sd(data$score), 
                                     median(data$score)))


plot2 = ggplot(data, aes(x = score)) +                           
  geom_histogram(aes(y =..density..),
                 breaks = seq(0, max(data$score), by = 5), 
                 colour = "black", 
                 fill = "white") + stat_function(fun = dnorm, 
                                   args = list(mean = mean(data$score, na.rm = TRUE), 
                                   sd = sd(data$score, na.rm = TRUE)), 
                                   colour = 'black', size = 1) + 
  labs(title='score', x='score', y= 'Distribution') +
  geom_vline(data=sumstatz_1,aes(xintercept = value,
                               linetype = whichstat,
                               col = whichstat),size=1)

I have taken it by changing just the variable of interest to create the second graph. Anyway, I would like to create the same result by using an interactive graph. Here I have set up the following code that I have converted into a long format for convenience and then I have coded the following for loop:

for (i in 101:ncol(long)) {
    p <- ggplot(long, aes(x = points)) +                           
      geom_histogram(aes(y =..density..), 
                     breaks = seq(0, 50, by = 3), 
                     colour = "black", 
                     fill = "white") + facet_grid(.~ score)
} for (j in seq_along(long$score)){
   p +
      stat_function(fun = dnorm[???], 
                    args = list(mean = mean(long$points[long$score == 'j'], na.rm = TRUE), 
                                sd = mean(long$points[long$score == 'j'], na.rm = TRUE)), 
                    colour = 'black', size = 1)
  }

print(p)

But I have no clue how to set parameters in stat_function() nor wether it is possible to use in a for loop or another iterative method. Would you have possibly any suggestion?

Here the dataset

structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 
7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
"score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
"data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909, 
48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308, 
38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455, 
37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
    ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
        1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 
        19:20), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -10L), .drop = TRUE))

Solution

  • Try using this code:

    
    dados <- structure(list(ID = c(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 
    7, 8, 8, 9, 9, 10, 10), score = structure(list(MM_score = c("score_2", 
    "score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
    "score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
    "score_1", "score_2", "score_1", "score_2", "score_1", "score_2", 
    "score_1")), row.names = c(NA, -20L), class = c("tbl_df", "tbl", 
    "data.frame")), points = c(53, 13.25, 17.5, 1.59090909090909, 
    48.5, 6.92857142857143, 40, 3.63636363636364, 46, 7.07692307692308, 
    38, 4.47058823529412, 14.5, 1.61111111111111, 19.5, 3.54545454545455, 
    37.5, 3.40909090909091, 5.5, 0.916666666666667)), class = c("grouped_df", 
    "tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
        ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), .rows = structure(list(
            1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 13:14, 15:16, 17:18, 
            19:20), ptype = integer(0), class = c("vctrs_list_of", 
        "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
    ), row.names = c(NA, -10L), .drop = TRUE))
    
    dados <- dados %>% ungroup() %>% mutate(score = factor(score$MM_score))
    
    
    grid <- with(dados, seq(min(points), max(points), length = 100))
    normaldens <- data.frame()
    sumstatz_1 <- data.frame()
    for(i in levels(dados$score)){
      aux <- dados %>% filter(score == i) %>% 
        summarise(mean = mean(points), sd = sd(points), median = median(points))
      normaldens <- rbind(normaldens,data.frame(score = rep(i,100),
                                                points = grid,
                                                density = dnorm(grid, aux$mean, aux$sd)))
      sumstatz_1 <- rbind(sumstatz_1,
                          data.frame(score = rep(i,4),
                                     whichstat = c("mean",
                                                   "sd upr", 
                                                   "sd lwr", 
                                                   "median"),
                                     value = c(aux$mean,
                                         aux$mean+aux$sd,
                                         aux$mean-aux$sd, 
                                         aux$median)))
    }
    
    ggplot(dados, aes(x = points))  + 
      geom_histogram(aes(y = ..density..), 
                         breaks = seq(0, 50, by = 3), 
                         colour = "black", 
                         fill = "white") + 
      geom_line(aes(y = density), data = normaldens, colour = "red") +
      geom_vline(data=sumstatz_1,aes(xintercept = value,
                                   linetype = whichstat,
                                   col = whichstat),size=1)+
      facet_wrap(~score) 
    
    

    If you have any questions, please ask me!!