Search code examples
r ggplot2 drawing

How to draw vertical curly braces in a ggplot


I would like to add two vertical curly braces to my plot, in the same way as in the picture below. I have tried many things, but none of them seem to work.

[Image: enter image description here]

Below is the code I am using. I create my own data and draw a series of plots that I want to show to students; I want to add the braces to the last ggplot.

library(ggplot2)
library(ggbrace)
# Fix the RNG state so every run simulates the same data
set.seed(127)

# Simulate 100 observations: x is uniform on [0, 10] and y is the line
# 3 + 1.5 * x plus normal noise with standard deviation 4
x <- runif(n = 100, min = 0, max = 10)
y <- 3 + 1.5 * x + rnorm(n = 100, mean = 0, sd = 4)
data <- data.frame(x, y)

# First plot: all points in gray with the linear fit over the full data in
# black. The title is set once, in labs(); a separate ggtitle() call placed
# before labs(title = ...) would be replaced by it and never be displayed.
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "gray") +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  labs(title = "Population and sample regression Lines",
       x = "Years of education",
       y = "Log of earnings") +
  theme_minimal() +
  # No legend is wanted on these teaching plots
  theme(legend.position = "none")

# First sample: draw 50 of the 100 row positions without replacement and
# keep the matching x/y pairs, both as a data frame and as plain vectors
sample_indices1 <- sample(seq_len(100), size = 50)
data_sample1 <- data.frame(
  x = x[sample_indices1],
  y = y[sample_indices1]
)
x_sample1 <- data_sample1$x
y_sample1 <- data_sample1$y

# Second plot: highlight the first sample on top of the full data, still
# showing only the fit over the full data (no sample regression line yet)
ggplot(data, aes(x, y)) +
  geom_point(colour = "gray") +
  # The sample layer reuses the plot-level x/y mapping on its own data
  geom_point(data = data_sample1, colour = "#F8766D") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  labs(
    title = "Population and sample regression Lines",
    x = "Years of education",
    y = "Log of earnings"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Third plot: first sample highlighted, with the fit over the full data
# (black) and the fit over the first sample (red) drawn together
ggplot(data, aes(x, y)) +
  geom_point(colour = "gray") +
  # The sample layers reuse the plot-level x/y mapping on their own data
  geom_point(data = data_sample1, colour = "#F8766D") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  geom_smooth(
    data = data_sample1,
    method = "lm", colour = "#F8766D", se = FALSE
  ) +
  labs(
    title = "Population and sample regression Lines",
    x = "Years of education",
    y = "Log of earnings"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Second sample: another independent draw of 50 of the 100 row positions,
# kept both as a data frame and as plain vectors
sample_indices2 <- sample(seq_len(100), size = 50)
data_sample2 <- data.frame(
  x = x[sample_indices2],
  y = y[sample_indices2]
)
x_sample2 <- data_sample2$x
y_sample2 <- data_sample2$y

# Fourth plot: second sample highlighted, with the fit over the full data
# (black) and the fit over the second sample (teal) drawn together
ggplot(data, aes(x, y)) +
  geom_point(colour = "gray") +
  # The sample layers reuse the plot-level x/y mapping on their own data
  geom_point(data = data_sample2, colour = "#00BFC4") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  geom_smooth(
    data = data_sample2,
    method = "lm", colour = "#00BFC4", se = FALSE
  ) +
  labs(
    title = "Population and sample regression Lines",
    x = "Years of education",
    y = "Log of earnings"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Fifth plot: both samples highlighted on top of the full data, with all
# three fitted lines (full data in black, one colour per sample)
ggplot(data, aes(x, y)) +
  geom_point(colour = "gray") +
  # The sample layers reuse the plot-level x/y mapping on their own data
  geom_point(data = data_sample1, colour = "#F8766D") +
  geom_point(data = data_sample2, colour = "#00BFC4") +
  geom_smooth(method = "lm", colour = "black", se = FALSE) +
  geom_smooth(
    data = data_sample1,
    method = "lm", colour = "#F8766D", se = FALSE
  ) +
  geom_smooth(
    data = data_sample2,
    method = "lm", colour = "#00BFC4", se = FALSE
  ) +
  labs(
    title = "Population and sample regression Lines",
    x = "Years of education",
    y = "Log of earnings"
  ) +
  theme_minimal() +
  theme(legend.position = "none")


# Sixth plot: sample points with new regression line
# Same layers as the third plot (full data, first sample, both fitted lines),
# plus two formula labels and two braces. This is the version the question
# asks about.
ggplot(data, aes(x = x, y = y)) +
  geom_point(color = "gray") +
  geom_point(data = data_sample1, aes(x = x, y = y), color = "#F8766D") +
  geom_smooth(method = "lm", color = "black", se = FALSE) +
  geom_smooth(data = data_sample1, method = "lm", color = "#F8766D", se = FALSE) +
  labs(title = "Population and sample regression Lines",
       x = "Years of education",
       y = "Log of earnings") +
  theme_minimal() +
  theme(legend.position = "none") +
  # Formula label for u_i, written with beta_0 and beta_1
  annotate("text", x = 1, y = 4.5, label = expression(u[i] == y[i] - beta[0] - beta[1] * x[i]), parse = TRUE,size=3.5, col="#F8766D") +
  # NOTE(review): x and y are mapped to length-2 literal vectors inside aes()
  # and no layer-specific data is supplied; the answer below reports that this
  # length mismatch with the plot data raises an error.
  stat_brace(aes(x=c(2.8,3.3), y=c(.9,7.5)), rotate=270, size=.5, col="#F8766D") +
  # Formula label for u-hat_i, written with the hatted coefficients
  annotate("text", x = 4.5, y = 3.5, label = expression(hat(u)[i] == y[i] - hat(beta)[0] - hat(beta)[1] * x[i]), parse = TRUE,size=3.5,col="#F8766D") +
  # NOTE(review): same literal-vector mapping as the first brace. `T` is
  # shorthand for TRUE; confirm that stat_brace() accepts `inherit.data`.
  stat_brace(aes(x=c(2.2,2.7), y=c(.9,5.3)), inherit.data=T, rotate=90, size=.5, col="#F8766D")

Solution

  • To make your code work, put the coordinates for each brace in a data.frame and pass it to the data= argument of stat_brace(). Mapping literal vectors to the x and y aesthetics inside aes() results in an error, because the length of those vectors (2) differs from the number of rows of the global data (100).

    library(ggplot2)
    library(ggbrace)
    
    # Corrected sixth plot: each brace gets its own two-row data frame of
    # corner coordinates through `data =`, so aes() maps columns of that data
    # frame instead of literal vectors.
    ggplot(data, aes(x, y)) +
      geom_point(colour = "gray") +
      geom_point(data = data_sample1, colour = "#F8766D") +
      geom_smooth(method = "lm", colour = "black", se = FALSE) +
      geom_smooth(
        data = data_sample1,
        method = "lm", colour = "#F8766D", se = FALSE
      ) +
      # Formula label for u_i and its brace
      annotate(
        "text",
        x = 1, y = 4.5,
        label = expression(u[i] == y[i] - beta[0] - beta[1] * x[i]),
        parse = TRUE, size = 3.5, col = "#F8766D"
      ) +
      stat_brace(
        data = data.frame(x = c(2.8, 3.3), y = c(.9, 7.5)),
        mapping = aes(x = x, y = y),
        rotate = 270, size = .5, col = "#F8766D"
      ) +
      # Formula label for u-hat_i and its brace
      annotate(
        "text",
        x = 4.5, y = 3.5,
        label = expression(hat(u)[i] == y[i] - hat(beta)[0] - hat(beta)[1] * x[i]),
        parse = TRUE, size = 3.5, col = "#F8766D"
      ) +
      stat_brace(
        data = data.frame(x = c(2.2, 2.7), y = c(.9, 5.3)),
        mapping = aes(x = x, y = y),
        rotate = 90, size = .5, col = "#F8766D"
      ) +
      # Labels and theme do not depend on layer order, so they are grouped last
      labs(
        title = "Population and sample regression Lines",
        x = "Years of education",
        y = "Log of earnings"
      ) +
      theme_minimal() +
      theme(legend.position = "none")
    

    enter image description here