Search code examples
rggplot2

Dynamic geom_text() pastes of labels across facets in fact_wrap()


Defining functions

hb <- function(x) {
  y = ((abs(2*x - 1))^2 * sign(x - .5))+1
  y * 5
}

hc <- function(y) {
  sapply(y, function(x) {
    if (x >= 0.5) {
      return ((1 + sqrt(2 * x - 1)) * 5)
    } else {
      return ((1 - sqrt(1 - 2 * x)) * 5)
    }
  })
}

Applying functions:

set.seed(1018)
tibble(
  G = sample(c("A","B","C"),100000,replace = T),
  A = sample(0:10, 100000, replace = T),
  e = rnorm(100000, 0, sd = .5),
  B = as.integer(hb(A/10) + e + .5), 
  C = as.integer(hc(A/10) + e + .5)
  ) %>%
    mutate(
      Mix = case_when(
    G == "A" ~ A,
    G == "B" ~ B,
    G == "C" ~ C
  )) |>
  select(
    A,B,C,Mix
  ) |>
  pivot_longer(
    cols = everything(),
    names_to = "Model",
    values_to = "Value"
  ) -> db

ggplot (weird output):

db |>
  filter(Value >= 0,
         Value <= 10) |>
  mutate(
    mean = round(mean(Value),2) |> as.character(),
    sd = round(sd(Value),2) |> as.character(),
    .by = Model
  ) |>
  ggplot(aes(x = Value)) +
  geom_histogram(
    binwidth = 1, fill = "blue",
    color = "black", alpha = 0.7) +
  labs(
    x = "x",
    y = "Frequency",
  ) +
  geom_text(
    aes(
      label =
        paste("AVG: ", mean,
                "\n",
                "SD: ", sd,
                "\n")
    ),
      x = Inf, y = Inf,
      hjust = 1, vjust = 1,
  color = "firebrick2",
  size = 3.8,
  fontface = "bold",
  ) +
  scale_x_continuous(breaks = c(0, 5, 10)) +
  facet_wrap(~ Model) +
  theme_minimal(base_size = 13)
  

The question:

This code make kinda dynamic labels but it takes a suspicious amount of time. I suspect it generates 10000 labels... If I suggest to use only first(mean) and first(sd), the result is invariant, but it provides me a warning.

Any way to optimise this?


Solution

  • Create a summary dataframe for your geom_text() that has just 4 rows:

    db <- db |>
      filter(Value >= 0,
        Value <= 10) |>
      mutate(
        mean = round(mean(Value),2) |> as.character(),
        sd = round(sd(Value),2) |> as.character(),
        .by = Model
      ) 
    
    ggplot() +
      geom_histogram(data = db, aes(x = Value),
        binwidth = 1, fill = "blue",
        color = "black", alpha = 0.7) +
      labs(
        x = "x",
        y = "Frequency",
      ) +
      geom_text( data = db %>% summarise(.by = Model, mean = mean(Value), sd = sd(Value)),
        mapping = aes(
          label =
            paste("AVG: ", mean,
              "\n",
              "SD: ", sd,
              "\n")
        ),
        x = Inf, y = Inf,
        hjust = 1, vjust = 1,
        color = "firebrick2",
        size = 3.8,
        fontface = "bold",
      ) +
      scale_x_continuous(breaks = c(0, 5, 10)) +
      facet_wrap(~ Model) +
      theme_minimal(base_size = 13)