Search code examples
r, ggplot2, axis-labels, yaxis

Change the y-axis labels after using scale_y_break in R


I have data on Percentage Change from Baseline for two groups. To visualize the data, I created a boxplot and added the number of observations at the bottom using stat_summary. Additionally, I conducted a test to assess if the Percentage Change from Baseline is significantly different from zero for each group.

I attempted to break the y-axis into two parts, but after trying to modify the labels of the y-axis using scale_y_continuous, I encountered an issue: the y-axis is displayed twice, on both the right and left sides. How can I modify the labels of the y-axis without showing it twice?

Here are my data and my attempt in R:

# Reproducible example data: group A has 8 observations, group B has 30.
set.seed(200)

# Draw the random values in the same order as they appear in G2 (A first,
# then B) so the seeded stream yields the same numbers.
values_a <- rnorm(8, mean = 2, sd = 10)
values_b <- rnorm(29, mean = 50, sd = 20)

df <- data.frame(
  G1 = rep(c("A", "B"), times = c(8, 30)),
  # The trailing 1000 is a fixed extreme value appended to group B
  G2 = c(values_a, values_b, 1000)
)

# Summary function for stat_summary(fun.data = ...): returns a one-row data
# frame holding the text position (y) and the sample-size label for the
# values `x` it receives.
# Note: the y position is taken from the global `df`, not from `x`.
n_fun <- function(x) {
  label_y <- min(df$G2) * 5 # adjusted line
  count_label <- paste("N=", length(x), "\n")
  data.frame(y = label_y, label = count_label)
}

# Packages used by this script:
#   dplyr   - grouped summaries (group_by / summarize / summarise)
#   ggplot2 - theme_set(), theme_minimal(), theme(), geoms and stats
#   ggpubr  - ggboxplot()
#   ggbreak - scale_y_break()
# Without the last three the script stops at theme_set() below.
library(dplyr)
library(ggplot2)
library(ggpubr)
library(ggbreak)

# Mean and standard deviation of G2 per group; used later for the
# "Mean(SD)" text annotation on the plot.
mean_sd <- df %>%
  group_by(G1) %>%
  summarize(
    rmean = mean(G2),
    rsd = sd(G2)
  )

# Boxplots: use the minimal theme as the session default
theme_set(theme_minimal())

# One-sample Wilcoxon test of G2 against 0 within each group.
# P is the p-value, Sig is its star label ("ns" when P > 0.05), and
# MaxWidth is the largest G2 value in the group.
WC_tests <- summarise(
  group_by(df, G1),
  P = wilcox.test(G2, mu = 0)$p.value,
  # Thresholds are checked from the largest down; the resulting labels are
  # the same as testing P <= 0.0001, 0.001, 0.01, 0.05 in ascending order.
  Sig = if (P > 0.05) {
    "ns"
  } else if (P > 0.01) {
    "*"
  } else if (P > 0.001) {
    "**"
  } else if (P > 0.0001) {
    "***"
  } else {
    "****"
  },
  MaxWidth = max(G2)
)

# Box plot of G2 by G1 with jittered points, a broken y axis, and text
# annotations (sample size, significance label, mean and SD).
# This is the attempt described in the question: the y axis ends up drawn twice.
ggboxplot(df, x = "G1", y = "G2", color = "G1", add = "jitter", palette = "jco") +
  # Axis titles and legend key size
  xlab("") + 
  ylab("Percent Change") +
  theme(legend.key.size = unit(2.5, "lines")) +
  scale_y_break(c(100, 980)) + # Cut the y axis between 100 and 980
  # Custom tick positions and labels. Per the question, adding this scale after
  # scale_y_break() is what leaves the y axis displayed on both sides.
  scale_y_continuous(breaks = c(0,50,100, 980, 1000), labels = c(0,50,100, 980, 1000)) + 
  theme(axis.text.x = element_text(angle = 0, size = 9),
        axis.text.y = element_text(size = 10),
        # Remove labels on the right side
        axis.text.y.right = element_blank()) +
  
  # Sample size: n_fun supplies the label text and its y position per group
  stat_summary(fun.data = n_fun, geom = "text",
               aes(group = G1), hjust = 0.8,
               position = position_dodge(0.9), size = 3) +
  
  # Wilcoxon test result (significance label) for each group
  # Use the prepared table of test results as data for the geom;
  # every label is placed 3 units above the overall maximum of G2
  geom_text(aes(label = Sig, y = max(df$G2) + 3), size = 3,
            data = WC_tests)+
  # Dashed reference line at y = 0
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray") +
  
  # Mean and SD per group, written at y = 990
  geom_text(data = mean_sd, aes(x = G1, y = 990,
                                label = paste("Mean(SD):",
                                              round(rmean, 2),
                                              "(",
                                              round(rsd, 2),
                                              ")")),
            color = "black")

enter image description here

enter image description here

Thank you! Shaima


Solution

  • You can use the ticklabels argument of scale_y_break() to set the labels. You then no longer need scale_y_continuous(), which avoids the duplicated y-axis:

    library(ggpubr)
    library(ggbreak)
    # Box plot of G2 by G1 with jittered points, a broken y axis, and text
    # annotations (sample size, significance label, mean and SD).
    ggboxplot(
      df,
      x = "G1", y = "G2",
      color = "G1", palette = "jco",
      add = "jitter"
    ) +
      # Axis titles and legend key size
      labs(x = "", y = "Percent Change") +
      theme(legend.key.size = unit(2.5, "lines")) +
      # Cut the y axis between 100 and 980 and pass the tick labels here;
      # the separate scale_y_continuous(breaks = ..., labels = ...) call is
      # left out because it is what made the y axis appear twice.
      scale_y_break(c(100, 980), ticklabels = c(0, 50, 100, 980, 1000)) +
      theme(
        axis.text.x = element_text(size = 9, angle = 0),
        axis.text.y = element_text(size = 10),
        # No tick labels on the right-hand side
        axis.text.y.right = element_blank()
      ) +

      # Sample size per group; n_fun supplies the label and its y position
      stat_summary(
        mapping = aes(group = G1),
        fun.data = n_fun,
        geom = "text",
        position = position_dodge(0.9),
        hjust = 0.8,
        size = 3
      ) +

      # Significance label per group, taken from the prepared test table
      geom_text(
        data = WC_tests,
        mapping = aes(y = max(df$G2) + 3, label = Sig),
        size = 3
      ) +
      # Dashed reference line at y = 0
      geom_hline(yintercept = 0, color = "gray", linetype = "dashed") +

      # Mean and SD per group, written at y = 990
      geom_text(
        data = mean_sd,
        mapping = aes(
          x = G1,
          y = 990,
          label = paste(
            "Mean(SD):", round(rmean, 2), "(", round(rsd, 2), ")"
          )
        ),
        color = "black"
      )
    

    enter image description here

    Created on 2024-03-18 with reprex v2.0.2