Search code examples
rggplot2r-forestplotforest-plots

Match the text formatting on a forest plot


I have created the following forest plot using ggforestplot. However, I would like the 'p-trend' values (QM_P_trend) that appear on the plots to match the text style and size of the y-axis labels (i.e., the 'Analyte' values).

enter image description here

Currently, I am adding the p-trend values using geom_text(), but they do not match the formatting of the y-axis labels.

How can I modify my code to ensure that the p-trend values align with the y-axis labels in both size and style?

Here is my code and example dataset for reference:

###################################################################################
# CREATE DATA -------------------------------------------------------------------#
###################################################################################

data <- structure(list(Carrier_comparison = c("AvsB", "AvsB", "AvsB", "CvsB", 
                                              "CvsB", "CvsB", "AvsB", "AvsB", "AvsB", "CvsB", "CvsB", "CvsB", 
                                              "AvsB", "AvsB", "AvsB", "CvsB", "CvsB", "CvsB"), 
                       Tertile = structure(c(3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L), 
                       levels = c("Tertile 3 (63-70)", "Tertile 2 (55-62)", "Tertile 1 (40-54)"), 
                       class = "factor"), Assay = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), 
                       levels = c("Assay_1","Assay_2", "Assay_3"), class = "factor"), 
                       BETA = c(0.0536645935253193, 0.00934331381571756, -0.0502503436470944, 0.00816704351606194, 
                                0.0946005745129161, -0.0749759598409221, -0.0600444465527413, 0.0601560795464937, 
                                -0.0640637292430954, 0.0219960172083058, -0.148417379449254, -0.0346517428785544, 
                                0.118618573404726, 0.0822558056705285, 0.15602529221275, -0.0326073326703257, 
                                -0.054029294502272, -0.0346020391169203), 
                       SE = c(0.11057661927178, 0.113129029834029, 0.126265725049744, 
                              0.0823378264579776, 0.0866601604863716, 0.100479295994743, 0.102816408820442, 
                              0.106832143822942, 0.117433994065687, 0.0774019365360251, 0.0821098859062016, 
                              0.0932959318751491, 0.102738955735482, 0.108841714570024, 0.118096903894084, 
                              0.0781497668089097, 0.0811784837707031, 0.0931611680323888), 
                       P_uncorrected = c(0.627642070133785, 0.934211300388773, 0.690846714020907, 
                              0.921016957706137, 0.275432313055544, 0.455911706235102, 
                              0.559432213622232, 0.573597978019841, 0.585625011701019, 
                              0.776350390578681, 0.0711164128651439, 0.710460149142023, 
                              0.248707272854769, 0.450125272681661, 0.187040523394829, 
                              0.676619698998927, 0.505916944765656, 0.71045895979309), 
                        QM_P_trend = c(0.545836080293994, 0.545836080293994, 0.545836080293994, 
                                       0.7479209500248, 0.7479209500248, 0.7479209500248, 0.867112096895725, 
                                       0.867112096895725, 0.867112096895725, 0.57166587344432, 0.57166587344432, 
                                       0.57166587344432, 0.880298751128355, 0.880298751128355, 0.880298751128355, 
                                       0.949181439539369, 0.949181439539369, 0.949181439539369)), 
                                       row.names = c(NA, -18L), class = "data.frame")


###################################################################################
# CREATE PLOTS -------------------------------------------------------------------#
###################################################################################

# Reverse the order of Tertile levels (to ensure ascending order on plots)
data <- data %>% 
  mutate(Tertile = factor(Tertile, levels = rev(levels(Tertile))))

# Calculate LCI and UCI (95% Confidence Intervals)
data <- data %>%
  mutate(
    LCI = BETA - (1.96 * SE),
    UCI = BETA + (1.96 * SE)
  )

cat("Plot the data...\n\n")

# Create the forest plot function
forest_plot_data <- function(proteins, data, main_comparison) {
  # Define the desired order for Carrier_comparison based on main_comparison
  comparison_levels <- if (main_comparison == "A") {
    c("AvsB", "CvsB")
  } else {
    c("CvsB", "AvsB")
  }
  
  # Filter data for selected proteins
  data <- data %>%
    filter(Assay %in% proteins) 
  
  # Ensure the Carrier_comparison is a factor with the desired level order
  data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
  
  # Calculate x-axis limits
  max_abs_limit <- max(abs(c(data$LCI, data$UCI)), na.rm = TRUE)
  x_limits <- c(-max_abs_limit, 1.3*max_abs_limit)
  
  # Plot
  plot <- ggforestplot::forestplot(
    df = data,
    name = Assay,  # Use combined label for y-axis
    estimate = BETA,
    se = SE,
    pvalue = P_uncorrected, 
    psignif = 0.01, # fill estimate if significant at this level
    ci = 0.95,
    logodds = FALSE,
    xlab = "Beta (95% confidence intervals)",
    ylab="Analyte",
    colour = Tertile,
    shape = Tertile,
    position = position_dodge(width = 0.8)  # Increase dodge width for better separation
  ) +
    facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
    scale_x_continuous(limits = x_limits) +
    theme(
      text = element_text(family = "Arial", size = 12, color = "black"),  # Change "Arial" to your preferred font
      strip.text = element_text(size = 16, color = "black", hjust = 0.5, vjust = 3.5, margin = margin(t = 12, b = 10)),
      axis.text.x = element_text(size = 12, color = "black"),
      axis.text.y = element_text(size = 12, color = "black", margin = margin(l = 15)),
      axis.title.x = element_text(size = 14, color = "black", vjust = 0.5, margin = margin(t = 5, b = 5)),
      axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.01, margin = margin(r = -70)),  # Rotated y-axis title
      legend.title = element_text(size = 14, color = "black"),
      legend.text = element_text(size = 12, color = "black"),  # Customize legend labels
      legend.key.size = unit(1.5, "lines")
    ) + 
    geom_text(
      data = data.frame(Carrier_comparison = comparison_levels, # Add "p-value" label above p-values in each panel
                        label = "bolditalic('p') * bold('-trend')"),  # Correct expression for bold and italic
      aes(x = 1.15*max_abs_limit, y = Inf, label = label),
      vjust = 1, 
      parse = TRUE  # Use parse = TRUE to interpret the label as an expression
    )   + 
    geom_text(
      aes(x = 1.15*max_abs_limit, y = Assay, label = sprintf("%.3f", QM_P_trend)),  # Display QM_P_trend values
      vjust = 0.5
    ) +
    scale_y_discrete(expand = expansion(add = c(0.5, 1.3)))  # Add extra space at the top and bottom of the y-axis
  
  # Return plot
  return(plot)
}

# Create function to identify 'top analytes' 
proteins_list <- levels(data$Assay)

# Generate plots
CvsB_forest_plot <- forest_plot_data(proteins_list, data, main_comparison = "C") 

# View plot
CvsB_forest_plot


Solution

  • The issue with the boldish style of the labels is due to the fact that you add the p-value labels multiple times, i.e. once for each tertile. To fix that you can use e.g. dplyr::distinct to filter your data for unique values. Concerning the size, use the same size for your labels as for the axis text:

    library(dplyr)
    library(ggplot2)
    library(ggforestplot)
    
    # Create the forest plot function
    forest_plot_data <- function(proteins, data, main_comparison) {
      # Define the desired order for Carrier_comparison based on main_comparison
      comparison_levels <- if (main_comparison == "A") {
        c("AvsB", "CvsB")
      } else {
        c("CvsB", "AvsB")
      }
    
      # Filter data for selected proteins
      data <- data %>%
        filter(Assay %in% proteins)
    
      # Ensure the Carrier_comparison is a factor with the desired level order
      data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
    
      # Calculate x-axis limits
      max_abs_limit <- max(abs(c(data$LCI, data$UCI)), na.rm = TRUE)
      x_limits <- c(-max_abs_limit, 1.3 * max_abs_limit)
    
      # Plot
      plot <- ggforestplot::forestplot(
        df = data,
        name = Assay, # Use combined label for y-axis
        estimate = BETA,
        se = SE,
        pvalue = P_uncorrected,
        psignif = 0.01, # fill estimate if significant at this level
        ci = 0.95,
        logodds = FALSE,
        xlab = "Beta (95% confidence intervals)",
        ylab = "Analyte",
        colour = Tertile,
        shape = Tertile,
        position = position_dodge(width = 0.8) # Increase dodge width for better separation
      ) +
        facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
        scale_x_continuous(limits = x_limits) +
        theme(
          text = element_text(family = "Arial", size = 12, color = "black"), # Change "Arial" to your preferred font
          strip.text = element_text(size = 16, color = "black", hjust = 0.5, vjust = 3.5, margin = margin(t = 12, b = 10)),
          axis.text.x = element_text(size = 12, color = "black"),
          axis.text.y = element_text(size = 12, color = "black", margin = margin(l = 15)),
          axis.title.x = element_text(size = 14, color = "black", vjust = 0.5, margin = margin(t = 5, b = 5)),
          axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.01, margin = margin(r = -70)), # Rotated y-axis title
          legend.title = element_text(size = 14, color = "black"),
          legend.text = element_text(size = 12, color = "black"), # Customize legend labels
          legend.key.size = unit(1.5, "lines")
        ) +
        geom_text(
          data = data.frame(
            Carrier_comparison = comparison_levels, # Add "p-value" label above p-values in each panel
            label = "bolditalic('p') * bold('-trend')"
          ), # Correct expression for bold and italic
          aes(x = 1.15 * max_abs_limit, y = Inf, label = label),
          vjust = 1,
          parse = TRUE # Use parse = TRUE to interpret the label as an expression
        ) +
        geom_text(
          data = distinct(data, Assay, Carrier_comparison, QM_P_trend),
          aes(x = 1.15 * max_abs_limit, y = Assay, label = sprintf("%.3f", QM_P_trend)), # Display QM_P_trend values
          vjust = 0.5, size = 12 / .pt
        ) +
        scale_y_discrete(expand = expansion(add = c(0.5, 1.3))) # Add extra space at the top and bottom of the y-axis
    
      # Return plot
      return(plot)
    }
    
    # Generate plots
    CvsB_forest_plot <- forest_plot_data(proteins_list, data, main_comparison = "C")
    
    # View plot
    CvsB_forest_plot
    

    enter image description here