I have created the following forest plot using ggforestplot. However, I would like the 'p-trend' values (QM_P_trend) that appear on the plots to match the text style and size of the y-axis labels (i.e., the 'Analyte' values).
Currently, I am adding the p-trend values using geom_text(), but they do not match the formatting of the y-axis labels.
How can I modify my code to ensure that the p-trend values align with the y-axis labels in both size and style?
Here is my code and example dataset for reference:
###################################################################################
# CREATE DATA -------------------------------------------------------------------#
###################################################################################
data <- structure(list(Carrier_comparison = c("AvsB", "AvsB", "AvsB", "CvsB",
"CvsB", "CvsB", "AvsB", "AvsB", "AvsB", "CvsB", "CvsB", "CvsB",
"AvsB", "AvsB", "AvsB", "CvsB", "CvsB", "CvsB"),
Tertile = structure(c(3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L, 3L, 2L, 1L),
levels = c("Tertile 3 (63-70)", "Tertile 2 (55-62)", "Tertile 1 (40-54)"),
class = "factor"), Assay = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L),
levels = c("Assay_1","Assay_2", "Assay_3"), class = "factor"),
BETA = c(0.0536645935253193, 0.00934331381571756, -0.0502503436470944, 0.00816704351606194,
0.0946005745129161, -0.0749759598409221, -0.0600444465527413, 0.0601560795464937,
-0.0640637292430954, 0.0219960172083058, -0.148417379449254, -0.0346517428785544,
0.118618573404726, 0.0822558056705285, 0.15602529221275, -0.0326073326703257,
-0.054029294502272, -0.0346020391169203),
SE = c(0.11057661927178, 0.113129029834029, 0.126265725049744,
0.0823378264579776, 0.0866601604863716, 0.100479295994743, 0.102816408820442,
0.106832143822942, 0.117433994065687, 0.0774019365360251, 0.0821098859062016,
0.0932959318751491, 0.102738955735482, 0.108841714570024, 0.118096903894084,
0.0781497668089097, 0.0811784837707031, 0.0931611680323888),
P_uncorrected = c(0.627642070133785, 0.934211300388773, 0.690846714020907,
0.921016957706137, 0.275432313055544, 0.455911706235102,
0.559432213622232, 0.573597978019841, 0.585625011701019,
0.776350390578681, 0.0711164128651439, 0.710460149142023,
0.248707272854769, 0.450125272681661, 0.187040523394829,
0.676619698998927, 0.505916944765656, 0.71045895979309),
QM_P_trend = c(0.545836080293994, 0.545836080293994, 0.545836080293994,
0.7479209500248, 0.7479209500248, 0.7479209500248, 0.867112096895725,
0.867112096895725, 0.867112096895725, 0.57166587344432, 0.57166587344432,
0.57166587344432, 0.880298751128355, 0.880298751128355, 0.880298751128355,
0.949181439539369, 0.949181439539369, 0.949181439539369)),
row.names = c(NA, -18L), class = "data.frame")
###################################################################################
# CREATE PLOTS -------------------------------------------------------------------#
###################################################################################
# Reverse the order of Tertile levels (to ensure ascending order on plots)
data <- data %>%
mutate(Tertile = factor(Tertile, levels = rev(levels(Tertile))))
# Calculate LCI and UCI (95% Confidence Intervals)
data <- data %>%
mutate(
LCI = BETA - (1.96 * SE),
UCI = BETA + (1.96 * SE)
)
cat("Plot the data...\n\n")
# Create the forest plot function
forest_plot_data <- function(proteins, data, main_comparison) {
# Define the desired order for Carrier_comparison based on main_comparison
comparison_levels <- if (main_comparison == "A") {
c("AvsB", "CvsB")
} else {
c("CvsB", "AvsB")
}
# Filter data for selected proteins
data <- data %>%
filter(Assay %in% proteins)
# Ensure the Carrier_comparison is a factor with the desired level order
data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
# Calculate x-axis limits
max_abs_limit <- max(abs(c(data$LCI, data$UCI)), na.rm = TRUE)
x_limits <- c(-max_abs_limit, 1.3*max_abs_limit)
# Plot
plot <- ggforestplot::forestplot(
df = data,
name = Assay, # Use combined label for y-axis
estimate = BETA,
se = SE,
pvalue = P_uncorrected,
psignif = 0.01, # fill estimate if significant at this level
ci = 0.95,
logodds = FALSE,
xlab = "Beta (95% confidence intervals)",
ylab="Analyte",
colour = Tertile,
shape = Tertile,
position = position_dodge(width = 0.8) # Increase dodge width for better separation
) +
facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
scale_x_continuous(limits = x_limits) +
theme(
text = element_text(family = "Arial", size = 12, color = "black"), # Change "Arial" to your preferred font
strip.text = element_text(size = 16, color = "black", hjust = 0.5, vjust = 3.5, margin = margin(t = 12, b = 10)),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black", margin = margin(l = 15)),
axis.title.x = element_text(size = 14, color = "black", vjust = 0.5, margin = margin(t = 5, b = 5)),
axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.01, margin = margin(r = -70)), # Rotated y-axis title
legend.title = element_text(size = 14, color = "black"),
legend.text = element_text(size = 12, color = "black"), # Customize legend labels
legend.key.size = unit(1.5, "lines")
) +
geom_text(
data = data.frame(Carrier_comparison = comparison_levels, # Add "p-value" label above p-values in each panel
label = "bolditalic('p') * bold('-trend')"), # Correct expression for bold and italic
aes(x = 1.15*max_abs_limit, y = Inf, label = label),
vjust = 1,
parse = TRUE # Use parse = TRUE to interpret the label as an expression
) +
geom_text(
aes(x = 1.15*max_abs_limit, y = Assay, label = sprintf("%.3f", QM_P_trend)), # Display QM_P_trend values
vjust = 0.5
) +
scale_y_discrete(expand = expansion(add = c(0.5, 1.3))) # Add extra space at the top and bottom of the y-axis
# Return plot
return(plot)
}
# Create function to identify 'top analytes'
proteins_list <- levels(data$Assay)
# Generate plots
CvsB_forest_plot <- forest_plot_data(proteins_list, data, main_comparison = "C")
# View plot
CvsB_forest_plot
The issue with the boldish style of the labels is due to the fact that you add the p-value labels multiple times, i.e. once for each tertile. To fix that you can use e.g. dplyr::distinct
to filter your data for unique values. Concerning the size, use the same size for your labels as for the axis text:
library(dplyr)
library(ggplot2)
library(ggforestplot)
# Create the forest plot function
forest_plot_data <- function(proteins, data, main_comparison) {
# Define the desired order for Carrier_comparison based on main_comparison
comparison_levels <- if (main_comparison == "A") {
c("AvsB", "CvsB")
} else {
c("CvsB", "AvsB")
}
# Filter data for selected proteins
data <- data %>%
filter(Assay %in% proteins)
# Ensure the Carrier_comparison is a factor with the desired level order
data$Carrier_comparison <- factor(data$Carrier_comparison, levels = comparison_levels)
# Calculate x-axis limits
max_abs_limit <- max(abs(c(data$LCI, data$UCI)), na.rm = TRUE)
x_limits <- c(-max_abs_limit, 1.3 * max_abs_limit)
# Plot
plot <- ggforestplot::forestplot(
df = data,
name = Assay, # Use combined label for y-axis
estimate = BETA,
se = SE,
pvalue = P_uncorrected,
psignif = 0.01, # fill estimate if significant at this level
ci = 0.95,
logodds = FALSE,
xlab = "Beta (95% confidence intervals)",
ylab = "Analyte",
colour = Tertile,
shape = Tertile,
position = position_dodge(width = 0.8) # Increase dodge width for better separation
) +
facet_grid(. ~ Carrier_comparison, scales = "free", space = "free") +
scale_x_continuous(limits = x_limits) +
theme(
text = element_text(family = "Arial", size = 12, color = "black"), # Change "Arial" to your preferred font
strip.text = element_text(size = 16, color = "black", hjust = 0.5, vjust = 3.5, margin = margin(t = 12, b = 10)),
axis.text.x = element_text(size = 12, color = "black"),
axis.text.y = element_text(size = 12, color = "black", margin = margin(l = 15)),
axis.title.x = element_text(size = 14, color = "black", vjust = 0.5, margin = margin(t = 5, b = 5)),
axis.title.y = element_text(size = 14, color = "black", face = "bold", angle = 0, vjust = 1.01, margin = margin(r = -70)), # Rotated y-axis title
legend.title = element_text(size = 14, color = "black"),
legend.text = element_text(size = 12, color = "black"), # Customize legend labels
legend.key.size = unit(1.5, "lines")
) +
geom_text(
data = data.frame(
Carrier_comparison = comparison_levels, # Add "p-value" label above p-values in each panel
label = "bolditalic('p') * bold('-trend')"
), # Correct expression for bold and italic
aes(x = 1.15 * max_abs_limit, y = Inf, label = label),
vjust = 1,
parse = TRUE # Use parse = TRUE to interpret the label as an expression
) +
geom_text(
data = distinct(data, Assay, Carrier_comparison, QM_P_trend),
aes(x = 1.15 * max_abs_limit, y = Assay, label = sprintf("%.3f", QM_P_trend)), # Display QM_P_trend values
vjust = 0.5, size = 12 / .pt
) +
scale_y_discrete(expand = expansion(add = c(0.5, 1.3))) # Add extra space at the top and bottom of the y-axis
# Return plot
return(plot)
}
# Generate plots
CvsB_forest_plot <- forest_plot_data(proteins_list, data, main_comparison = "C")
# View plot
CvsB_forest_plot