Search code examples
rlistplotyaxis

How to dynamically adjust ylim in base R plots?


I have a list of data (see below), which I represent graphically using this script:

# Define plotting parameters
pch_values <- c(16, 17)  # Different symbols for beta_0 and beta_1
colors <- c("red", "blue", "gray60")  # Colors for different noise types
lty_values <- c(1, 2, 3)  # Line types for different noise types
quartile_titles <- c("Absolute Bias of First Quartile", "Absolute Bias of Second Quartile (Median)", "Absolute Bias of Third Quartile")

# Directory for saving plots
output_dir <- "Abs_Quartile_Bias_Plots"
if (!dir.exists(output_dir)) dir.create(output_dir)

# Compute unified y-axis limits for all quartile plots in test_data
y_values_all <- unlist(sapply(test_data, function(noise_data) {
  sapply(noise_data, function(T_data) T_data)
}), use.names = FALSE)

ylim_min <- min(y_values_all, na.rm = TRUE)
ylim_max <- max(y_values_all, na.rm = TRUE)
margin <- 0.05 * (ylim_max - ylim_min)  # Add 5% margin above and below
unified_ylim <- c(ylim_min - margin, ylim_max + margin)

# Set output file
png(file.path(output_dir, "Abs_Quartile_Bias_Case1.png"), 
    width = 1200, height = 400, res = 120)
par(mfrow = c(1, 3), mar = c(4, 4, 2, 1))  # 3 side-by-side subplots

# Plot each absolute quartile bias
for (quartile_idx in 1:3) {
  # Start a blank plot with unified y-axis limits
  plot(NULL, log = "x", xlim = range(sample_sizes), ylim = unified_ylim, 
       xlab = "Sample Size (T)", 
       ylab = quartile_titles[quartile_idx],
       main = quartile_titles[quartile_idx])
  
  # Add lines and points for beta_0 and beta_1 for each noise type
  for (noise_idx in seq_along(noise_types)) {
    noise <- noise_types[noise_idx]
    for (coef_name_idx in seq_along(pch_values)) {
      coef_name <- c("beta_0", "beta_1")[coef_name_idx]  # Explicitly specify coefficient names
      
      y_values <- sapply(test_data[[noise]][[coef_name]], function(T_data) T_data[[quartile_idx]])
      
      # Plot lines
      lines(sample_sizes, y_values, col = colors[noise_idx], lty = lty_values[noise_idx], lwd = 1)
      # Plot points
      points(sample_sizes, y_values, col = colors[noise_idx], pch = pch_values[coef_name_idx])
    }
  }
  
  # Add legends
  if (quartile_idx == 1) {  # Only add legends to the first plot
    legend("topright", 
           legend = c(expression(beta[0]), expression(beta[1])),
           pch = pch_values, 
           col = "black", bty = "n")
    legend("topright", 
           legend = str_to_title(noise_types), 
           col = colors, 
           lty = lty_values, 
           lwd = 1, bty = "n", inset = c(.18, 0))
  }
}

dev.off()

Which yields the following figure:

enter image description here

The problem as you can see, is that ylim is not adjusted to each subplot, which creates plots where the upper end of ylim is far from the highest value of the lines inside the subplot.

Question:

Can someone help me modify the code such that ylim is dynamically adjusted to the range of the data at hand?

I apologize of my data is not optimally arranged.

Here is the dput() output of the data in question:

> dput(test_data)
list(gaussian = list(beta_0 = list(`50` = c(`25%` = 0.243594614710333, 
`50%` = 0.00277753736411457, `75%` = 0.245669686373254), `100` = c(`25%` = 0.171918499273751, 
`50%` = 9.39373213166839e-05, `75%` = 0.183152746837558), `200` = c(`25%` = 0.116881126444175, 
`50%` = 0.00117104968639603, `75%` = 0.123610284184898), `500` = c(`25%` = 0.0804521896728776, 
`50%` = 0.00103027837221392, `75%` = 0.0784101295289328), `1000` = c(`25%` = 0.0541010700519603, 
`50%` = 0.00294703982537503, `75%` = 0.0531978110356446), `2000` = c(`25%` = 0.0377543581178926, 
`50%` = 0.00333965713516848, `75%` = 0.0413079011502395), `5000` = c(`25%` = 0.0217425538489762, 
`50%` = 0.000460109571013723, `75%` = 0.0257457441062152)), beta_1 = list(
    `50` = c(`25%` = 0.00882506794511806, `50%` = 0.000371504996548033, 
    `75%` = 0.00765082797750871), `100` = c(`25%` = 0.00290263844961158, 
    `50%` = 6.82498075157412e-05, `75%` = 0.00286586470881645
    ), `200` = c(`25%` = 0.00109095625190614, `50%` = 5.22460831402505e-05, 
    `75%` = 0.00109448558611547), `500` = c(`25%` = 0.00026571655332952, 
    `50%` = 8.85613155698906e-07, `75%` = 0.000259202833770455
    ), `1000` = c(`25%` = 9.21700578033757e-05, `50%` = 7.24859772205377e-07, 
    `75%` = 9.59841859602406e-05), `2000` = c(`25%` = 3.41775667214161e-05, 
    `50%` = 3.30163678108342e-06, `75%` = 2.86060854759462e-05
    ), `5000` = c(`25%` = 8.28928386886751e-06, `50%` = 4.18267251500737e-07, 
    `75%` = 8.43229566438453e-06))), laplace = list(beta_0 = list(
    `50` = c(`25%` = 0.198717632408904, `50%` = 0.0081310174987681, 
    `75%` = 0.210213471331161), `100` = c(`25%` = 0.130486991150159, 
    `50%` = 0.00513011870745728, `75%` = 0.151674765218005), 
    `200` = c(`25%` = 0.111212423845099, `50%` = 0.00734704246964579, 
    `75%` = 0.0981520122896047), `500` = c(`25%` = 0.0630465314163474, 
    `50%` = 0.00897075972839323, `75%` = 0.0683817884348215), 
    `1000` = c(`25%` = 0.042034483419907, `50%` = 0.00132855758101336, 
    `75%` = 0.0425837417269435), `2000` = c(`25%` = 0.0339129308142302, 
    `50%` = 0.00345531227818663, `75%` = 0.0280143719137393), 
    `5000` = c(`25%` = 0.0198791807771237, `50%` = 0.000479565883414246, 
    `75%` = 0.0194025102135911)), beta_1 = list(`50` = c(`25%` = 0.00740494352386101, 
`50%` = 0.00058286479518399, `75%` = 0.00741009508728219), `100` = c(`25%` = 0.00244184197921071, 
`50%` = 0.000134746791191187, `75%` = 0.00244511866997321), `200` = c(`25%` = 0.000866632964359182, 
`50%` = 8.7678493771115e-05, `75%` = 0.0010573916668597), `500` = c(`25%` = 0.000235006834866658, 
`50%` = 9.75544170911391e-06, `75%` = 0.000212441344117131), 
    `1000` = c(`25%` = 8.00301522252411e-05, `50%` = 1.07320472242378e-06, 
    `75%` = 7.29608951477445e-05), `2000` = c(`25%` = 2.54472054908028e-05, 
    `50%` = 1.46816056423305e-06, `75%` = 2.84447257516973e-05
    ), `5000` = c(`25%` = 6.16220803828504e-06, `50%` = 1.48241995123755e-07, 
    `75%` = 6.65929311072233e-06))), cauchy = list(beta_0 = list(
    `50` = c(`25%` = 0.274524411682779, `50%` = 0.0361625726093231, 
    `75%` = 0.336776027564404), `100` = c(`25%` = 0.198215929997078, 
    `50%` = 0.00328917426856501, `75%` = 0.207109168934052), 
    `200` = c(`25%` = 0.155318692086412, `50%` = 0.00672679937945397, 
    `75%` = 0.154703229084234), `500` = c(`25%` = 0.0880029220252172, 
    `50%` = 0.00278228951379011, `75%` = 0.0884479446139164), 
    `1000` = c(`25%` = 0.0688536372229773, `50%` = 0.00305819621170977, 
    `75%` = 0.0644673760777885), `2000` = c(`25%` = 0.0507696330400698, 
    `50%` = 0.000188404592404767, `75%` = 0.0500372607497894), 
    `5000` = c(`25%` = 0.0310586078673812, `50%` = 0.0017270922827648, 
    `75%` = 0.0322962294132709)), beta_1 = list(`50` = c(`25%` = 0.0120626033466085, 
`50%` = 0.0017021299049178, `75%` = 0.00978118814587825), `100` = c(`25%` = 0.00338466332476295, 
`50%` = 7.46446842248005e-06, `75%` = 0.0037767462745677), `200` = c(`25%` = 0.00125677406659053, 
`50%` = 4.41521154233016e-05, `75%` = 0.00133801259948108), `500` = c(`25%` = 0.000315465752873667, 
`50%` = 1.30850267132665e-05, `75%` = 0.0003114951811094), `1000` = c(`25%` = 0.000119877019459924, 
`50%` = 5.46893714759022e-07, `75%` = 0.000117584995672271), 
    `2000` = c(`25%` = 4.14058369635484e-05, `50%` = 9.97860400531181e-07, 
    `75%` = 4.36402038799244e-05), `5000` = c(`25%` = 9.47781604621056e-06, 
    `50%` = 9.22661139490799e-07, `75%` = 1.12907100797699e-05
    ))))

Solution

  • Apparently you have simulation data in matrix or matrix-convertible format. In this case, check out matplot which is designed for matrices. Use type='o' to get both lines and points.

    png('foo.png', 800, 240)
    op <- par(mfrow=c(1, 3), mar=c(4, 4, 2, 2)+.1)
    for (i in 1:3) {
      sim [[i]] |> 
        matplot(type='o', pch=rep(16:17, each=3), col=rep(colors, 2), 
                lty=rep(lty_values, 2), xlab='Sample Size (T)', 
                ylab=quartile_titles[i], xaxt='n', ylim=range(sim[[i]])*1.05)
      axis(1, 1:7, sample_sizes)
      if (identical(i, 1L)) {
        legend("topright", 
               legend=c(as.list(noise_types), sapply(0:1, \(x) bquote(beta[.(x)]))),
               pch=c(rep(NA, length(noise_types)), 16:17), 
               lty=c(lty_values, rep(NA, 2)), 
               col=c(colors, 1, 1), ncol=2, inset=c(-.1, 0),
               bty="n")
      }
    }
    par(op)
    dev.off()
    

    enter image description here

    If you really want the redundant titles, use main as usual.


    Data:

    set.seed(42)
    sim <- outer((1:6)^2, c(.8, .5, 1), 
                 Vectorize(\(z, f) {
                   (0.35*exp(-0.5*(1:7 - 1)) + rnorm(7, mean=0, sd=0.01))/z*f
                 }, 
                 SIMPLIFY=FALSE)) |> 
      apply(2, \(x) do.call('rbind', x), simplify=FALSE)
    noise_types <- c('Gaussian', 'Laplace', 'Cauchy')
    pch_values <- 16:17
    colors <- c("gray60", "red", "blue")
    lty_values <- c(3, 1, 2)
    quartile_titles <- c("Absolute Bias of First Quartile", 
                         "Absolute Bias of Second Quartile (Median)", 
                         "Absolute Bias of Third Quartile")
    sample_sizes <- c(50, 100, 200, 500, 1000, 2000, 5000)