The following code creates a data frame with 3 columns.
set.seed(142222)
num_lots <- 5
# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
Time = rep(3 * 0:8, times = num_lots),
Measurement = numeric(num_lots * 9))
# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
# Generate random intercept and slope for each lot
intercept <- rnorm(1, mean = 95, sd = 2)
slope <- runif(1, min = -.7, max = 0)
for (month in 0:8) {
# Simulate purity data with noise
data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
}
}
And then I fit a mixed-effect model to the simulated data. As follow:
ggplot(data, aes(x = Time, y = Purity, color = as.factor(Lot), shape = as.factor(Lot))) +
geom_point() +
geom_smooth(method = "lm", se=FALSE, type = 1) +
labs(
title = "Test",
x = "month",
y = "Purity",
color = "Lot", # Set legend title for color
shape = "Lot" # Set legend title for shape
) +
theme_minimal() +
scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))
The result looks like the following:
Question: I want to show only the 95% lower confidence bound
on the worst regression line
. How can I do that?
Worst regression line
is the line which will intersect the horizontal line == 80 earlier than other lines.
I know if I set se == TRUE
then all confidence bound for all of the lines will be shown. But I only want to have the lower confidence bound for only the worst line.
Bonus question: How can I fix the legend in such a way that only the symbols are shown (and not the line over them)?
You can plot two geom_smooth()
s - one for the 4 'good' lines and one for the 1 'worst' line, e.g.
library(tidyverse)
set.seed(142222)
num_lots <- 5
# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
Time = rep(3 * 0:8, times = num_lots),
Measurement = numeric(num_lots * 9))
# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
# Generate random intercept and slope for each lot
intercept <- rnorm(1, mean = 95, sd = 2)
slope <- runif(1, min = -.7, max = 0)
for (month in 0:8) {
# Simulate purity data with noise
data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
}
}
ggplot(data = data,
aes(x = Time, y = Purity,
color = as.factor(Lot),
shape = as.factor(Lot))) +
geom_point(key_glyph = "point") +
geom_smooth(data = data %>% filter(Lot == 2),
method = "lm", se=TRUE, type = 1,
key_glyph = "point") +
geom_smooth(data = data %>% filter(Lot != 2),
method = "lm", se=FALSE, type = 1,
key_glyph = "point") +
labs(
title = "Test",
x = "month",
y = "Purity",
color = "Lot", # Set legend title for color
shape = "Lot" # Set legend title for shape
) +
theme_minimal() +
scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))
#> Warning in geom_smooth(data = data %>% filter(Lot == 2), method = "lm", :
#> Ignoring unknown parameters: `type`
#> Warning in geom_smooth(data = data %>% filter(Lot != 2), method = "lm", :
#> Ignoring unknown parameters: `type`
#> `geom_smooth()` using formula = 'y ~ x'
#> `geom_smooth()` using formula = 'y ~ x'
Created on 2023-10-12 with reprex v2.0.2
Instead of using key_glyph = "point"
, it's better to use show_legend = FALSE
per @stefan's comment:
library(tidyverse)
set.seed(142222)
num_lots <- 5
# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
Time = rep(3 * 0:8, times = num_lots),
Measurement = numeric(num_lots * 9))
# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
# Generate random intercept and slope for each lot
intercept <- rnorm(1, mean = 95, sd = 2)
slope <- runif(1, min = -.7, max = 0)
for (month in 0:8) {
# Simulate purity data with noise
data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
}
}
ggplot(data = data,
aes(x = Time, y = Purity,
color = as.factor(Lot),
shape = as.factor(Lot))) +
geom_point() +
geom_smooth(data = data %>% filter(Lot == 2),
method = "lm", formula = "y ~ x",
se=TRUE,
show.legend = FALSE) +
geom_smooth(data = data %>% filter(Lot != 2),
method = "lm", formula = "y ~ x",
se=FALSE,
show.legend = FALSE) +
labs(
title = "Test",
x = "month",
y = "Purity",
color = "Lot", # Set legend title for color
shape = "Lot" # Set legend title for shape
) +
theme_minimal() +
scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))
Created on 2023-10-12 with reprex v2.0.2
You can automate the selection of the 'worst' line in different ways; the easiest way is to select the Lot with the lowest Purity at Time = 0, but this may change depending on your data (i.e. maybe you want to select the Lot with the lowest Purity at Time = 24?). You can draw the upper bounds only, but you have to calculate the coordinates yourself, e.g.
library(tidyverse)
set.seed(142222)
num_lots <- 5
# Create an empty data frame to store the simulated data
data <- data.frame(Lot = rep(1:num_lots, each = 9),
Time = rep(3 * 0:8, times = num_lots),
Measurement = numeric(num_lots * 9))
# Simulate purity data for each lot and time point
for (lot in 1:num_lots) {
# Generate random intercept and slope for each lot
intercept <- rnorm(1, mean = 95, sd = 2)
slope <- runif(1, min = -.7, max = 0)
for (month in 0:8) {
# Simulate purity data with noise
data[data$Lot == lot & data$Time == month * 3, "Purity"] <- intercept + slope * month * 3 + rnorm(1, mean = 0, sd = .35)
}
}
# Select the worst regression line
worst <- data %>% filter(Purity == min(Purity)) %>% pull(Lot)
# Build the 5 linear models
output <- data %>%
nest_by(Lot) %>%
reframe(model = list(lm(data = data, formula = Purity ~ Time)))
# Apply the models and extract the coordinates
preds <- predict(output$model[[worst]], newdata = data, se.fit = TRUE)
input_df <- data.frame(fit = preds$fit, se.fit = preds$se.fit) %>%
bind_cols(data)
# Plot data and input_df
ggplot(data = data,
aes(x = Time, y = Purity,
color = as.factor(Lot),
shape = as.factor(Lot))) +
geom_point() +
geom_smooth(method = "lm", formula = "y ~ x",
se=FALSE,
show.legend = FALSE) +
geom_ribbon(data = input_df, aes(x = Time, y = Purity,
ymin = fit, ymax = fit + se.fit * 2),
inherit.aes = FALSE, lty = 2, fill = "blue", alpha = 0.25) +
labs(
title = "Test",
x = "month",
y = "Purity",
color = "Lot", # Set legend title for color
shape = "Lot" # Set legend title for shape
) +
theme_minimal() +
scale_x_continuous(breaks = c(0, 3, 6, 9, 12, 15, 18, 21, 24))
Created on 2023-10-12 with reprex v2.0.2