I am performing a quantile regression as following:
library(quantreg) #quantile regression framework
#function to perform quantile regression with bootstrap confidence intervals
rq_bootstrap <- function(data, n_bootstrap = 1000) {
rq_model <- summary(
rq(julian_day ~ year_centered, data = data, tau = 1:99 / 100),
se = "boot",
bsmethod = "xy",
R = n_bootstrap,
level = 0.95
)
plot(rq_model, mfrow = c(1, 2))
coef_df <- as.data.frame(sapply(rq_model, function(x) c(tau = x$tau, x$coefficients[-1, ])))
return(list(model_summary = rq_model, coefficients = coef_df))
}
#apply the function
rq_results_lat1 <- rq_bootstrap(lat1)
The resulting plot is correct. However, I would like to access to the estimates values and the values of their bootstrap confidence interval, but I do not manage to.
Here is a sample of my data frame for reproductibility:
> dput(lat1[sample(nrow(lat1), 50),])
structure(list(date = structure(c(5567, 14294, 12820, 16167,
17629, 14327, 16452, 16130, 18664, 13913, 13243, 15759, 12507,
11049, 15410, 11342, 12821, 16147, 13240, 18645, 17979, 11706,
17607, 17615, 16528, 16131, 13562, 18349, 13221, 14290, 15068,
16507, 15059, 14997, 14283, 9931, 3677, 15421, 12151, 13591,
13920, 16548, 16123, 18294, 13596, 17186, 12862, 6690, 13218,
13252), class = "Date"), lat = c(57, 56, 57, 59, 58, 59, 56,
59, 56, 56, 59, 57, 56, 57, 59, 55, 56, 56, 59, 59, 59, 56, 57,
57, 58, 56, 58, 55, 57, 56, 58, 59, 58, 58, 56, 58, 58, 58, 56,
57, 57, 55, 59, 56, 56, 55, 57, 59, 59, 59), long = c(11, 12,
12, 17, 11, 15, 12, 18, 12, 16, 14, 11, 16, 11, 15, 12, 16, 16,
18, 16, 17, 12, 13, 12, 13, 12, 12, 13, 11, 16, 14, 17, 12, 14,
16, 14, 11, 17, 16, 18, 12, 14, 16, 12, 12, 12, 13, 16, 17, 15
), julian_day = c(89, 50, 37, 97, 98, 83, 17, 60, 37, 35, 95,
54, 90, 93, 71, 20, 38, 77, 92, 18, 83, 19, 76, 84, 93, 61, 49,
88, 73, 46, 94, 72, 85, 23, 39, 70, 26, 82, 99, 78, 42, 113,
53, 33, 83, 20, 79, 117, 70, 104), year = c("1985", "2009", "2005",
"2014", "2018", "2009", "2015", "2014", "2021", "2008", "2006",
"2013", "2004", "2000", "2012", "2001", "2005", "2014", "2006",
"2021", "2019", "2002", "2018", "2018", "2015", "2014", "2007",
"2020", "2006", "2009", "2011", "2015", "2011", "2011", "2009",
"1997", "1980", "2012", "2003", "2007", "2008", "2015", "2014",
"2020", "2007", "2017", "2005", "1988", "2006", "2006"), decade = c("1980-1989",
"2000-2009", "2000-2009", "2010-2019", "2010-2019", "2000-2009",
"2010-2019", "2010-2019", "2020-2029", "2000-2009", "2000-2009",
"2010-2019", "2000-2009", "2000-2009", "2010-2019", "2000-2009",
"2000-2009", "2010-2019", "2000-2009", "2020-2029", "2010-2019",
"2000-2009", "2010-2019", "2010-2019", "2010-2019", "2010-2019",
"2000-2009", "2020-2029", "2000-2009", "2000-2009", "2010-2019",
"2010-2019", "2010-2019", "2010-2019", "2000-2009", "1990-1999",
"1980-1989", "2010-2019", "2000-2009", "2000-2009", "2000-2009",
"2010-2019", "2010-2019", "2020-2029", "2000-2009", "2010-2019",
"2000-2009", "1980-1989", "2000-2009", "2000-2009"), time = c(13L,
15L, 15L, 16L, 16L, 15L, 16L, 16L, 17L, 15L, 15L, 16L, 15L, 15L,
16L, 15L, 15L, 16L, 15L, 17L, 16L, 15L, 16L, 16L, 16L, 16L, 15L,
17L, 15L, 15L, 16L, 16L, 16L, 16L, 15L, 14L, 13L, 16L, 15L, 15L,
15L, 16L, 16L, 17L, 15L, 16L, 15L, 13L, 15L, 15L), lat_grouped = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1), year_centered = structure(c(85, 109, 105,
114, 118, 109, 115, 114, 121, 108, 106, 113, 104, 100, 112, 101,
105, 114, 106, 121, 119, 102, 118, 118, 115, 114, 107, 120, 106,
109, 111, 115, 111, 111, 109, 97, 80, 112, 103, 107, 108, 115,
114, 120, 107, 117, 105, 88, 106, 106), class = "AsIs")), row.names = c(3378L,
21344L, 11753L, 33172L, 42002L, 22010L, 33453L, 31852L, 49389L,
17982L, 15167L, 29657L, 11059L, 7655L, 27224L, 7730L, 11784L,
32491L, 14955L, 48899L, 44025L, 8367L, 40663L, 41022L, 35232L,
31932L, 16540L, 47318L, 13830L, 21255L, 26321L, 34488L, 25776L,
25064L, 21134L, 6545L, 2085L, 27476L, 9846L, 17134L, 18082L,
35808L, 31695L, 45590L, 17310L, 37493L, 12220L, 4371L, 13731L,
15875L), class = "data.frame")
> head(lat1)
date lat long julian_day year decade time lat_grouped year_centered
592 1970-01-18 57 12 18 1970 1970-1979 12 1 70
593 1970-01-18 57 18 18 1970 1970-1979 12 1 70
594 1970-01-18 56 12 18 1970 1970-1979 12 1 70
595 1970-01-18 57 18 18 1970 1970-1979 12 1 70
596 1970-01-19 56 16 19 1970 1970-1979 12 1 70
597 1970-01-23 56 12 23 1970 1970-1979 12 1 70
I would be glad if someone knows how to extract the researched values.
Your data are already there in the output, but you probably need to reshape it into a format with estimates and upper / lower confidence intervals. The confidence intervals can be calculated, since they are simply the fit +/- 1.96 standard errors.
The manipulation is probably a bit easier using tidyverse functions:
library(quantreg)
library(tidyverse)
rq_bootstrap <- function(data, n_bootstrap = 1000) {
suppressWarnings(
rq(julian_day ~ year_centered, data = data, tau = 1:99 / 100)
) %>%
summary(se = "boot", bsmethod = "xy", R = n_bootstrap, level = 0.95) %>%
map_df(function(x) {
coef(x) |>
as.data.frame() |>
cbind(variable = rownames(coef(x)), tau = x$tau)
}) %>%
bind_rows() %>%
mutate(lower = Value - `Std. Error` * 1.96,
upper = Value + `Std. Error` * 1.96) %>%
select(variable, tau, Value, lower, upper) %>%
as_tibble()
}
Now if we run our function, we get a nicer result:
rq_results_lat1 <- rq_bootstrap(lat1)
rq_results_lat1
#> # A tibble: 198 x 5
#> variable tau Value lower upper
#> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 (Intercept) 0.01 34.7 -76.2 146.
#> 2 year_centered 0.01 -0.154 -1.14 0.836
#> 3 (Intercept) 0.02 34.7 -54.6 124.
#> 4 year_centered 0.02 -0.154 -0.949 0.641
#> 5 (Intercept) 0.03 34.7 -81.1 150.
#> 6 year_centered 0.03 -0.154 -1.19 0.885
#> 7 (Intercept) 0.04 24.4 -92.0 141.
#> 8 year_centered 0.04 -0.0526 -1.09 0.985
#> 9 (Intercept) 0.05 30.1 -94.5 155.
#> 10 year_centered 0.05 -0.100 -1.22 1.02
#> # ... with 188 more rows
And to show this is the expected output, let's plot this using ggplot
ggplot(rq_results_lat1, aes(tau, Value)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.3) +
ggh4x::geom_pointpath(size = 1) +
facet_wrap(.~variable, scale = 'free_y') +
theme_classic(base_size = 16) +
theme(strip.background = element_blank(),
panel.border = element_rect(fill = NA),
strip.text = element_text(size = 14, face = "bold"),
panel.spacing = unit(1, "cm"))
This is identical to the base R plot obtained with plot(rq_model)
, though when you plot the summary you need to specify the confidence level. We can check the output of the above code is correct by doing:
plot(rq_model, mfrow = c(1, 2), level = 0.95)
Created on 2023-08-03 with reprex v2.0.2