Search code examples
r ggplot2 autocorrelation

Plotting multiple ACF with ggplot


Hi, I'm trying to plot multiple ACFs and PACFs with ggplot.

I've made some progress, but looping still causes me a lot of trouble, so I hope you can help me out.

I want to make four ACF plots, one for each of AAPL, GE, SPY and WMT.

# Asker's attempt, kept exactly as posted: the intent is one ACF plot per
# series column of `df`, collected in `plot_list` and arranged in a grid.
# The first two columns are dropped from the names (`Date` and
# `AAPL, not cleaned` in the sample data), leaving the series to plot.
col_names <- colnames(df)
col_names <- col_names[-c(1,2)]
for (i in col_names){
# NOTE(review): the whole `df` is passed to `acf()` here; `i` is only used for
# the title and the `plot_list` index, so column `i` is never selected.
bacf <- acf(df, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
# NOTE(review): the trailing `+` continues this expression onto the
# `q <- ggplot(...)` statement that follows. `sum(!is.na(df))` counts the
# non-missing cells of the entire data frame, not the length of one series.
significance_level <- qnorm((1 + 0.95)/2)/sqrt(sum(!is.na(df)))+

q <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
  geom_segment(mapping = aes(xend = lag, yend = 0))+
  geom_hline(yintercept=c(significance_level,-significance_level), lty=3, color="blue") +
  ggtitle(i)+theme_minimal()
# NOTE(review): `plot_list` is not created anywhere in this snippet.
plot_list[[i]]<-q;

# NOTE(review): the `{` opened by `for` has no closing `}` in this snippet, and
# the call below indexes `q` (a single plot object) rather than `plot_list`.
plot_grid(q[[1]],q[[2]],q[[3]],q[[4]])

Some of my data:

structure(list(Date = structure(c(10960, 10961, 10962, 10963, 
10966, 10967, 10968, 10969, 10970, 10974, 10975, 10976, 10977, 
10980, 10981, 10982, 10983, 10984, 10987, 10988), class = "Date"), 
    `AAPL, not cleaned` = c(-8.810021, 1.45281, -9.051401, 4.628075, 
    -1.774445, -5.25055, -6.181806, 10.40407, 3.74302, 3.425328, 
    2.48944, 6.309463, -1.948374, -4.652429, 5.493372, -1.852238, 
    -0.1725783, -7.924, 2.074379, -3.431709), AAPL = c(-8.810021, 
    1.45281, -9.051401, 4.628075, -1.774445, -5.25055, -6.181806, 
    10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374, 
    -4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379, 
    -3.431709), GE = c(-4.08219945, -0.17376199, 1.32681098, 
    3.7986923, -0.03966156, 0.1651528, 0.32948959, 1.14473858, 
    -1.80480699, -2.00675631, 0.48530696, -1.88697651, -1.24799082, 
    -4.25203994, 0.26750549, 2.10052733, 0.21893437, -5.62251427, 
    0, 1.48150858), SPY = c(-3.989133, 0.1787311, -1.620197, 
    5.645238, 0.3424661, -1.203798, -0.999791, 1.345214, 1.348592, 
    -0.7898116, 0.8111037, -1.542447, -0.2161228, -2.875245, 
    1.129238, -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
    ), WMT = c(-3.813763, -2.360084, 1.391327, 7.280618, -1.841673, 
    -1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053, -2.314561, 
    -1.067179, -1.494226, -5.024863, 2.904527, 1.316343, -4.642776, 
    -7.004438, -0.6916664, 6.949199)), row.names = c(NA, 20L), class = "data.frame")

Solution

  • I tried to make this as efficient as possible while honoring your intent. Some steps could be done in base R (e.g. with `lapply`), but I used the tidyverse.

    library(dplyr)
    library(purrr)
    library(ggplot2)
    
    glimpse(df)
    #> Rows: 20
    #> Columns: 6
    #> $ Date                <date> 2000-01-04, 2000-01-05, 2000-01-06, 2000-01-07, …
    #> $ `AAPL, not cleaned` <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
    #> $ AAPL                <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
    #> $ GE                  <dbl> -4.08219945, -0.17376199, 1.32681098, 3.79869230,…
    #> $ SPY                 <dbl> -3.9891330, 0.1787311, -1.6201970, 5.6452380, 0.3…
    #> $ WMT                 <dbl> -3.8137630, -2.3600840, 1.3913270, 7.2806180, -1.…
    
    # Compute the sample ACF of each return series and collect the results as
    # the columns of one tibble. `Date` and the unclean AAPL column (which is
    # actually identical to AAPL) are dropped first, so no ACF is computed for
    # columns that would be thrown away afterwards.
    # `acf()$acf` is an array; `as.numeric()` flattens it so that every column
    # of the result is an ordinary numeric vector. All series have the same
    # length, hence the same number of lags, so the columns line up.
    
    bacdf <- df %>%
      select(-Date, -`AAPL, not cleaned`) %>%
      map(function(ts) as.numeric(acf(ts, plot = FALSE)$acf)) %>%
      as_tibble()
    
    # The lags are all the same: just 0 through the number of rows minus 1.
    # `seq_len()` keeps the column integer and stays correct for zero rows.
    
    bacdf$lag <- seq_len(nrow(bacdf)) - 1L
    
    # reorder things so that `lag` is the first column
    
    bacdf <- bacdf %>% select(lag, everything())
    
    bacdf
    #> # A tibble: 14 x 5
    #>      lag    AAPL      GE     SPY      WMT
    #>    <int>   <dbl>   <dbl>   <dbl>    <dbl>
    #>  1     0  1       1       1       1      
    #>  2     1 -0.147   0.0812 -0.197   0.0769 
    #>  3     2  0.143  -0.316   0.0197 -0.453  
    #>  4     3 -0.0940 -0.182  -0.407  -0.296  
    #>  5     4  0.312   0.326   0.239   0.158  
    #>  6     5 -0.229   0.0171 -0.0140  0.176  
    #>  7     6 -0.0727 -0.238  -0.0138 -0.00309
    #>  8     7 -0.0300 -0.198  -0.0321 -0.0287 
    #>  9     8 -0.150   0.0675 -0.135  -0.0981 
    #> 10     9 -0.238   0.101  -0.0376 -0.106  
    #> 11    10 -0.186  -0.313  -0.219  -0.0790 
    #> 12    11  0.114  -0.0322  0.285   0.115  
    #> 13    12 -0.119   0.148  -0.0444  0.202  
    #> 14    13  0.128   0.214   0.0559 -0.0870
    
    # Approximate 95% white-noise bounds for a sample ACF: qnorm(0.975) / sqrt(n),
    # where n is the number of observations in ONE series. Counting every
    # non-missing cell of `df` (all six columns) would inflate n and make the
    # bounds far too narrow; each series here has `nrow(df)` observations.
    significance_level <- qnorm((1 + 0.95) / 2) / sqrt(nrow(df))
    
    # Name of the x-axis column (first column) and of the series columns (rest).
    myaxis <- colnames(bacdf[1])
    mynames <- colnames(bacdf[-1])
    myaxis
    #> [1] "lag"
    mynames
    #> [1] "AAPL" "GE"   "SPY"  "WMT"
    
    # Draw one ACF plot per series. Columns are looked up by name through the
    # `.data` pronoun (the replacement for the deprecated `aes_string()`).
    # The axis labels are set explicitly so they show the plain column names.
    for (i in seq_along(mynames)) {
      series_name <- mynames[[i]]
      print(ggplot(bacdf, aes(x = .data[[myaxis]], y = .data[[series_name]])) +
              # vertical spike from the x-axis up (or down) to each ACF value
              geom_segment(mapping = aes(xend = .data[[myaxis]], yend = 0)) +
              geom_point() +
              # dotted lines at +/- the significance level
              geom_hline(yintercept = c(significance_level, -significance_level), lty = 3, color = "blue") +
              labs(x = myaxis, y = series_name) +
              ggtitle(series_name) +
              theme_minimal())
    }
    

    Just for the record, here is your data:

    # Sample data: 20 rows, a `Date` column plus five numeric series
    # (`AAPL, not cleaned` holds the same values as `AAPL`).
    df <- structure(
      list(
        Date = structure(
          c(
            10960, 10961, 10962, 10963, 10966, 10967, 10968, 10969, 10970, 10974,
            10975, 10976, 10977, 10980, 10981, 10982, 10983, 10984, 10987, 10988
          ),
          class = "Date"
        ),
        `AAPL, not cleaned` = c(
          -8.810021, 1.45281, -9.051401, 4.628075, -1.774445,
          -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
          2.48944, 6.309463, -1.948374, -4.652429, 5.493372,
          -1.852238, -0.1725783, -7.924, 2.074379, -3.431709
        ),
        AAPL = c(
          -8.810021, 1.45281, -9.051401, 4.628075, -1.774445,
          -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
          2.48944, 6.309463, -1.948374, -4.652429, 5.493372,
          -1.852238, -0.1725783, -7.924, 2.074379, -3.431709
        ),
        GE = c(
          -4.08219945, -0.17376199, 1.32681098, 3.7986923, -0.03966156,
          0.1651528, 0.32948959, 1.14473858, -1.80480699, -2.00675631,
          0.48530696, -1.88697651, -1.24799082, -4.25203994, 0.26750549,
          2.10052733, 0.21893437, -5.62251427, 0, 1.48150858
        ),
        SPY = c(
          -3.989133, 0.1787311, -1.620197, 5.645238, 0.3424661,
          -1.203798, -0.999791, 1.345214, 1.348592, -0.7898116,
          0.8111037, -1.542447, -0.2161228, -2.875245, 1.129238,
          -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
        ),
        WMT = c(
          -3.813763, -2.360084, 1.391327, 7.280618, -1.841673,
          -1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053,
          -2.314561, -1.067179, -1.494226, -5.024863, 2.904527,
          1.316343, -4.642776, -7.004438, -0.6916664, 6.949199
        )
      ),
      row.names = c(NA, 20L),
      class = "data.frame"
    )
    
    

    Created on 2020-05-13 by the reprex package (v0.3.0)