Search code examples
rggplot2colorsggmap

How to apply discrete colour scale to R stat_summary_2d()?


I am trying to apply a discrete colour scale to tiles created by the stat_summary_2d function in R. These tiles are plotted on top of a map created by ggmap and each tile represents the mean of all points that fall inside the tile's boundaries (more specifically, pollution values). Below is an image of what I have achieved so far (I cannot include the image directly due to my low reputation at the moment):

Current attempt at map overlay using stat_summary_2d() in R

As you can see, it is almost there. However, there are 2 problems:

  1. the discrete colours are not correct as I am using a gradient, and
  2. the legend does not show all of the categories.

I know using a gradient does not give me the discrete breaks I want, but it's the best I've managed so far. In addition, the colours I want are based on the UK's Air Quality Index (AQI) scale. The breaks, colours and labels for these categories are all in the code below, in case you cannot see the attached image.

AQI categories

Below is the code I have written so far:

library(ggmap)
library(ggplot2)

# Bounding box for creating map
bbox <- make_bbox(longitude, latitude, pmData[,], f = 0.005)
m <- get_map(bbox, source = "stamen", zoom = 15)

# Plot map
ggmap(m) +
  stat_summary_2d(geom = "tile", bins = 75, fun = mean, 
                  aes(x = longitude, y = latitude, z = pm_raw),
                  data = pmData) +
  xlab("Longitude") + 
  ylab("Latitude") +
  guides(fill=guide_legend(keywidth=0.05, keyheight=0.05, default.unit="inch")) +
  scale_fill_gradientn(breaks=c(0, 17, 34, 51, 59, 67, 76, 84, 92, 100, Inf),
                       minor_breaks = NULL,
                       guide = "legend",
                       colours = c("palegreen", "green", "green3", "yellow", "yellow3", 
                                   "orange", "salmon", "red", "red4", "purple"),
                       labels = c("Low", "Low", "Low", "Moderate", "Moderate", "Moderate", 
                                  "High", "High", "High", "Very high", "Very high"))

Since each tile's value is a mean, using a cut to create the discrete categories does not work as it leads to a Continuous value supplied to discrete scale error.

Let me know if you need example data and I can have a go at posting some.

Thank you.

structure(list(latitude = c(51.383833333, 51.386423333, 51.387305, 
51.388133333, 51.383815, 51.378265, 51.38614, 51.390151667, 51.390126667, 
51.380048333, 51.389838333, 51.389863333, 51.386211667, 51.381303333, 
51.378036667, 51.387336667, 51.378036667, 51.388873333, 51.387988333, 
51.38419, 51.383691667, 51.384888333, 51.377866667, 51.384748333, 
51.384993333, 51.377456667, 51.389825, 51.378485, 51.377715, 
51.386725, 51.386111667, 51.388356667, 51.383935, 51.383833333, 
51.378548333, 51.379861667, 51.386688333, 51.388008333, 51.387793333, 
51.383365, 51.38366, 51.387165, 51.384871667, 51.378063333, 51.378378333, 
51.377955, 51.37791, 51.388115, 51.384763333, 51.383471667, 51.380998333, 
51.37841, 51.380526667, 51.380526667, 51.381601667, 51.378836667, 
51.37867, 51.382835, 51.38553, 51.38784, 51.386893333, 51.385325, 
51.377981667, 51.383798333, 51.378041667, 51.382858333, 51.38115, 
51.3812, 51.383555, 51.38834, 51.384345, 51.381181667, 51.377565, 
51.379041667, 51.384736667, 51.384828333, 51.38389, 51.382735, 
51.38527, 51.385566667, 51.380448333, 51.378585, 51.387068333, 
51.379693333, 51.379535, 51.381738333, 51.386378333, 51.378341667, 
51.385281667, 51.385115, 51.37791, 51.378043333, 51.377256667, 
51.381213333, 51.386808333, 51.380298333, 51.37851, 51.382393333, 
51.381196667, 51.377373333, 51.389178333, 51.385386667, 51.385038333, 
51.382931667, 51.389025, 51.384655, 51.38477, 51.378701667, 51.377633333, 
51.386478333, 51.383513333, 51.384828333, 51.380108333, 51.382911667, 
51.377868333, 51.377931667, 51.382061667, 51.382773333, 51.377656667, 
51.384795, 51.377878333, 51.377436667, 51.377338333, 51.382755, 
51.389243333, 51.386008333, 51.383578333, 51.381366667, 51.385426667, 
51.383373333, 51.38756, 51.385405, 51.384733333, 51.382111667, 
51.377853333, 51.377975, 51.385383333, 51.3777, 51.383573333, 
51.38748, 51.384433333, 51.389521667, 51.384586667, 51.388643333, 
51.388745, 51.389503333, 51.384065, 51.385931667, 51.3902, 51.3902, 
51.377628333, 51.377165, 51.388163333, 51.38431, 51.381613333
), longitude = c(-2.363555, -2.351465, -2.352908333, -2.354518333, 
-2.363075, -2.357106667, -2.36021, -2.356771667, -2.356616667, 
-2.357106667, -2.356616667, -2.357891667, -2.360146667, -2.374723333, 
-2.361418333, -2.359806667, -2.35918, -2.35918, -2.359898333, 
-2.371886667, -2.36423, -2.353118333, -2.357285, -2.353675, -2.361558333, 
-2.36048, -2.356543333, -2.365641667, -2.362033333, -2.351465, 
-2.351508333, -2.354675, -2.363166667, -2.381021667, -2.366575, 
-2.371295, -2.351558333, -2.371886667, -2.376568333, -2.367488333, 
-2.36423, -2.359898333, -2.38118, -2.36133, -2.359806667, -2.35902, 
-2.35746, -2.35464, -2.362713333, -2.381021667, -2.357285, -2.359898333, 
-2.357106667, -2.357106667, -2.377308333, -2.361121667, -2.367248333, 
-2.3812, -2.377001667, -2.354101667, -2.359915, -2.376568333, 
-2.363448333, -2.363593333, -2.357106667, -2.358393333, -2.372698333, 
-2.372431667, -2.364931667, -2.359806667, -2.354518333, -2.357375, 
-2.361121667, -2.363593333, -2.381628333, -2.37422, -2.363448333, 
-2.358081667, -2.352161667, -2.37829, -2.372305, -2.360146667, 
-2.352568333, -2.357285, -2.360146667, -2.35762, -2.373406667, 
-2.36133, -2.37422, -2.36133, -2.357845, -2.359806667, -2.361418333, 
-2.357375, -2.359806667, -2.370346667, -2.365983333, -2.379583333, 
-2.357436667, -2.36013, -2.355635, -2.380135, -2.381021667, -2.388205, 
-2.355141667, -2.371321667, -2.372501667, -2.368555, -2.363548333, 
-2.351465, -2.364931667, -2.362295, -2.37045, -2.38118, -2.357533333, 
-2.36389, -2.38118, -2.358116667, -2.361878333, -2.363166667, 
-2.357891667, -2.36048, -2.361558333, -2.358843333, -2.358843333, 
-2.36021, -2.368918333, -2.377361667, -2.376808333, -2.365816667, 
-2.353393333, -2.377001667, -2.363166667, -2.380608333, -2.357106667, 
-2.356938333, -2.376808333, -2.362295, -2.369111667, -2.35344, 
-2.354408333, -2.35856, -2.363448333, -2.359675, -2.35517, -2.35856, 
-2.38118, -2.36021, -2.356781667, -2.356781667, -2.36048, -2.36133, 
-2.354521667, -2.372698333, -2.377001667), pm_raw = c(20.2856490204, 
77.8877903749, 71.7933099292, 98.9227746379, 13.168840034, 25.1495305964, 
18.5911875492, 11.5532213795, 11.5532213795, 34.6047765217, 34.2986999054, 
38.1823784449, 60.2354155746, 19.6998286117, 24.3481168666, 18.4068794706, 
92.4024502724, 20.8700066963, 53.7655811403, 26.56884172, 25.9563524058, 
41.8398168697, 21.204229175, 18.4985229695, 69.4438433391, 35.0603153227, 
52.5171448444, 37.6010327173, 11.3101753017, 50.9286177028, 17.8909927779, 
16.4572391933, 39.9021155572, 8.86286052724, 23.9431598443, 39.7689915216, 
30.8634678742, 18.1143232738, 18.3941479353, 9.46628056132, 14.412527522, 
27.7423289978, 10.7671408538, 15.9341080291, 20.6349163328, 15.4269963603, 
5.83588000809, 16.1941855148, 68.6352334409, 14.4015212512, 102.822361444, 
26.5387481, 60.5308607077, 6.97155968363, 234.155664235, 26.7571613942, 
12.5228906193, 9.15531699502, 28.963860631, 11.1517290682, 76.4919317928, 
11.5337240599, 16.9191620121, 14.9634160546, 7.48538606606, 7.36565315293, 
6.58305090626, 2.88802757122, 4.80066803954, 14.3716849029, 21.3111785588, 
16.1417055686, 8.91166650308, 14.6912815119, 13.6515088126, 34.6758160409, 
107.411084295, 8.34201752814, 43.5562737658, 13.1949460218, 22.5121677682, 
7.94588103399, 13.7452057672, 16.4227477955, 24.6492704894, 27.7137110021, 
14.1559453375, 32.1195214536, 22.1462601372, 29.8239718823, 60.8699820745, 
71.4253002227, 35.8562060293, 82.0146924171, 38.3981186095, 15.3572590305, 
8.77474869292, 41.4601384099, 19.6070387258, 41.9414449, 29.3322623223, 
120.145146039, 72.294045077, 21.6956980653, 9.83379481087, 9.61218271598, 
6.18969929985, 15.3210540005, 48.9376058474, 56.9003135671, 28.1088565396, 
6.59098222239, 22.6884614987, 52.7084659288, 38.4823403648, 58.7704971854, 
19.9000537492, 0.000966813590044, 0.000966813590044, 26.0864635077, 
36.8963060363, 39.9757260957, 42.790215098, 79.8085095756, 47.0276739845, 
31.4995803181, 133.024483283, 31.2866736476, 92.7349765429, 11.1185166794, 
31.1820240423, 8.08385762269, 48.4851751962, 63.9069236938, 27.1244461233, 
60.3187940712, 20.5374151595, 28.4407989289, 65.8097519874, 91.1719973116, 
42.3963928042, 18.0173416637, 85.6945673673, 20.9880757421, 53.375218596, 
26.4525339035, 47.8580307084, 23.9389236402, 4.46424583036, 12.5901938763, 
17.1264828579, 25.9273498878, 92.1563825857, 11.1875766427, 28.5205111082
)), row.names = c(NA, 155L), class = "data.frame")

Solution

  • See if the following works for you. Explanations annotated in the code:

    # define the cut points (I changed 100 to 101)
    fill.breaks <- c(0, 17, 34, 51, 59, 67, 76, 84, 92, 101, Inf)
    
    # obtain the correct factor names output by this specific set of cutpoints
    # (less error-prone than typing them out manually)
    fill.names <- levels(cut(seq(0, 120), breaks = fill.breaks, right = FALSE))
    
    # define the labels & colours associated with the cutpoints
    fill.labels <- c(rep(c("Low", "Moderate", "High"), each = 3), "Very High")
    fill.colors <- c("palegreen", "green", "green3", "yellow", "yellow3", 
                     "orange", "salmon", "red", "red4", "purple")
    names(fill.labels) <- fill.names
    names(fill.colors) <- fill.names
    
    # define the function in stat_summary_2d to return the results of mean as
    # appropriately cut factor levels, & use scale_fill_manual to match each
    # level to its corresponding color / label
    ggplot(pmData,
           aes(x = longitude, y = latitude, z = pm_raw)) +
      stat_summary_2d(geom = "tile", bins = 75,
                      fun = function(x) cut(mean(x), 
                                            breaks = fill.breaks,
                                            right = FALSE)) +
      scale_fill_manual(values = fill.colors,
                        labels = fill.labels) +
      guides(fill=guide_legend(keywidth=0.05, keyheight=0.05, default.unit="inch"))
    

    plot