I'm trying to loop through a subset of columns of the data frame btest
(data below) and plot some data. I still do not quite understand the tidyeval system, though it seems this is not fully implemented in ggplot2 yet anyway?
I can do this using the code below:
# Names of the cell-type columns (A through I) to plot.
bcells <- LETTERS[1:9]

# Build one scatter plot per column; the column name is passed to
# aes_string() as a string, so it can vary on each iteration.
lapply(bcells, function(cell) {
  point_mapping <- aes_string(x = "response", y = cell, color = "response")
  ggplot(data = btest) +
    geom_point(point_mapping)
})
However, the documentation for aes_string and aes_ states that aes_ is preferable.
aes_string and aes_ are particularly useful when writing functions that create plots because you can use strings or quoted names/calls to define the aesthetic mappings, rather than having to use substitute to generate a call to aes().
I recommend using aes_(), because creating the equivalents of aes(colour = "my colour") or aes(x = `X$1`) with aes_string() is quite clunky.
MY QUESTION: I do not understand why aes_ is preferable, or how I would implement the same code using aes_ instead of aes_string here.
My data is below:
structure(list(A = c(0.982753626864792, 0.490259710510256, 0.454306062926931,
0.443880090600994, 0.992704372174903, 0.831160693384458, 0.740981059382211,
0.971324123908582, 0.589614636646592, 0.663016559532728, 0.0442355006911685,
0.758388810061299, 0.39969185826509, 0.96343122781417, 0.578266180477106,
0.935289565081073, 0.954414616390872, 0.476708144579943, 0.906912570353835,
0.860767420084079, 0.878055964477507, 0.717065411183562, 0.626679994549329,
0.00471648517899614), B = c(0, 0.0359489937221843, 0.0455022610469154,
0.0245991717594771, 0, 0.0150049906282552, 0.0357444181630181,
0.00409885247542489, 0.0153491030612378, 0.0116215258999156,
0.0971266806096337, 0.00523783384210994, 0.000104337240641022,
0.000475801870965878, 0.0388763858222573, 0, 0.00947456311935685,
0.091028293882243, 0, 0.00114639793715674, 0.01709111810953,
0.00642564686487351, 0.0122005596623948, 0.0735538728126912),
C = c(0.00396910361917928, 0.3726373775819, 0.389150409858995,
0.236004149077653, 0.00654522233764124, 0.0757762646142197,
0.064467293054645, 0.021714367242937, 0.0510773710590119,
0.279742249706993, 0.228435750343793, 0.0163968987432784,
0.0386501968648076, 0.0147706021018908, 0.117796354856799,
0.0262705486829676, 0.0116437209145554, 0.249652632820836,
0.0708357724821996, 0.115182751748712, 0.0485081551895102,
0.0430306406326062, 0.0185687667917195, 0.062321917083855
), D = c(0, 0, 0.00728600019514972, 0.00320524248329104,
0, 0.0063037888029564, 0.00654538187729239, 0, 0.0176038859003177,
0, 0.0181870750390433, 0.00152581718814669, 0.00977725964480791,
0, 0, 0.00447626637015039, 0, 0, 0, 0, 0, 0.0453332704320773,
0, 0), E = c(0, 0, 0, 0.000475498116547242, 0.000291416366767824,
0, 0, 0.000108185143509404, 0, 0, 0.000585484789620521, 0,
0.00119989502426795, 0.000562924764494004, 0, 0, 0.000232527879948303,
6.96708420418182e-05, 0.000472096790474276, 0.000545274075130702,
0.000572161953294472, 0, 0, 0.0111234621378363), F = c(0.0113619316667346,
0.0761221446319925, 0.0940043097282167, 0.181463421237771,
0.00045898912068803, 0.0379484560273567, 0.130661228056559,
0.00273248163097645, 0.27374951093064, 0.0456196648603633,
0.311899809955928, 0.200378764906006, 0.483217874497928,
0.0162868512293491, 0.187555044444225, 0.0336927109381938,
0.0179346325967824, 0.141906152617276, 0.0167439810037839,
0.0137012129908311, 0.0297632632518369, 0.170891255992311,
0.210301640776889, 0.0808642159093989), G = c(0.00140289433926378,
0.00867420181911554, 0, 0.0103474797609997, 0, 0.0217237781037489,
0, 2.19895985703425e-05, 0, 0, 0.113543191682212, 0.00442851495302812,
0, 0, 0.0176396645397039, 0.000270908927614937, 0.00629993909848545,
0.0406351052576609, 0.00503557936970754, 0.00865694316409033,
0.0191377054890488, 0, 0.0115149714931613, 0.243819008858108
), H = c(0, 0.00293624962565618, 0, 0.0109784338152019, 0,
0, 0, 0, 0.0368789010169724, 0, 0, 0.0104630777433289, 0.0618086409502145,
0, 0.0252902404777352, 0, 0, 0, 0, 0, 0, 0.0101281816458403,
0.051238858176748, 0), I = c(0.000512443510029838, 0.0134213221088962,
0.00975095624379213, 0.0890465131480651, 0, 0.0120820284390054,
0.0216006194662742, 0, 0.0157265913852278, 0, 0.185986506888601,
0.00318028256280335, 0.00554993751224351, 0.00447259221913014,
0.0345761293821743, 0, 0, 0, 0, 0, 0.00687163152927302, 0.00712559324872945,
0.0694952085497587, 0.523601038019114), `P-value` = c(9999,
9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999,
9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999, 9999,
9999, 9999, 9999), Correlation = c(0.787379117728473, 0.713767273835577,
0.432941448432532, 0.654688521787571, 0.690623129562749,
0.72269025999843, 0.535092134674879, 0.795288368310815, 0.754840745986047,
0.0872468087627683, 0.760738916041899, 0.875990453791969,
0.878637700077733, 0.851326230903871, 0.458259685017224,
0.815125101981778, 0.299231595131615, 0.613359452217542,
0.424264050686203, 0.691764490900993, 0.806704730396525,
0.602426815978143, 0.786361339790331, 0.871574807143838),
RMSE = c(0.698736121897212, 0.75020398425833, 0.901716663988092,
0.763690363629575, 0.758968447930353, 0.757909848657902,
0.8482750320726, 0.695776594753745, 0.668395739137566, 1.04995120161959,
0.654740332409367, 0.590052129521314, 0.484783647407576,
0.659059332792332, 0.890274043213301, 0.687194392331628,
0.966871968720401, 0.807189528281839, 0.906250907041538,
0.770762860306121, 0.704446496934398, 0.805908330153981,
0.652426738364919, 0.490900219800415), sample_id = c("x6494",
"x1867", "x5038", "x5118", "x4631", "x6126", "x2051", "x0346",
"x2056", "x4949", "x5784", "x7357", "x1509", "x9449", "x0167",
"x9521", "x1494", "x7623", "x9705", "x4810", "x3549", "x6336",
"x9699", "x8727"), patient_id = c("x6494", "x1867", "x5038",
"x5118", "x4631", "x6126", "x2051", "x0346", "x2056", "x4949",
"x5784", "x7357", "x1509", "x9449", "x0167", "x9521", "x1494",
"x7623", "x9705", "x4810", "x3549", "x6336", "x9699", "x8727"
), treated = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), timing = c("post",
"pre", "post", "post", "post", "pre", "pre", "post", "pre",
"post", "pre", "post", "post", "post", "pre", "post", "pre",
"post", "post", "post", "pre", "post", "post", "pre"), response = c("nonresp",
"nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp",
"nonresp", "nonresp", "nonresp", "nonresp", "nonresp", "resp",
"nonresp", "nonresp", "nonresp", "nonresp", "resp", "nonresp",
"nonresp", "nonresp", "nonresp", "resp", "nonresp"), dataset = c("sny",
"sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny",
"sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny", "sny",
"sny", "sny", "sny", "sny", "sny"), OS_status = c(1, 1, 1,
1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0,
0, 1), OS_time = c(36.5, 78.2142857142857, 62.5714285714286,
140.785714285714, 26.0714285714286, 99.0714285714286, 41.7142857142857,
151.214285714286, 203.357142857143, 36.5, 26.0714285714286,
104.285714285714, 234.642857142857, 31.2857142857143, 140.785714285714,
140.785714285714, 104.285714285714, 208.571428571429, 62.5714285714286,
20.8571428571429, 26.0714285714286, 359.785714285714, 229.428571428571,
20.8571428571429), filtercol = structure(c(2L, 3L, 2L, 2L,
2L, 3L, 3L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L,
2L, 3L, 2L, 2L, 3L), .Label = c("on", "post", "pre"), class = "factor")), class = "data.frame", row.names = c(NA,
-24L), .Names = c("A", "B", "C", "D", "E", "F", "G", "H", "I",
"P-value", "Correlation", "RMSE", "sample_id", "patient_id",
"treated", "timing", "response", "dataset", "OS_status", "OS_time",
"filtercol"))
Aside from objections on the grounds that storing code as strings is bad, the idea behind preferring aes_ to aes_string is explained in the examples in the documentation:
# You can't easily mimic these calls with aes_string
aes(`$100`, colour = "smooth")
aes_(~ `$100`, colour = "smooth")
# Ok, you can, but it requires a _lot_ of quotes
aes_string("`$100`", colour = '"smooth"')
It also lays out how to get your example to work:
# Three ways of generating the same aesthetics
aes(mpg, wt, col = cyl)
aes_(quote(mpg), quote(wt), col = quote(cyl))
aes_(~mpg, ~wt, col = ~cyl)
aes_string("mpg", "wt", col = "cyl")
...
# Convert strings to names with as.name
var <- "cyl"
...
aes_(col = as.name(var))
In context, then,
library(ggplot2)
# One scatter plot per cell-type column (A through I). as.name() converts
# the column-name string into a symbol, which is what aes_() expects for a
# variable mapping; the fixed mappings are given as one-sided formulas.
plots <- lapply(LETTERS[1:9], function(cell) {
  cell_mapping <- aes_(~response, as.name(cell), color = ~response)
  ggplot(data = btest, cell_mapping) +
    geom_point()
})

# Display the third plot (column C).
plots[[3]]
However, at some point in the future, ggplot2 NSE will change, likely to use rlang so it's more uniform with the rest of the tidyverse, so don't put too much effort into figuring out its subtleties now; if you can get aes_string to do what you want, go for it. For now.
Instead, I might suggest a different approach entirely: converting your nine plots into one facetted one. You'll need to reshape the data to long form, but at that point it's not much more complicated:
library(tidyr)
# Reshape the nine cell-type columns (A through I) to long form: the column
# name goes into `cell` and its value into `value`, so a single plot can be
# facetted by cell type. pivot_longer() replaces the superseded gather().
btest %>%
  pivot_longer(A:I, names_to = "cell", values_to = "value") %>%
  ggplot(aes(response, value, color = response)) +
  geom_point() +
  facet_wrap(~cell)