Hopefully this will be quick.
I have plotted the following chart using ggplot, with this code:
# Density contours of Measure.Name2 against Measure.Name1, coloured by
# Category.Name. The `+` must end the line: a line that starts with `+` is
# parsed as a new statement, so the layer would never be added to the plot.
ggplot(
  ContourDummy,
  aes(x = Measure.Name1, y = Measure.Name2, colour = Category.Name)
) +
  geom_density_2d()
My issue is that some of the contour lines are not complete.
Now if I scale my axis by adding the following...
# The same contour plot with hand-picked axis limits and a break at each end
# of both axes.
ggplot(
  ContourDummy,
  aes(x = Measure.Name1, y = Measure.Name2, colour = Category.Name)
) +
  geom_density_2d() +
  scale_x_continuous(
    minor_breaks = 0, breaks = seq(14, 26, 12), limits = c(14, 26)
  ) +
  scale_y_continuous(
    minor_breaks = 0, breaks = seq(50, 100, 50), limits = c(50, 100)
  )
I get the desired output.
But is there any way of automatically setting the limits? I want to be able to replicate this chart type automatically just by switching the data source, x, y and colour.
I don't particularly want to be fiddling around with scales every time.
Here's a function that expands the x and y ranges to include the maximum extent of the density contours. The function works as follows:
1. Create a plot object with x and y ranges expanded well beyond the data range, so that we can be sure the plot will include complete contour lines.
2. Use `ggplot_build` to determine the min and max x and y values among all the density contours.
3. Set the x and y ranges of the plot to the min and max x and y values determined in step 2.
The `exp` parameter is there to expand the final range by a tiny amount (0.5% on each side, i.e. 1% in total, by default), because a small piece of contour line can still be cut off without that small bit of extra padding (in the example below, try plotting the `mtcars` data frame with `exp=0` and you'll see what I mean).
# Draw 2-D density contours on axes wide enough to show every contour line
# in full.
#
# Arguments:
#   data  Data frame (or tibble) holding the columns named below.
#   var1  Name, as a string, of the column mapped to x.
#   var2  Name, as a string, of the column mapped to y.
#   col   Name, as a string, of the column mapped to colour. A numeric
#         column is converted to a factor first.
#   exp   Fraction of the contour extent added as padding on each side of
#         both axes.
#
# Returns a ggplot object.
d2d <- function(data, var1, var2, col, exp = 0.005) {
  # `[[` yields the column vector for data frames and tibbles alike, whereas
  # `data[, col]` stays a one-column tibble and fails the numeric test.
  if (is.numeric(data[[col]])) {
    data[[col]] <- as.factor(data[[col]])
  }

  # Limits reaching `pad` data ranges beyond the data on both sides. NAs are
  # dropped so that one missing value cannot turn both limits into NA.
  wide_limits <- function(values, pad = 2) {
    rng <- range(values, na.rm = TRUE)
    rng + c(-1, 1) * pad * diff(rng)
  }

  # Layers shared by the probe plot and the returned plot.
  base_plot <- ggplot(data, aes_string(var1, var2, colour = col)) +
    geom_density_2d()

  # Probe plot: ranges far wider than the data, so no contour is truncated.
  probe <- base_plot +
    scale_x_continuous(limits = wide_limits(data[[var1]])) +
    scale_y_continuous(limits = wide_limits(data[[var2]]))

  # Extent of the computed contour lines, padded by `exp` on each side.
  contours <- ggplot_build(probe)$data[[1]]
  xyscales <- lapply(contours[c("x", "y")], function(coord) {
    rng <- range(coord)
    rng + c(-1, 1) * exp * diff(rng)
  })

  # Final plot: ranges set to the padded contour extent.
  base_plot +
    scale_x_continuous(limits = xyscales[["x"]]) +
    scale_y_continuous(limits = xyscales[["y"]])
}
Try out the function on two built-in data sets:
# Colour column passed by name; d2d() converts it to a factor when numeric.
d2d(data = mtcars, var1 = "wt", var2 = "mpg", col = "cyl")
d2d(data = iris, var1 = "Petal.Width", var2 = "Petal.Length", col = "Species")
Here's what the plots look like with the default x and y ranges:
# Baseline for comparison: the same contours with no axis limits set.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, colour = factor(cyl))) +
  geom_density_2d()
ggplot(
  data = iris,
  mapping = aes(x = Petal.Width, y = Petal.Length, colour = Species)
) +
  geom_density_2d()
If you want to control the number of axis tick marks as well, you can, for example, do something like this:
# Draw 2-D density contours on axes wide enough to show every contour line
# in full, with a chosen approximate number of axis breaks.
#
# Arguments:
#   data  Data frame (or tibble) holding the columns named below.
#   var1  Name, as a string, of the column mapped to x.
#   var2  Name, as a string, of the column mapped to y.
#   col   Name, as a string, of the column mapped to colour. A numeric
#         column is converted to a factor first.
#   nx    Number of x-axis breaks requested from scales::pretty_breaks().
#   ny    Number of y-axis breaks requested from scales::pretty_breaks().
#   exp   Fraction of the contour extent added as padding on each side of
#         both axes.
#
# Returns a ggplot object.
d2d <- function(data, var1, var2, col, nx = 5, ny = 5, exp = 0.01) {
  # `[[` yields the column vector for data frames and tibbles alike, whereas
  # `data[, col]` stays a one-column tibble and fails the numeric test.
  if (is.numeric(data[[col]])) {
    data[[col]] <- as.factor(data[[col]])
  }

  # Limits reaching `pad` data ranges beyond the data on both sides. NAs are
  # dropped so that one missing value cannot turn both limits into NA.
  wide_limits <- function(values, pad = 2) {
    rng <- range(values, na.rm = TRUE)
    rng + c(-1, 1) * pad * diff(rng)
  }

  # Layers shared by the probe plot and the returned plot.
  base_plot <- ggplot(data, aes_string(var1, var2, colour = col)) +
    geom_density_2d()

  # Probe plot: ranges far wider than the data, so no contour is truncated.
  probe <- base_plot +
    scale_x_continuous(limits = wide_limits(data[[var1]])) +
    scale_y_continuous(limits = wide_limits(data[[var2]]))

  # Extent of the computed contour lines, padded by `exp` on each side.
  contours <- ggplot_build(probe)$data[[1]]
  xyscales <- lapply(contours[c("x", "y")], function(coord) {
    rng <- range(coord)
    rng + c(-1, 1) * exp * diff(rng)
  })

  # Final plot: ranges set to the padded contour extent. `scales::` is used
  # instead of require(scales) so the package is not attached as a side
  # effect and a missing package raises an error.
  base_plot +
    scale_x_continuous(
      limits = xyscales[["x"]], breaks = scales::pretty_breaks(n = nx)
    ) +
    scale_y_continuous(
      limits = xyscales[["y"]], breaks = scales::pretty_breaks(n = ny)
    )
}