I have a slightly unusual question to ask. I have several datasets with coordinates of political parties and issues in their campaigns, arranged in a two-dimensional space. These datasets are the results of multidimensional scaling, and I am using R to plot them as simple scatter plots with ggplot. I use the UK as an example to show what I mean.
Here is the dataset:
# Example data: 22 rows of two-dimensional coordinates, one row per object.
#   horizontal, vertical - numeric coordinates of the object
#   party_id             - party identifier; NA for the rows marked "issue"
#   issue                - "issue" or "party", i.e. what kind of object the row is
#   object_n             - label of the object, e.g. "welfare" or "cons_05"
#                          (the numeric suffix is presumably an election year)
dat <- structure(list(horizontal = c(0.204471737146378, -0.444747358560562,
-0.342559009790421, 0.83488667011261, 0.561371266841888, 0.885410964488983,
-0.329168140888214, -0.676190733909607, -0.0879427865147591,
-0.257560282945633, -0.487674087285995, -0.0497645996510983,
0.542662084102631, -0.721681654453278, -0.0316252149641514, 0.332207173109055,
-0.643045961856842, 0.506858110427856, -0.324039697647095, -0.86803138256073,
0.837070941925049, 0.559091985225677), vertical = c(-0.14312070608139,
0.55092453956604, -0.118618287146091, -0.066099539399147, -0.0786356627941132,
0.476942390203476, -0.206086233258247, -0.338285326957703, 0.0132009144872427,
-0.415686339139938, -0.0648649260401726, -0.205041542649269,
0.118428349494934, 0.294788777828217, -0.375703752040863, 0.247018560767174,
0.0151952970772982, -0.273895233869553, -0.278548806905746, 0.607526957988739,
-0.100193984806538, 0.34084951877594), party_id = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "cons", "lab_uk", "libdem", "cons",
"lab_uk", "libdem", "cons", "lab_uk", "libdem", "ukip", "snp",
"gr_uk"), issue = c("issue", "issue", "issue", "issue", "issue",
"issue", "issue", "issue", "issue", "issue", "party", "party",
"party", "party", "party", "party", "party", "party", "party",
"party", "party", "party"), object_n = c("welfare", "ecolib",
"ecoreform", "europe", "cultlib", "immig", "security", "defense",
"edu", "infra", "cons_05", "lab_uk_05", "libdem_05", "cons_10",
"lab_uk_10", "libdem_10", "cons_15", "lab_uk_15", "libdem_15",
"ukip_15", "snp_15", "gr_uk_15")), .Names = c("horizontal", "vertical",
"party_id", "issue", "object_n"), row.names = c(NA, -22L), class = c("tbl_df",
"tbl", "data.frame"))
I tweak some of the theme parameters (which you don't necessarily have to do, especially the font part) with:
# Global plot theme: start from theme_classic() and blank out every axis
# decoration (axis lines, tick marks, tick labels and axis titles). The font
# family and text size are cosmetic only.
theme_set(
  theme_classic(base_size = 16) +
    theme(
      axis.line = element_blank(),
      axis.ticks = element_blank(),
      axis.text.x = element_blank(),
      axis.text.y = element_blank(),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      text = element_text(family = "Century Gothic", size = 10)
    )
)
and use the following code to create the plot:
# Font face for the text labels: issues in bold, parties in plain type
dat$ff <- ifelse(dat$issue == "issue", "bold", "plain")

# Scatter plot of the MDS configuration.
# `horizontal` goes on the x axis and `vertical` on the y axis (the two were
# transposed before), and the plot is stored as `p` so that further layers
# can be added to it later.
p <- ggplot(dat, aes(horizontal, vertical)) +
  # Issues: large black crosses, kept out of the legend
  geom_point(
    data = subset(dat, issue == "issue"),
    colour = "black", size = 5, shape = 3, show.legend = FALSE
  ) +
  # Parties: one shape / grey level per party, shown in the "Parties" legend
  geom_point(
    data = subset(dat, issue == "party"),
    aes(shape = party_id, colour = party_id, fill = party_id),
    size = 2
  ) +
  # Non-overlapping labels for every object; label size depends on row type
  geom_text_repel(
    aes(label = object_n, size = issue, fontface = ff),
    family = "Century Gothic", show.legend = FALSE
  ) +
  scale_colour_grey("Parties", start = 0, end = .6) +
  scale_fill_grey("Parties", start = 0, end = .6) +
  # Named values so the mapping does not depend on the order of the levels
  scale_size_manual("Parties", values = c(issue = 3.3, party = 2.8)) +
  scale_shape_manual(
    "Parties",
    values = c(15, 21, 23, 24, 25, 11, 8, 10, 12, 13, 4, 0, 1, 14, 7, 9)
  )
p
which results in a plot similar to this:
In MDS analysis only the relative positions matter; the absolute coordinates are not important. Therefore, I would like to "rotate" the scatter plots in such a way that the axis defined by the points "ecolib" and "welfare" always forms the horizontal dimension, with "welfare" on the left and "ecolib" on the right. Everything else can float freely around them.
I realize I need to adjust the raw coordinates and center them around the two points. However, I could not yet figure out how. Any help is much appreciated!
To put welfare and ecolib on the x axis, shift all the points so that the line between these two points goes through the origin and then rotate all the points by the angle that the line between the two points makes with the x axis.
library(tidyverse)
library(ggrepel)
# Extract the welfare and ecolib rows by exact label, always in the order
# (welfare, ecolib), so the result does not depend on the row order of `dat`
# or on other labels that merely contain these strings
i_welfare <- match("welfare", dat$object_n)
i_ecolib <- match("ecolib", dat$object_n)
stopifnot(!is.na(i_welfare), !is.na(i_ecolib))
n <- dat[c(i_welfare, i_ecolib), ]

# Direction vector pointing from welfare to ecolib
dx <- diff(n$horizontal)
dy <- diff(n$vertical)
# The two anchor points must be distinct, otherwise no axis is defined
stopifnot(dx != 0 || dy != 0)

# Slope and intercept of the line between them. These are only used for the
# visual check below (the slope is infinite if the two points share the same
# `horizontal` value); the transformation itself does not need them.
slope <- dy / dx
intercept <- n$vertical[2] - slope * n$horizontal[2]

# To check this, draw the line to show that we have the correct line
# Assuming you've saved your previous plot as object p and have switched
# horizontal and vertical to be, respectively, on the x and y axes
p + geom_abline(slope = slope, intercept = intercept)

# Shift all points so that the midpoint between welfare and ecolib is at the
# origin; the line connecting them then goes through the origin
centred <- cbind(
  dat$horizontal - mean(n$horizontal),
  dat$vertical - mean(n$vertical)
)

# Signed angle of the welfare -> ecolib direction relative to the x-axis.
# atan2() covers the full circle, whereas atan(slope) only gives the
# inclination of the line and so could leave welfare on either side.
angle <- atan2(dy, dx)

# Rotate all points by -angle and add the new x and y values to the data frame.
# %*% is the matrix multiplication operator and the second matrix is the
# rotation matrix for the transformation. Afterwards welfare and ecolib both
# have ynew == 0, with welfare at negative and ecolib at positive xnew.
rot <- centred %*% matrix(
  c(cos(angle), -sin(angle),
    sin(angle), cos(angle)),
  nrow = 2, byrow = TRUE
)
dat$xnew <- rot[, 1]
dat$ynew <- rot[, 2]
Now we're ready to plot the new shifted and rotated values:
# Same scatter plot as before, drawn from the shifted and rotated coordinates
ggplot(dat, aes(xnew, ynew)) +
  # Issues: large black crosses, kept out of the legend
  geom_point(
    data = subset(dat, issue == "issue"),
    colour = "black", size = 5, shape = 3, show.legend = FALSE
  ) +
  # Parties: one shape / grey level per party, shown in the "Parties" legend
  geom_point(
    data = subset(dat, issue == "party"),
    aes(shape = party_id, colour = party_id, fill = party_id),
    size = 2
  ) +
  # Non-overlapping labels for every object; label size depends on row type
  geom_text_repel(
    aes(label = object_n, size = issue, fontface = ff),
    family = "Century Gothic", show.legend = FALSE
  ) +
  scale_colour_grey("Parties", start = 0, end = .6) +
  scale_fill_grey("Parties", start = 0, end = .6) +
  # Named values so the mapping does not depend on the order of the levels
  scale_size_manual("Parties", values = c(issue = 3.3, party = 2.8)) +
  scale_shape_manual(
    "Parties",
    values = c(15, 21, 23, 24, 25, 11, 8, 10, 12, 13, 4, 0, 1, 14, 7, 9)
  )