Search code examples
Tags: r, ggplot2, geom-text

conditional size and face in geom_text_repel


I am working on a fairly large scatter plot, and I am using ggrepel as a method to avoid overlapping labels. However, I need to distinguish between some data points, and the idea would be to also rely on size and face to do so.

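I post the dataset below (this is the output of dput(); the code that follows assumes it is stored in an object called dat):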

structure(list(horizontal = c(-0.352696359157562, 0.780073940753937, 
0.0911642983555794, -0.153335213661194, -0.540096700191498, -0.615496337413788, 
0.310178399085999, 0.797275483608246, -0.157139003276825, -0.33426907658577, 
0.253628611564636, 0.474333256483078, 0.0280635561794043, -0.522810518741608, 
0.712112784385681, 0.156166926026344, 0.378900289535522, -0.10096962749958, 
-0.751460909843445, 0.000464908313006163, -0.368376433849335, 
-0.0745543912053108, -0.496564328670502, 0.703955709934235, -0.10785873234272, 
0.371504008769989, -0.586291670799255, 0.191669017076492, -0.154560878872871, 
0, 0.843597233295441, 0.449101448059082, -0.241541013121605, 
-0.0455610118806362, -0.382365942001343, -0.00278944987803698, 
0.0590433552861214, -0.328732430934906, 0.0665241554379463, -0.384352385997772, 
-0.104266256093979, -0.330000370740891, 0.638868570327759, 0.319561064243317, 
-0.373574942350388, 0.298344343900681, -0.394538104534149, -0.994185328483582, 
0.654802262783051, -0.0239639095962048, -0.14082495868206, 0.726947605609894, 
0.067158117890358, 1.09101295471191, 0.808771371841431, -0.511610448360443, 
0.873182356357574, 0.33600190281868, -0.62741494178772, 0.253036916255951, 
-0.717691659927368, 0.329070538282394, 0.364699423313141, -0.0409119315445423, 
-0.356035232543945, -0.00621286546811461, 0.333550602197647, 
-0.327464520931244, 0.163056030869484, 0.441578358411789, -0.463090926408768, 
0.791485249996185, -1.08877301216125, 0.0416726917028427, -0.581684827804565, 
0.389889031648636, -0.32378751039505, 0.184145957231522, -0.291793346405029, 
-0.046744417399168, -0.593223035335541, -0.619257867336273, -0.41385293006897, 
0.565666615962982, -0.486125022172928, 0.246443659067154, -0.0412923097610474, 
0.893342435359955, 0.337903827428818, -0.551677107810974, 0.401258170604706, 
-0.628006637096405, 0.148221224546432, -0.0568033419549465, -0.530756235122681
), vertical = c(-0.176369309425354, 0.474502354860306, 0.0566342882812023, 
0.611396670341492, 0.376279890537262, 0.59516716003418, -0.177679508924484, 
-0.549056708812714, 0.0260348655283451, -0.0717226788401604, 
-0.104350790381432, -0.202235117554665, -0.110944032669067, 0.147080093622208, 
-0.0262884516268969, 0.0451383702456951, 0.615623116493225, 0.445720255374908, 
0.0480968803167343, 0.400624185800552, 0.248091980814934, -0.716846406459808, 
0.11407159268856, -0.152194082736969, 0.0246401410549879, 0.150926142930984, 
-0.437859088182449, 0.0160182043910027, 0.20544721186161, 0.197501495480537, 
-0.0254431646317244, 0.218464657664299, 0.119692757725716, 0.0423489212989807, 
0.0680879354476929, 0.0461949817836285, -0.037192665040493, -0.0420530699193478, 
-0.252572000026703, -0.365248888731003, -0.542886137962341, 0.195641875267029, 
-0.243147045373917, 0.156293720006943, 0.128018841147423, 0.592842638492584, 
0.29944321513176, 0.0551972948014736, 0.0954329967498779, -0.230890363454819, 
0.291117131710052, -0.123327493667603, 0.298259824514389, 0.291962414979935, 
0.0570146627724171, -0.248049721121788, -0.147798582911491, -0.363093376159668, 
0.121045224368572, -0.186850875616074, 0.0238371174782515, 0.201136231422424, 
0.0619173347949982, -0.0573527812957764, -0.345680475234985, 
-0.637051165103912, -0.174467399716377, -0.155659765005112, -1.08539187908173, 
-0.414698243141174, 0.499438345432281, 0.0502946302294731, -1.10627043247223, 
0.559918701648712, 0.125187128782272, 0.52006334066391, -0.0163563191890717, 
-0.787765979766846, 0.10916892439127, 0.313010096549988, 0.167831897735596, 
0.128610551357269, 0.0104815689846873, -0.134907931089401, 0.116861052811146, 
-0.164915651082993, -1.09490132331848, -0.140782698988914, 0.54309743642807, 
0.206757411360741, 0.346103101968765, -0.108830809593201, 0.241710051894188, 
-0.327760368585587, 0.185540676116943), issue = c("issue", "issue", 
"issue", "issue", "issue", "issue", "issue", "issue", "issue", 
"issue", "issue", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party", "party", "party", "party", "party", "party", 
"party", "party"), year = c("9999", "9999", "9999", "9999", "9999", 
"9999", "9999", "9999", "9999", "9999", "9999", "05", "05", "05", 
"05", "05", "05", "05", "05", "06", "06", "06", "06", "06", "06", 
"06", "06", "07", "07", "07", "07", "07", "07", "07", "07", "07", 
"07", "07", "08", "08", "08", "08", "08", "09", "09", "09", "09", 
"09", "10", "10", "10", "10", "10", "10", "10", "10", "11", "11", 
"11", "11", "11", "11", "11", "11", "11", "11", "12", "12", "12", 
"12", "12", "12", "12", "12", "12", "13", "13", "13", "13", "13", 
"13", "13", "13", "15", "15", "15", "15", "15", "15", "15", "15", 
"15", "16", "16", "16"), object_n = c("welfare", "ecolib", "ecoreform", 
"europe", "cultlib", "immig", "security", "defense", "edu", "env", 
"infra", "cons05", "lab_uk05", "libdem05", "union05", "spd05", 
"fdp_d05", "gr_d05", "linke/pds05", "övp06", "spö06", "fpö06", 
"gr_a06", "cda06", "pvda06", "vvd06", "sp06", "rpr/ump07", "psf07", 
"udf07", "svp07", "fdp_ch07", "sps07", "cvp07", "gr_ch07", "fg07", 
"ff07", "lab_irl07", "övp08", "spö08", "fpö08", "gr_a08", "bzö08", 
"union09", "spd09", "fdp_d09", "gr_d09", "linke/pds09", "cons10", 
"lab_uk10", "libdem10", "cda10", "pvda10", "vvd10", "rr_nl10", 
"sp10", "svp11", "fdp_ch11", "sps11", "cvp11", "gr_ch11", "fg11", 
"ff11", "lab_irl11", "sf11", "indep_irl11", "rpr/ump12", "psf12", 
"rr_f12", "cda12", "pvda12", "vvd12", "rr_nl12", "d6212", "sp12", 
"övp13", "spö13", "fpö13", "gr_a13", "union13", "spd13", "gr_d13", 
"linke/pds13", "cons15", "lab_uk15", "libdem15", "ukip15", "svp15", 
"fdp_ch15", "sps15", "cvp15", "gr_ch15", "fg16", "ff16", "sf16"
)), .Names = c("horizontal", "vertical", "issue", "year", "object_n"
), row.names = c(NA, -95L), class = c("tbl_df", "tbl", "data.frame"
))

Currently I use the code below to specify the plot:

# Scatter plot with two separately repelled label layers: smaller labels for
# parties and larger, bold labels for issues.
ggplot(data = dat, mapping = aes(x = horizontal, y = vertical)) +
  # Issue points: large crosses (shape 3).
  geom_point(
    data = subset(dat, issue == "issue"),
    shape = 3, size = 5, color = "black"
  ) +
  # Party points: one point shape per election year.
  geom_point(
    mapping = aes(shape = year),
    data = subset(dat, issue == "party"),
    size = 2, color = "black"
  ) +
  scale_shape_manual(
    values = c(19, 15, 17, 10, 11, 12, 18, 13, 14, 7, 8, 9, 0, 5, 1)
  ) +
  # Party labels.
  geom_text_repel(
    mapping = aes(x = horizontal, y = vertical, label = object_n),
    data = subset(dat, issue == "party"),
    family = "Franklin Gothic Demi",
    size = 2.5
  ) +
  # Issue labels: slightly larger and bold.
  geom_text_repel(
    mapping = aes(x = horizontal, y = vertical, label = object_n),
    data = subset(dat, issue == "issue"),
    family = "Franklin Gothic Demi",
    fontface = "bold",
    size = 3.2
  ) +
  theme_classic(base_size = 16) +
  # Strip axes, legend, grid and backgrounds so only points and labels remain.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_blank()
  )

This works fine, and produces the following plot:

enter image description here

However, the problem is that because I call geom_text_repel separately for the two subsets, the labels of one subset can overlap the labels of the other, which defeats the purpose of using this geom. Do you have any idea how to set it up in a single geom_text_repel call with conditional size and face? I have tried something similar to what is suggested here, but I keep running into an error: "ggplot2 doesn't know how to deal with data of class uneval".

This is for example what I have tried:

# Attempt at a single geom_text_repel layer with conditional label size.
# This is the version that fails with the "data of class uneval" error.

# Size lookup: 3.2 for "issue" rows, 2.5 otherwise. The `issue` argument is
# never used; the body reads dat$issue directly, so the result has one value
# per row of dat.
ff <- function(issue){ifelse(dat$issue=="issue",3.2,2.5)}

mds <- ggplot(dat, aes(horizontal, vertical)) +
  # Issue points: large crosses.
  geom_point(data=subset(dat, dat$issue=="issue"),
             color = 'black', size=5, shape=3) +
  # Party points: point shape varies by year.
  geom_point(data=subset(dat, dat$issue=="party"),
             color = 'black', aes(shape=year), size=2) +
  scale_shape_manual(values=c(19,15,17,10,11,12,18,13,14,7,8,9,0,5,1))+
  # NOTE(review): `dat` is passed as the first positional argument and aes()
  # as the second. Per the ggrepel docs the first parameter is `mapping` and
  # the second is `data`, so the aes() object presumably ends up as `data` --
  # the likely source of the "uneval" error. Confirm; naming the argument
  # (data = dat) would avoid it.
  # NOTE(review): ff(issue) yields a per-row vector, whereas a manual size
  # scale presumably expects one value per level of factor(issue) -- verify.
  geom_text_repel(dat,aes(horizontal, vertical, label = object_n, size = factor(issue)),
                  family = "Franklin Gothic Demi") + scale_size_manual(values=ff(issue)) +
  theme_classic(base_size = 16) +
  # Remove axes, legend, grid and backgrounds.
  theme(axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank(),legend.position="none",
        panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank())
# Print the plot.
mds

Any help is much appreciated!


Solution

  • Does this help? It just creates a new variable for the font face and uses scale_size_manual to mimic your existing text sizing:

    library(tidyverse)
    library(ggrepel)
    
    # theme_set() makes this theme the default for every subsequent plot, which
    # is handy when the same theme is reused a lot.
    theme_set(
      theme_classic(base_size = 16) +
        # Blank out axes, legend, grid and backgrounds.
        theme(
          axis.line = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_blank(),
          axis.ticks = element_blank(),
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          legend.position = "none",
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.background = element_blank()
        )
    )
    
    # Per-row font face: bold for issue labels, plain for party labels.
    dat$ff <- ifelse(dat$issue == "issue", "bold", "plain")
    
    ggplot(dat, aes(horizontal, vertical)) +
      # Issue points: large crosses.
      geom_point(
        data = subset(dat, issue == "issue"),
        color = "black", size = 5, shape = 3
      ) +
      # Party points: point shape varies by year.
      geom_point(
        data = subset(dat, issue == "party"),
        aes(shape = year),
        color = "black", size = 2
      ) +
      scale_shape_manual(
        values = c(19, 15, 17, 10, 11, 12, 18, 13, 14, 7, 8, 9, 0, 5, 1)
      ) +
      # A single repel layer over all rows, so every label is repelled from
      # every other label; size and font face are mapped per row instead of
      # being fixed per layer. Add family = "Franklin Gothic Demi" here if that
      # font is installed and you want the typeface used in the question.
      geom_text_repel(aes(label = object_n, size = issue, fontface = ff)) +
      # Named values tie each size to its group explicitly rather than relying
      # on the alphabetical order of the levels; 3.2 / 2.5 are the sizes used
      # in the question's two separate layers.
      scale_size_manual(values = c(issue = 3.2, party = 2.5))
    

    Remember that the final size of the text is determined partly by the size of the graphics device you render to. This is what it looks like as a PNG with a width of 800 px and a height of 511 px: enter image description here