Search code examples
r ggplot2 ggrepel

control x position using ggrepel


I am using the ggrepel library to avoid overlapping labels.

With the code below, the labels no longer overlap, but they are all placed at the right of the plot. I would like to control their x position, which is why I used x = ColB in aes(), but it does not work as I expected.

enter image description here

What is wrong in my code, and how can I obtain what I expect? Please see at the end an example of the image I would prefer to obtain.

library(ggplot2)
library(ggrepel)

# Example data for the plot below:
#   ColA    - numeric values (mapped to the y axis)
#   ColB    - group label stored as a character string ("1" or "2");
#             one entry is NA
#   outlier - label text for the points to annotate, NA for every other row
data_test = data.frame(ColA = c(1, 41.5, 0.35, 8.7, 3.25, 14.25, 2.35, 
                                10.8, 7.3, 22.6, 0.2, 14.2, 34.75, 13.8, 14.15, 2.5, 28.85, 35.5, 
                                7.35, 3.1, 9.15, 7.7, 12.7, 3.75, 5.5, 9.55, 8.5, 25, 8.1, 97.65, 
                                18.7, 5.6, 13.8, 0.7, 31.35, 15.55, 6.2, 10.45, 32.75, 24.8, 
                                18.4, 78.6, 5.25, 30.9, 18.2, 4.6, 2.45, 2.8, 3.75, 15.9, 1, 
                                13.5, 13.45, 20.2, 9.75, 13.5, 1.5, 3.6, 1, 23.6, 0.45, 5.7, 
                                60.45, 0.3, 0.3, 17.25, 2.1, 12, 3.6, 29.65, 3.7, 1.25, 38.8, 
                                9, 1.1, 4.1, 11.1, 1.5, 2.8, 13.75, 4, 1.25, 0, 46.6, 5.25, 106.65, 
                                9.3, 35, 9.85, 7.5, 3.6, 0.3, 20.35, 31.7, 24.75, 3, 9.3, 15.3, 
                                3, 6.5, 12, 7.75, 143.75, 7, 11.1, 22.9, 0.1, 1.35, 5.75, 1.1, 
                                6.75, 9.5, 4.25, 5.35, 9.75, 35.4, 1, 8.5, 1.5, 1, 6, 0, 63.15, 
                                17.6, 12.25, 7.2, 6.5, 15.95, 11.35, 8.25, 2.5, 24.95, 109.3, 
                                4.35, 28.05, 14.65, 18.55, 74.85, 4.25, 31.15, 10, 11, 17.6, 
                                8.1, 19.1, 2.35, 4.75, 24.95, 40.75, 1, 9.15, 38, 13.5, 2.1, 
                                2, 13.5, 0.5, 3.5, 27, 7.2, 17.1, 1.6, 0.5, 6.35, 15, 17.2, 4.5, 
                                10.6),
                       # Grouping variable; note the single NA among the values.
                       ColB = c("1", "1", "1", "1", "2", "1", "1", 
                                "1", "1", "1", "1", "1", "1", "1", "1", "2", "1", "1", "1", "1", 
                                "1", "1", "2", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2", 
                                "1", "1", "1", "1", "1", "1", "2", "1", "1", "2", "1", "1", "2", 
                                "1", "2", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2", "1", 
                                "1", "2", "1", "1", "1", "1", "1", "1", "1", "1", "1", NA, "1", 
                                "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
                                "1", "1", "1", "1", "1", "2", "1", "1", "1", "1", "1", "1", "1", 
                                "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2", "2", 
                                "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2", 
                                "1", "1", "1", "1", "1", "1", "1", "2", "1", "1", "1", "2", "1", 
                                "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", 
                                "1", "1", "1", "1", "1", "1", "1", "1", "1", "2", "1", "1", "1", 
                                "1", "1", "1", "1", "1"),
                       # Label shown next to a point; NA means the point is not labelled.
                       outlier=c(NA, "G000001", NA, NA, NA,
                                 NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                 NA, NA, NA, NA, NA, NA, NA, NA, "G000002", NA, NA, NA, NA, NA, NA, 
                                 NA, NA, NA, NA, NA, "G000003", NA, NA, NA, NA, NA, NA, NA, NA, 
                                 NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "G000004", NA, 
                                 NA, NA, NA, NA, NA, NA, NA, NA, "G000005", NA, NA, NA, NA, NA, 
                                 NA, NA, NA, NA, NA, "G000006", NA, "G000007", NA, NA, NA, NA, NA, 
                                 NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "G000008", NA, NA, 
                                 NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                 NA, "G0000011", NA, NA, NA, NA, NA, NA, NA, NA, NA, "G0000013", NA, 
                                 NA, NA, NA, "G0000010", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                 "G0000012", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
                                 NA, NA, NA, NA, NA, NA))


> data_test
      ColA ColB  outlier
1     1.00    1     <NA>
2    41.50    1  G000001
3     0.35    1     <NA>
4     8.70    1     <NA>
5     3.25    2     <NA>
6    14.25    1     <NA>
7     2.35    1     <NA>
8    10.80    1     <NA>
9     7.30    1     <NA>
10   22.60    1     <NA>
11    0.20    1     <NA>
12   14.20    1     <NA>
13   34.75    1     <NA>
14   13.80    1     <NA>
15   14.15    1     <NA>

# Boxplot of ColA for each ColB group; rows with a non-missing `outlier`
# value get a text label positioned by ggrepel.
ggplot(data = data_test, mapping = aes(x = ColB, y = ColA)) +
  geom_boxplot() +
  geom_text_repel(
    mapping = aes(x = ColB, label = outlier),
    show.legend = FALSE,
    na.rm = TRUE
  )

enter image description here


Solution

  • This behavior looks like a bug to me; it results from the missing value in ColB, i.e. the NA category. A fix is to replace the missing value with an "NA" string. Additionally, I restricted the repelling to the y direction (direction = "y") and used nudge_x to shift the labels:

    library(ggplot2)
    library(ggrepel)
    
    # Recode the missing ColB entries as the literal string "NA" so they form
    # an explicit category, then draw the boxplot with repelled labels.
    data_test |>
      transform(ColB = replace(ColB, is.na(ColB), "NA")) |>
      ggplot(mapping = aes(x = ColB, y = ColA)) +
      geom_boxplot() +
      geom_text_repel(
        mapping = aes(label = outlier),
        direction = "y", # repel labels along the y axis only
        nudge_x = 0.4, # offset label start positions by 0.4 along x
        show.legend = FALSE,
        na.rm = TRUE
      )