Search code examples
r ggplot2 coordinates axis-labels

incorrect labels in ggplot in R


I have a dataframe:

   key    year flag     amount   pct   pos
   <chr> <int> <lgl>     <dbl> <dbl> <dbl>
 1 A      2017 TRUE  13870255. 0.487 0.244
 2 A      2017 FALSE 14609185. 0.513 0.744
 3 B      2017 TRUE  23562809. 0.807 0.403
 4 B      2017 FALSE  5641352. 0.193 0.903
 5 C      2017 TRUE  22683017. 0.772 0.386
 6 C      2017 FALSE  6686562. 0.228 0.886
 7 D      2017 TRUE  14840593. 0.500 0.250
 8 D      2017 FALSE 14846822. 0.500 0.750
 9 A      2018 FALSE 16131222. 0.485 0.242
10 A      2018 TRUE  17155225. 0.515 0.742

I have created wrapped pie plots with percentage labels inside each plot (highlighted inside the black box). The first plot, in the upper-left corner, seems to be correct, but the upper-middle plot has inverted colors: the area for 51.54% is smaller than the area for 48.46%. How can the labels be fixed so that the area corresponds to the percentage?

enter image description here

code used for ggplot:

# Pie charts in a key-by-year grid, each sliced by `flag`.
# Every pie is one stacked bar (x fixed at 1) whose y axis is mapped onto
# the angle by coord_polar().
ggplot(valuesDT, aes(x = 1, y = pct, fill = flag)) +
  geom_col(width = 2) +
  # Percentage labels; their position along y is read from the precomputed
  # `pos` column rather than derived from the stacked bars.
  geom_text(
    aes(y = pos, label = paste0(round(100 * pct, 2), " %")),
    size = 3
  ) +
  scale_fill_manual(
    name = "",
    values = c("grey", "green"),
    labels = c("F", "non-T")
  ) +
  coord_polar(theta = "y") +
  facet_grid(rows = vars(key), cols = vars(year)) +
  theme_classic() +
  # Strip all axis decorations and grid lines.
  theme(
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    panel.grid = element_blank(),
    plot.title = element_text(color = "blue", size = 16, face = "bold")
  )

The plot looks alright at first glance, but then I noticed that the labels were not correct.

data:

# Reproducible data for the plot (looks like dput() output). The expression
# is not assigned to a name here; presumably it is the `valuesDT` object
# used by the plotting code -- assign it to that name to reproduce.
# It is a 24-row grouped tibble (class "grouped_df", grouped by year and
# key) with columns key, year, flag, amount, pct and pos.
structure(list(key = c("A", "A", "B", "B", "C", "C", "D", "D", 
"A", "A", "B", "B", "C", "C", "D", "D", "A", "A", "B", "B", "C", 
"C", "D", "D"), year = c(2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
2017L, 2017L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 
2018L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L, 2019L
), flag = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, 
FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, TRUE, 
TRUE, FALSE, TRUE, FALSE, TRUE, FALSE), amount = c(13870254.87, 
14609184.5199999, 23562809.1300003, 5641352.16999989, 22683016.61, 
6686562.28999991, 14840593.21, 14846822.3, 16131221.89, 17155224.79, 
26853512.1700003, 6337822.7600002, 26324941.5199999, 7775499.19999981, 
16527294.6700001, 18136429.54, 18660941.7799999, 19628554.06, 
29955135.69, 7154960.09000001, 28728476.97, 8923411.36999992, 
18634207.47, 18495786.62), pct = c(0.487026962857644, 0.512973037142356, 
0.806830536509883, 0.193169463490117, 0.772330331573125, 0.227669668426875, 
0.499895088711952, 0.500104911288048, 0.484618320635959, 0.515381679364041, 
0.809051887386679, 0.190948112613322, 0.771982442577655, 0.228017557422345, 
0.476789353904222, 0.523210646095778, 0.487364520493513, 0.512635479506487, 
0.807196399265127, 0.192803600734873, 0.763002288506204, 0.236997711493796, 
0.501864003124596, 0.498135996875404), pos = c(0.243513481428822, 
0.743513481428822, 0.403415268254941, 0.903415268254941, 0.386165165786563, 
0.886165165786563, 0.249947544355976, 0.749947544355976, 0.24230916031798, 
0.742309160317979, 0.404525943693339, 0.904525943693339, 0.385991221288828, 
0.885991221288828, 0.238394676952111, 0.738394676952111, 0.243682260246756, 
0.743682260246756, 0.403598199632564, 0.903598199632563, 0.381501144253102, 
0.881501144253102, 0.250932001562298, 0.750932001562298)), row.names = c(NA, 
-24L), groups = structure(list(year = c(2017L, 2017L, 2017L, 
2017L, 2018L, 2018L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L
), key = c("A", "B", "C", "D", "A", "B", "C", "D", "A", "B", 
"C", "D"), .rows = structure(list(1:2, 3:4, 5:6, 7:8, 9:10, 11:12, 
    13:14, 15:16, 17:18, 19:20, 21:22, 23:24), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, 12L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Solution

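  • Add `group = key` to your `aes()`. By default ggplot2 groups the bars by the `fill` variable, so the slices are stacked by `flag` level, whereas your `pos` values were computed in row order; grouping by `key` appears to make the slices stack in row order, so each label lands on its matching slice. Here is the code; in the plot you can see that the areas now correspond to the percentages, as you asked in the question: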

    library(tidyverse)
    #Code
    # Same pie-chart grid as in the question, with the `group` aesthetic set
    # explicitly to `key` for all layers.
    ggplot(valuesDT, aes(x = 1, y = pct, fill = flag, group = key)) +
      geom_col(width = 2) +
      # Percentage labels placed at the precomputed `pos` values.
      geom_text(
        aes(y = pos, label = paste0(round(100 * pct, 2), " %")),
        size = 3
      ) +
      scale_fill_manual(
        name = "",
        values = c("grey", "green"),
        labels = c("non-T", "F")
      ) +
      coord_polar(theta = "y") +
      facet_grid(rows = vars(key), cols = vars(year)) +
      theme_classic() +
      # Strip all axis decorations and grid lines.
      theme(
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        panel.grid = element_blank(),
        plot.title = element_text(color = "blue", size = 16, face = "bold")
      )
    

    Output:

    enter image description here

    Also, be careful with how you assign the legend labels. Without the scale_fill_*() call, this is the output:

    #Code 2
    # Grouped pie-chart grid again, this time with the default fill scale so
    # the legend shows the raw `flag` values.
    ggplot(valuesDT, aes(x = 1, y = pct, fill = flag, group = key)) +
      geom_col(width = 2) +
      # Percentage labels placed at the precomputed `pos` values.
      geom_text(
        aes(y = pos, label = paste0(round(100 * pct, 2), " %")),
        size = 3
      ) +
      # Manual fill scale deliberately left out:
      # scale_fill_manual(values = c("grey", "green"), labels = c("non-T","F"), name = "")+
      coord_polar(theta = "y") +
      facet_grid(rows = vars(key), cols = vars(year)) +
      theme_classic() +
      # Strip all axis decorations and grid lines.
      theme(
        axis.title = element_blank(),
        axis.ticks = element_blank(),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_blank(),
        panel.grid = element_blank(),
        plot.title = element_text(color = "blue", size = 16, face = "bold")
      )
    

    Output:

    enter image description here

    As can be seen, the percentages and the areas now match.