Search code examples
rggplot2aes

Circular barchart customization using tidyverse


I am following a tutorial in r-graph-gallery for circular barchart customization adjusted with my data because I want to see the 5 highest costs per topic. This is my data structure:

> dput(dtd4)
structure(list(topic = structure(c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 
5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 
8L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L), .Label = c("Topic 1", 
"Topic 10", "Topic 2", "Topic 3", "Topic 4", "Topic 5", "Topic 6", 
"Topic 7", "Topic 8", "Topic 9"), class = "factor"), parent = c("Jhon", 
"Mary", "Sean", "Lissa", "Tatiana", "Jhon", "Mary", "Sean", "Lissa", 
"Tatiana", "Jhon", "Mary", "Sean", "Lissa", "Tatiana", "Jhon", 
"Mary", "Sean", "Lissa", "Tatiana", "Jhon", "Mary", "Sean", "Lissa", 
"Tatiana", "Jhon", "Mary", "Sean", "Lissa", "Tatiana", "Jhon", 
"Mary", "Sean", "Lissa", "Tatiana", "Jhon", "Mary", "Sean", "Lissa", 
"Tatiana", "Jhon", "Mary", "Sean", "Lissa", "Tatiana", "Jhon", 
"Mary", "Sean", "Lissa", "Tatiana"), cost = c(9671, 9671, 9671, 
9671, 9671, 19842, 20704, 19842, 19842, 19449, 21448, 20251, 
21448, 21448, 21448, 21448, 19842, 19842, 19842, 19842, 19842, 
19842, 19842, 19842, 19842, 19842, 19842, 22031, 19842, 19842, 
19842, 19842, 19842, 19842, 19842, 19842, 19842, 19842, 19842, 
19842, 19842, 19842, 19842, 19842, 19842, 21448, 21448, 21448, 
21448, 22132)), row.names = c(NA, -50L), class = "data.frame")

The code below is the one I am using after adjustment.

library(tidyverse)

# Set a number of 'empty bar' to add at the end of each group
empty_bar <- 3
to_add <- data.frame( matrix(NA, empty_bar*nlevels(dtd4$topic), ncol(dtd4)) )
colnames(to_add) <- colnames(dtd4)
to_add$topic <- rep(levels(dtd4$topic), each=empty_bar)
dtd4 <- rbind(dtd4, to_add)
dtd4 <- dtd4 %>% arrange(topic)
dtd4$id <- seq(1, nrow(dtd4))


# Get the name and the y position of each label
label_data <- dtd4
number_of_bar <- nrow(label_data)
angle <- 90 - 360 * (label_data$id-0.5) /number_of_bar     # I substract 0.5 because the letter must have the angle of the center of the bars. Not extreme right(1) or extreme left (0)
label_data$hjust <- ifelse( angle < -90, 1, 0)
label_data$angle <- ifelse(angle < -90, angle+180, angle)

# prepare a data frame for base lines
base_data <- dtd4 %>% 
  group_by(topic) %>% 
  summarize(start=min(id), end=max(id) - empty_bar) %>% 
  rowwise() %>% 
  mutate(title=mean(c(start, end)))

# prepare a data frame for grid (scales)
grid_data <- base_data
grid_data$end <- grid_data$end[ c( nrow(grid_data), 1:nrow(grid_data)-1)] + 1
grid_data$start <- grid_data$start - 1
grid_data <- grid_data[-1,]

# Make the plot
p <- ggplot(dtd4, aes(x=as.factor(id), y=cost, fill=topic)) +       # Note that id is a factor. If x is numeric, there is some space between the first bar

  geom_bar(aes(x=as.factor(id), y=cost, fill=topic), stat="identity", alpha=0.5) +

  # Add a val=100/75/50/25 lines. I do it at the beginning to make sur barplots are OVER it.
  geom_segment(data=grid_data, aes(x = end, y = 80, xend = start, yend = 80), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 60, xend = start, yend = 60), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 40, xend = start, yend = 40), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +
  geom_segment(data=grid_data, aes(x = end, y = 20, xend = start, yend = 20), colour = "grey", alpha=1, size=0.3 , inherit.aes = FALSE ) +

  # Add text showing the value of each 100/75/50/25 lines
  annotate("text", x = rep(max(data$id),4), y = c(20, 40, 60, 80), label = c("20", "40", "60", "80") , color="grey", size=3 , angle=0, fontface="bold", hjust=1) +

  geom_bar(aes(x=as.factor(id), y=cost, fill=topic), stat="identity", alpha=0.5) +
  ylim(-100,120) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    plot.margin = unit(rep(-1,4), "cm") 
  ) +
  coord_polar() + 
  geom_text(data=label_data, aes(x=id, y=cost+10, label=parent, hjust=hjust), color="black", fontface="bold",alpha=0.6, size=2.5, angle= label_data$angle, inherit.aes = FALSE ) +

  # Add base line information
  geom_segment(data=base_data, aes(x = start, y = -5, xend = end, yend = -5), colour = "black", alpha=0.8, size=0.6 , inherit.aes = FALSE )  +
  geom_text(data=base_data, aes(x = title, y = -18, label=topic), hjust=c(1,1,0,0), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)

p

Original code works perfectly, but mine is giving me this error:

Error: Aesthetics must be either length 1 or the same as the data (10): hjust
In addition: Warning messages:
1: Removed 80 rows containing missing values (position_stack). 
2: Removed 80 rows containing missing values (position_stack). 

And I get lost because each data set is giving a similar output everytime I check, but when going through 'p' something is wrong.


Solution

  • The length of base_data differs from the length of the vector provided to hjust in the last line of your plot. Replace it with something like the following, where hjust is a vector the same length as the data:

    geom_text(data=base_data, aes(x = title, y = -18, label=topic), hjust=c(rep(0,5),rep(1,5)), colour = "black", alpha=0.8, size=4, fontface="bold", inherit.aes = FALSE)
    

    You will likely need to adjust the hjust vector to give what you want, but this should get you on your way.

    Additionally, you did not provide data, so the my output isn't working. Hopefully you know what data is where you are taking max(data$id).