I have a data frame from which I want to create several bar plots. In every bar plot, the same characteristic (level) of a variable should have the same color. For example:
# Share of the total mpg contributed by each gear level within each
# transmission type (am). Requires dplyr to be attached.
data_mtcars <- mtcars
data <- data_mtcars %>%
  group_by(am, gear) %>%
  # State the resulting grouping explicitly (keep `am`) instead of relying on
  # the default, which emits a "regrouping output" message.
  summarise(Freq = sum(mpg), .groups = "drop_last") %>%
  # Still grouped by `am` here, so sum(Freq) is the per-`am` total.
  mutate(Prop = Freq / sum(Freq)) %>%
  # Do not leave a grouped tibble behind for later steps.
  ungroup() %>%
  arrange(desc(Prop))
First plot with three characteristics of the variable "gear".
# Stacked bar chart: share of mpg per gear level for each transmission type.
# Requires ggplot2 and dplyr to be attached.
ggplot(data) +
  aes(x = am, y = Prop, fill = reorder(gear, Prop), width = 0.5) +
  geom_col() +
  # Axis values are proportions; display them as percentages.
  scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
  # Label only segments above 5%. The "no label" branch has to be a typed NA:
  # NULL is not a valid `false` value for if_else() in current dplyr.
  geom_text(
    aes(label = if_else(Prop > 0.05, scales::percent(Prop), NA_character_)),
    position = position_stack(0.4)
  ) +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  ylab("") +
  xlab("") +
  scale_fill_brewer(palette = "Set3")
This gives me a purple color for characteristic "3" of the variable gear. If I change the number of characteristics, that color should not change.
# Same plot as before, but without gear level 4.
df <- data[data$gear != 4, ]
ggplot(df) +
  aes(x = am, y = Prop, fill = reorder(gear, Prop), width = 0.5) +
  geom_col() +
  # Axis values are proportions; display them as percentages.
  scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
  # Label only segments above 5%. The "no label" branch has to be a typed NA:
  # NULL is not a valid `false` value for if_else() in current dplyr.
  geom_text(
    aes(label = if_else(Prop > 0.05, scales::percent(Prop), NA_character_)),
    position = position_stack(0.4)
  ) +
  theme_minimal() +
  theme(legend.title = element_blank()) +
  ylab("") +
  xlab("") +
  # The Brewer scale hands out colors by position among the levels present,
  # so removing a level shifts the colors of the remaining ones.
  scale_fill_brewer(palette = "Set3")
Now the same characteristics have different colors (characteristic "3" is now yellow). How can I fix this problem? I have tried fixing the levels of the factor variable, but I don't know how to pass them to the plot through an appropriate argument.
# Convert gear to an ordered factor, keeping the default level order.
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
data_mtcars$gear <- factor(
  data_mtcars$gear,
  levels = levels(as.factor(data_mtcars$gear)),
  ordered = TRUE
)
To get consistent colors for categories, you can use a named color vector, which can then be passed to scale_color_manual() / scale_fill_manual()
so that each category always gets the same color:
library(dplyr)
library(ggplot2)
data_mtcars <- mtcars
# Share of the total mpg contributed by each gear level within each
# transmission type (am), with gear converted to a factor.
data <- data_mtcars %>%
  group_by(am, gear) %>%
  # State the resulting grouping explicitly (keep `am`) instead of relying on
  # the default, which emits a "regrouping output" message.
  summarise(Freq = sum(mpg), .groups = "drop_last") %>%
  # Still grouped by `am` here, so sum(Freq) is the per-`am` total.
  mutate(Prop = Freq / sum(Freq)) %>%
  # Drop the grouping so that reorder() below ranks the gear levels over the
  # whole table instead of separately within each `am` group.
  ungroup() %>%
  arrange(desc(Prop)) %>%
  # Make gear a factor once, up front; its levels are the categories that
  # receive a fixed color.
  mutate(gear = reorder(gear, Prop))
# Fixed lookup table: one Set2 color per gear level, keyed by level name
colors_gear <- setNames(
  scales::brewer_pal(palette = "Set2")(nlevels(data$gear)),
  levels(data$gear)
)
#' Stacked bar chart of gear shares per transmission type
#'
#' @param d Data frame with columns `am`, `gear` and `Prop` (a proportion).
#' @param colors Named character vector of fill colors whose names are the
#'   `gear` levels. Defaults to the global `colors_gear`, so every plot uses
#'   the same gear-to-color mapping regardless of which levels occur in `d`.
#' @return A ggplot object.
make_plot <- function(d, colors = colors_gear) {
  ggplot(d) +
    aes(x = am, y = Prop, fill = reorder(gear, Prop), width = 0.5) +
    geom_col() +
    # Axis values are proportions; display them as percentages.
    scale_y_continuous(labels = function(x) paste0(x * 100, "%")) +
    # Label only segments above 5%. The "no label" branch has to be a typed
    # NA: NULL is not a valid `false` value for if_else() in current dplyr.
    geom_text(
      aes(label = if_else(Prop > 0.05, scales::percent(Prop), NA_character_)),
      position = position_stack(0.4)
    ) +
    theme_minimal() +
    theme(legend.title = element_blank()) +
    ylab("") +
    xlab("") +
    # Colors are matched to levels by name, not by position, so dropping a
    # level does not shift the colors of the others.
    scale_fill_manual(values = colors)
}
# Draw the chart for all gear levels, then again with gear 4 removed
make_plot(data)
make_plot(dplyr::filter(data, gear != 4))