I am trying to calculate some percentages that I can then plot, and am running into trouble. Visually, my goal is to get something like the attached plot below (which my current code produces), except with percentages broken down by whether or not they graduated.
To be more clear, in my example data below, I'd want to plot something that would tell me: "Within employment category 0/1/2, what percent of white individuals graduated vs. did not graduate". And then have that for each of the race, gender, and yr (year) variables.
Here is my initial code. The issue, I think, is in my code chunk where I am trying to calculate percentages. My current code is calculating those for the whole group, instead of breaking each down by grad vs. not.
Code:
# Recode the demographic and graduation columns as factors, keep them
# together with `employ`, and stack them into long format: one row per
# (observation, variable) pair, with the variable name in `group` and its
# value in `levels`.
test2 <- test |>
  mutate(
    genderf = factor(2 - gender),
    racef = factor(race),
    yrf = factor(yr),
    gradf = factor(grad)
  ) |>
  select(all_of(c("genderf", "racef", "yrf", "gradf", "employ"))) |>
  pivot_longer(
    cols = -employ,
    names_to = "group",
    values_to = "levels"
  )
# For every (group, levels) combination, compute what percentage of its rows
# falls into each `employ` category. `group_n` is the total row count of the
# (group, levels) combination and serves as the denominator.
# The result stays grouped by `group` and `levels` (summarise() drops only
# the last grouping variable by default).
test3 <- test2 |>
  add_count(group, levels, name = "group_n") |>
  group_by(group, levels, employ) |>
  summarise(percent = round(100 * n() / first(group_n), 1))
# Attach a human-readable label (`var`) to each (group, levels) pair via a
# lookup table keyed by "<group>.<levels>". Pairs without an entry (for
# example every `gradf` row) get NA and are dropped.
test3 <- test3 |>
  mutate(
    var = unname(
      c(
        "genderf.1" = "female",
        "genderf.2" = "male",
        "racef.1" = "white",
        "racef.2" = "black",
        "racef.3" = "hispanic",
        "racef.4" = "otherrace",
        "yrf.11" = "class '11",
        "yrf.15" = "class '15"
      )[paste(group, levels, sep = ".")]
    )
  ) |>
  filter(!is.na(var))
# Horizontal bar chart of `percent` per label: one facet column per `employ`
# value and one facet row per demographic variable. Free scales/space let
# each facet row show only its own labels.
test3 |>
  ggplot(mapping = aes(x = percent, y = var, fill = group)) +
  geom_col(width = 0.9, orientation = "y") +
  facet_grid(
    rows = vars(group),
    cols = vars(employ),
    scales = "free",
    space = "free_y"
  ) +
  labs(
    title = "Demographic breakdown of 'Score'",
    x = "Percent",
    y = ""
  ) +
  theme(
    # The fill legend and the row strips repeat what the y-axis labels show.
    legend.position = "none",
    strip.text.y = element_blank()
  )
# Example data (59 rows): numeric columns gender, race, yr, grad and employ.
# This is dput() output of a readr tibble. The original dump also carried a
# `problems = <pointer: ...>` attribute; an external pointer cannot be
# deparsed into valid R code, so it is omitted here to make the block
# parseable. The data values themselves are unchanged.
test <- structure(list(gender = c(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0), race = c(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 1,
2, 3, 0, 1, 2, 3, 0, 1, 2, 1, 1, 1, 2, 2, 2, 0, 1, 1, 3, 1, 2,
3, 0, 1, 1, 0), yr = c(11, 15, 11, 15, 11, 15, 11, 15, 11, 15,
11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15,
11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15,
11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15, 11, 15,
11), grad = c(0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1,
1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0,
0), employ = c(1, 2, 2, 0, 1, 2, 1, 2, 2, 0, 0, 0, 1, 1, 2, 0,
1, 0, 1, 1, 2, 0, 1, 2, 1, 1, 2, 0, 1, 2, 2, 2, 0, 2, 2, 0, 2,
2, 2, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 2, 0, 2, 0, 2, 2, 0, 2, 1,
2)), row.names = c(NA, -59L), spec = structure(list(cols = list(
gender = structure(list(), class = c("collector_double",
"collector")), race = structure(list(), class = c("collector_double",
"collector")), yr = structure(list(), class = c("collector_double",
"collector")), grad = structure(list(), class = c("collector_double",
"collector")), employ = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
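Are you looking for something like this? I simplified things quite a bit. Each bar shows the percentage of individuals who graduated within a given employment category and demographic level; the share who did not graduate is 100 minus that value.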
library(tidyverse)
# Percentage of individuals who graduated (grad == 1) within every
# combination of employment category and demographic level, drawn as
# horizontal bars: one facet column per `employ` value, one facet row per
# demographic variable.
test |>
  mutate(
    # Follows the gender coding used in the question's code, where
    # `2 - gender == 1` (i.e. gender == 1) is labelled "female" and
    # gender == 0 is labelled "male".
    gender = c("male", "female")[gender + 1],
    # NOTE(review): assumes race is coded 0-3 as in the sample data; the
    # question's labels refer to codes 1-4 -- confirm against the real data.
    race = c("white", "black", "hispanic", "otherrace")[race + 1],
    # paste0() yields a plain character column, so all pivoted columns
    # share one type.
    yr = paste0("class_", yr)
  ) |>
  pivot_longer(-c(grad, employ), names_to = "group") |>
  group_by(employ, group, value) |>
  # grad is 0/1, so its mean is the proportion that graduated.
  summarise(percent = mean(grad) * 100, .groups = "drop") |>
  ggplot(aes(x = percent, y = value, fill = group)) +
  geom_col(orientation = "y", width = .9) +
  facet_grid(group ~ employ,
    scales = "free", space = "free_y"
  ) +
  labs(
    title = "Demographic breakdown of 'Score'",
    y = "",
    x = "Percent"
  ) +
  theme(
    legend.position = "none",
    strip.text.y = element_blank()
  )