Search code examples
rggplot2dplyrlikert

Sort of a likert plot based on the sorting of another likert plot in R


I have a data frame in R that I use to calculate percentages and present them in a likert plot plus a bar plot. In the middle I have a bar plot that has the percentages of the NA's in this data frame in each question within each grouping level. I want to match the questions of the middle likert plot with the one at left (i.e the left likert plot is my base and dependent on this plot to match the q1:q6 in the middle plot). How can I achieve this in R ?

Any help?


library(ggstats)
library(dplyr)
library(ggplot2)


likert_levels <- c(
  "Strongly disagree",
  "Disagree",
  "Neither agree nor disagree",
  "Agree",
  "Strongly agree"
)
set.seed(42)
df <-
  tibble(
    grouping = sample(c(LETTERS[1:9]), 150, replace = TRUE),
    q1 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q2 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q3 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q4 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q5 = sample(c(likert_levels, NA), 150, replace = TRUE),
    q6 = sample(c(likert_levels, NA), 150, replace = TRUE)
  ) |>
  mutate(across(-grouping, ~ factor(.x, levels = likert_levels)))

filter_df = df %>%
  dplyr::select(grouping) %>%
  dplyr::group_by(grouping) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::filter(n >= 18)%>%
  dplyr::arrange(desc(n))
parameters = as.vector(filter_df[[1]])

# Seed used to create the data
set.seed(42)

data_fun <- function(.data) {
  .data |>
    mutate(
      .question = interaction(grouping, .question),
      .question = reorder(
        .question,
        ave(as.numeric(.answer), .question, FUN = \(x) {
          sum(x %in% 4:5) / length(x[!is.na(x)])
        }),
        decreasing = TRUE
      )
    )
}

df = df%>%
  filter(grouping %in% parameters)

v1 <- gglikert(df, q1:q6,
               facet_rows = vars(grouping),
               add_totals = TRUE,
               data_fun = data_fun
) +
  scale_y_discrete(
    labels = ~ gsub("^.*\\.", "", .x)
  ) +
  labs(y = NULL) +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom",
    strip.text = element_text(color = "black", face = "bold"),
    strip.placement = "outside"
  ) +
  theme(strip.text.y = element_text(angle = 0)) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 5)),
    ncol = 1, scales = "free_y",
    strip.position = "right"
  )

v2 <- filter_df %>%
  ggplot2::ggplot(aes(y = grouping, x = n)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = n), position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "none",
    strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

availability_levels <- c(
  "available",
  "not_available"
)

df_ava = df%>%
  
  pivot_longer(!grouping, names_to = "question", values_to = "response")%>%
  mutate(count2 = case_when(is.na(response) ~ "not_available",
                            TRUE ~"available"))%>%
  select(-response)%>%
  group_by(grouping,question)%>%
  summarise(
    total = n(),
    available_percent = sum(count2 == "available") / total * 100,
    not_available_percent = round(sum(count2 == "not_available") / total * 100,0),
    .groups = 'drop'
  )%>%
  select(grouping,question,not_available_percent)

df_ava


v3 <- df_ava%>%
  ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
  geom_bar(stat = "identity", fill = "lightgrey") +
  geom_text(aes(label = paste0(not_available_percent, "%")), 
            size = 2.5,
            position = position_stack(vjust = 0.5)) +
  scale_y_discrete(
    limits = rev, expand = c(0, 0)
  ) +
  facet_wrap(
    facets = vars(grouping),
    labeller = labeller(grouping = label_wrap_gen(width = 10)),
    ncol = 1, scales = "free_y",
    strip.position = "left"
  ) +
  theme_light() +
  theme(
    panel.border = element_rect(color = "gray", fill = NA),
    axis.text.x = element_blank(),
    legend.position = "bottom"#,
  #  strip.text.y = element_blank()
  ) +
  labs(x = NULL, y = NULL)

v1+v3+v2+ plot_layout(widths = c(3,1,.5)
) &
  theme(legend.position = "bottom")

enter image description here


Solution

  • Here is a quick and easy approach to achieve your desired result which uses gglikert_data to replicate the dataset used under the hood for v1 then uses the levels for the .question column of this data to set the order of the question column in the v3 data, where of course we have to first create the interaction with grouping and clean up the y axis labels as you did for v1:

    library(ggstats)
    library(dplyr)
    library(ggplot2)
    
    v1_data <- gglikert_data(df, q1:q6, data_fun = data_fun)
    
    v3 <- df_ava %>%
      mutate(
        question = interaction(grouping, question),
        question = factor(question, levels = levels(v1_data$.question))
      ) |>
      ggplot2::ggplot(aes(y = question, x = not_available_percent)) +
      geom_bar(stat = "identity", fill = "lightgrey") +
      geom_text(aes(label = paste0(not_available_percent, "%")),
        size = 2.5,
        position = position_stack(vjust = 0.5)
      ) +
      scale_y_discrete(
        labels = ~ gsub("^.*\\.", "", .x),
        limits = rev, 
        expand = c(0, 0)
      ) +
      facet_wrap(
        facets = vars(grouping),
        labeller = labeller(grouping = label_wrap_gen(width = 10)),
        ncol = 1, scales = "free_y",
        strip.position = "left"
      ) +
      theme_light() +
      theme(
        panel.border = element_rect(color = "gray", fill = NA),
        axis.text.x = element_blank(),
        legend.position = "bottom"
      ) +
      labs(x = NULL, y = NULL)
    
    v1 + v3 + v2 + plot_layout(widths = c(3, 1, .5)) &
      theme(legend.position = "bottom")
    

    enter image description here