Search code examples
rggplot2linear-regressionline-plotaesthetics

match color, line type AND shape in a SINGLE legend ggplot2


I'm struggling with changing a regression aes with ggplot2. In the plot below, how can I match the legends, colors, line types AND shapes according to TYPE?

  • I have this plot:

plot

  • the code is:

# Scatter plot of SCORE against IND_TEST_SCORE with one linear fit per TYPE.
# color, linetype and shape are all mapped to TYPE in the global aes().
data %>%
  # filter(!is.na(IND_TEST_SCORE) & !is.na(SCORE)) %>% 
  ggplot(., aes(x = IND_TEST_SCORE, y = SCORE,
                color = TYPE, linetype = TYPE, shape = TYPE)) +
  # A constant color given outside aes() replaces the mapped color = TYPE
  # for this layer, so every point is drawn in red.
  geom_point(alpha = 0.1, color = "red") +
  scale_shape_manual(values = c(16, 17)) +  ## change shape type
  # One straight-line (lm) fit per TYPE; se = T also draws the error band.
  stat_smooth(formula = y ~ x, method = lm, se = T) +
  scale_linetype_manual(values = c("solid", "dashed")) +
  # Only the color scale receives custom labels here; the shape and linetype
  # scales above keep their default labels.
  scale_color_manual(values = c(A = "yellow", B =  "cadetblue2"),
                     label = c(A = "TYPE A", B = "TYPE B")) +
  # scale_linetype(name = "Type:") +
  # facet_grid(~TYPE) +
  # color = "Type:" sets the legend title for the color scale only.
  labs(x = "IND_TEST_SCORE",
       y = "SCORE",
       title = "Effect of IND_TEST_SCORE and TYPE on SCORE",
       color = "Type:") +
  theme_bw()

Questions:

  • 1 Match colors in scale_color_manual to TYPE
  • 2 Match the legends to the linetype defined in scale_linetype_manual
  • 3 match the legends with shapes and linetype to the one with color
  • 4 get rid of the duplicated legend

PS: I've seen some similar posts, and here too, but I couldn't adapt them to match everything that I needed to match. Thanks in advance.

  • data:
> dput(data)
structure(list(ID = structure(c("PART_1", "PART_1", "PART_2", 
"PART_2", "PART_3", "PART_3", "PART_4", "PART_4", "PART_5", "PART_5", 
"PART_6", "PART_6", "PART_7", "PART_7", "PART_8", "PART_8", "PART_9", 
"PART_9", "PART_10", "PART_10", "PART_11", "PART_11", "PART_12", 
"PART_12", "PART_13", "PART_13", "PART_14", "PART_14", "PART_15", 
"PART_15", "PART_16", "PART_16", "PART_17", "PART_17", "PART_18", 
"PART_18", "PART_19", "PART_19", "PART_20", "PART_20", "PART_21", 
"PART_21", "PART_22", "PART_22", "PART_23", "PART_23", "PART_24", 
"PART_24", "PART_25", "PART_25", "PART_26", "PART_26", "PART_27", 
"PART_27", "PART_28", "PART_28", "PART_29", "PART_29", "PART_30", 
"PART_30", "PART_31", "PART_31", "PART_32", "PART_32", "PART_33", 
"PART_33", "PART_34", "PART_34", "PART_35", "PART_35", "PART_36", 
"PART_36", "PART_37", "PART_37", "PART_38", "PART_38", "PART_39", 
"PART_39", "PART_40", "PART_40", "PART_41", "PART_41", "PART_42", 
"PART_42", "PART_43", "PART_43", "PART_44", "PART_44", "PART_45", 
"PART_45", "PART_46", "PART_46", "PART_47", "PART_47", "PART_48", 
"PART_48", "PART_49", "PART_49", "PART_50", "PART_50", "PART_51", 
"PART_51", "PART_52", "PART_52", "PART_53", "PART_53", "PART_54", 
"PART_54", "PART_55", "PART_55", "PART_56", "PART_56", "PART_57", 
"PART_57", "PART_58", "PART_58", "PART_59", "PART_59", "PART_60", 
"PART_60", "PART_61", "PART_61", "PART_62", "PART_62", "PART_63", 
"PART_63", "PART_64", "PART_64", "PART_65", "PART_65", "PART_66", 
"PART_66", "PART_67", "PART_67", "PART_68", "PART_68", "PART_69", 
"PART_69", "PART_70", "PART_70", "PART_71", "PART_71"), class = c("glue", 
"character")), IND_TEST_SCORE = c(100, 100, 36, 36, 32, 32, 96, 
96, 11, 11, 12, 12, 32, 32, 72, 72, 100, 100, 64, 64, 2, 2, 19, 
19, 99, 99, 86, 86, 60, 60, 108, 108, 95, 95, 35, 35, 60, 60, 
9, 9, 78, 78, 61, 61, 61, 61, 67, 67, 105, 105, 99, 99, 51, 51, 
21, 21, 65, 65, 30, 30, 0.9, 0.9, 77, 77, 54, 54, 14, 14, 103, 
103, 48, 48, 0.7, 0.7, 2, 2, 39, 39, 94, 94, 80, 80, 8, 8, 30, 
30, 103, 103, 113, 113, 91, 91, 59, 59, 56, 56, 86, 86, 99, 99, 
72, 72, 34, 34, 32, 32, 6, 6, 44, 44, 99, 99, 65, 65, 98, 98, 
110, 110, 102, 102, 87, 87, 50, 50, 89, 89, 36, 36, 93, 93, 8, 
8, 11, 11, 78, 78, 48, 48, 77, 77, 4, 4), SCORE = c(64.9, 64.25, 
63.8, 59, 71.73, NA, 67.67, 67.88, NA, 67.08, NA, NA, 52.5, 60.75, 
72.35, 64.42, 65.28, 71.17, 57.22, 58.42, NA, NA, NaN, 49.8, 
69, 63.36, 66.67, 65.2, NaN, NaN, 66.58, 70.2, 69, 62.85, 60.55, 
NaN, 56.29, 61.6, 67.45, 53.92, 68.4, 62.63, 64.25, NA, NaN, 
NaN, 50.86, 50.46, 67.83, 65.14, 65.96, 60.58, 57, 63.29, 53.07, 
NA, 66.89, 64.33, NaN, NaN, NA, NA, 59, 68.57, 61.5, NA, NA, 
NA, 65.9, 66.3, 64.07, NA, NA, 57.29, NA, NA, 57.91, 53.5, 67.89, 
63.48, 68.75, NA, 68.5, 57.07, NaN, NaN, 63.24, 61.82, 66.19, 
NA, 60.59, 68.61, 59.24, 57.1, 54.33, 62.84, 64.39, 63, 65.83, 
61.91, 65.71, 58.38, 63, NaN, 63.78, 61.56, 63.62, NA, 64, NaN, 
65.08, 65.55, NA, 63.8, 67.61, 65, 67.57, 63.14, 72.71, 67.31, 
65.46, 67.75, 61.71, 57.62, NA, 63.31, 57.62, 54.83, NA, 66.43, 
NA, NA, NA, NA, 64, 64.67, 61.33, 57.92, 62.64, 59, NA, NA), 
    TYPE = structure(c(2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("A", 
    "B"), class = "factor")), row.names = c(NA, -142L), class = c("tbl_df", 
"tbl", "data.frame"))

Solution

    1. You’re getting different legends because the color scale has a custom name (set via labs(color = "Type:")) and custom labels (set via label = within scale_color_manual()), while your shape and linetype scales still use their defaults; ggplot2 only merges legends whose names and labels match. You could fix this by adding labels = c(A = "TYPE A", B = "TYPE B"), name = "Type:" to the other scale_*_manual() calls; however it’s easier and more succinct to just change the variable name and labels in the data before plotting, as below.
    2. You have color = "red" set within geom_point(), which is overriding the color aesthetic you set in ggplot(aes()). I also think alpha = 0.1 might be too light to show up, on my device at least (though that surprised me).
    library(dplyr)
    library(ggplot2)
    
    # Recode TYPE into a display-ready `Type:` column and map color, linetype
    # and shape to it. Because all three scales then share the same name and
    # labels, ggplot2 merges them into a single legend.
    data %>%
      mutate(`Type:` = paste("TYPE", TYPE)) %>%
      ggplot(aes(x = IND_TEST_SCORE, y = SCORE,
                 color = `Type:`, linetype = `Type:`, shape = `Type:`)) +
      # No constant color here, so points keep the mapped color.
      geom_point(alpha = 0.25) +
      # Values are named by level so the mapping does not depend on the
      # (alphabetical) order of the levels.
      scale_shape_manual(values = c("TYPE A" = 16, "TYPE B" = 17)) +
      stat_smooth(formula = y ~ x, method = "lm", se = TRUE) +
      scale_linetype_manual(
        values = c("TYPE A" = "solid", "TYPE B" = "dashed")
      ) +
      scale_color_manual(
        values = c("TYPE A" = "yellow", "TYPE B" = "cadetblue2")
      ) +
      labs(x = "IND_TEST_SCORE",
           y = "SCORE",
           title = "Effect of IND_TEST_SCORE and TYPE on SCORE") +
      theme_bw()
    

    PS - you can also color the error bands by adding fill = `Type:` to aes(), setting an alpha level in stat_smooth(), and using your manual color scale for both color and fill by adding aesthetics = c("color", "fill"):

    # Same single-legend plot, with the error bands also colored per type:
    # fill is mapped to `Type:` and the manual color scale is applied to both
    # the color and fill aesthetics.
    data %>%
      mutate(`Type:` = paste("TYPE", TYPE)) %>%
      ggplot(aes(x = IND_TEST_SCORE, y = SCORE,
                 color = `Type:`, fill = `Type:`,
                 linetype = `Type:`, shape = `Type:`)) +
      geom_point(alpha = 0.25) +
      # Values are named by level so the mapping does not depend on the
      # (alphabetical) order of the levels.
      scale_shape_manual(values = c("TYPE A" = 16, "TYPE B" = 17)) +
      # alpha here sets the transparency of the filled error band.
      stat_smooth(formula = y ~ x, method = "lm", se = TRUE, alpha = .15) +
      scale_linetype_manual(
        values = c("TYPE A" = "solid", "TYPE B" = "dashed")
      ) +
      scale_color_manual(
        values = c("TYPE A" = "yellow", "TYPE B" = "cadetblue2"),
        aesthetics = c("color", "fill")
      ) +
      labs(x = "IND_TEST_SCORE",
           y = "SCORE",
           title = "Effect of IND_TEST_SCORE and TYPE on SCORE") +
      theme_bw()