Search code examples
Tags: r, scatter-plot, categorical-data, dummy-variable

Scatterplot with multiple variables


Scatterplot reference

data set

Can someone help me create three scatter plots as in the first picture? Ideally using the plot() function.


Solution

  • library(tidyverse)
    library(ggplot2)  # already attached by tidyverse; kept for explicitness

    # Fix the RNG seed so the simulated columns are identical on every run.
    # NOTE: the example table printed below was presumably produced without
    # this seed, so its exact values will differ.
    set.seed(42)

    # Simulated data: 18 images, split evenly by sex (9 / 9) and by country
    # (6 rows each, encoded as mutually exclusive 0/1 dummy columns).
    df <- tibble(
      image = 1:18,
      m_r_exsal = rnorm(18, mean = 5, sd = 2),
      m_r_sal = rnorm(18, mean = 6, sd = 2),
      female = rep(c(1, 0), each = 9),
      lg_salary = rnorm(18, mean = 5, sd = 1.5),
      deviation = rnorm(18, mean = 1, sd = 1),
      chinese = rep(c(1, 0, 0), each = 6),
      european = rep(c(0, 1, 0), each = 6),
      american = rep(c(0, 0, 1), each = 6)
    )
    

    Example data:

    # A tibble: 18 x 9
       image m_r_exsal m_r_sal female lg_salary deviation chinese european american
       <int>     <dbl>   <dbl>  <dbl>     <dbl>     <dbl>   <dbl>    <dbl>    <dbl>
     1     1      6.19    9.56      1      5.48     1.54        1        0        0
     2     2     10.1     5.17      1      3.77    -0.755       1        0        0
     3     3      4.96    1.91      1      6.75     0.381       1        0        0
     4     4      5.10    4.57      1      4.61    -0.207       1        0        0
     5     5     -1.25    6.57      1      2.33     0.880       1        0        0
     6     6      6.77    9.10      1      3.07     1.03        1        0        0
     7     7      4.04    4.84      1      4.56     1.95        0        1        0
     8     8      3.72    4.72      1      5.32     1.17        0        1        0
     9     9      7.59    7.05      1      6.24    -0.224       0        1        0
    10    10      4.09    3.94      0      5.60     2.52        0        1        0
    11    11      4.15    6.05      0      7.08    -0.152       0        1        0
    12    12      6.07    5.27      0      5.79    -0.323       0        1        0
    13    13      4.49    4.64      0      5.97     0.457       0        0        1
    14    14      6.74    4.70      0      3.38     0.377       0        0        1
    15    15      7.46    9.02      0      6.65     1.85        0        0        1
    16    16      4.29    5.26      0      4.07     2.18        0        0        1
    17    17      2.33    1.58      0      8.43     1.06        0        0        1
    18    18      4.78    8.75      0      5.03     0.101       0        0        1
    

    Making the plot:

    # Collapse the three 0/1 country dummies into a single `country` label,
    # recode `female` to readable labels, then plot deviation against
    # lg_salary with one panel per country and a linear fit per sex.
    df %>%
      mutate(
        # Each dummy becomes its own label where it equals 1, NA elsewhere.
        chinese = case_when(chinese == 1 ~ "chinese"),
        european = case_when(european == 1 ~ "european"),
        american = case_when(american == 1 ~ "american"),
        # Anything other than 1 (including NA) is labelled "male".
        female = case_when(
          female == 1 ~ "female",
          TRUE ~ "male"
        )
      ) %>%
      # na.rm = TRUE drops the NA placeholders while uniting, so no literal
      # "NA" text is pasted in and then stripped with a pattern afterwards
      # (which would also damage any label that itself contains "NA").
      unite(
        country, chinese:american,
        remove = TRUE, sep = "", na.rm = TRUE
      ) %>%
      ggplot() +
      aes(lg_salary, deviation, col = female) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE) +
      facet_wrap(~ country)
    

    The output:

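    [Image: the resulting figure — three scatter-plot panels, one per country (american, chinese, european), showing deviation against lg_salary, with points and linear fit lines coloured by sex.]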