I have two df:
the first df contains aptitude data. I need to create a new column sten_score
which is based on raw score and the particular subscale
. I am currently using case_when
to manually go through each subscale and score range to assign sten_score
in df#1.
I want to see if there is an efficient way to do this. ChatGPT suggested left_join
but that feels error-prone (multiple common elements...
# A tibble: 119 × 5
response_id subscale score grade domain
<chr> <chr> <dbl> <dbl> <chr>
1 R_43vyKDjzHx5D7kJ Abstract Reasoning 20 10 AR
2 R_43vyKDjzHx5D7kJ Language Aptitude 22 10 LA
3 R_43vyKDjzHx5D7kJ Mechanical Reasoning 12 10 MR
4 R_43vyKDjzHx5D7kJ Numerical Aptitude 21 10 NA
5 R_43vyKDjzHx5D7kJ Perceptual Aptitude 60 10 PA
6 R_43vyKDjzHx5D7kJ Spatial Aptitude 21 10 SA
7 R_43vyKDjzHx5D7kJ Verbal Reasoning 23 10 VR
8 R_45Ut4Vj2cnerN2F Abstract Reasoning 24 10 AR
9 R_45Ut4Vj2cnerN2F Language Aptitude 25 10 LA
10 R_45Ut4Vj2cnerN2F Mechanical Reasoning 18 10 MR
# Dput below ----------------
structure(list(response_id = c("R_43vyKDjzHx5D7kJ", "R_43vyKDjzHx5D7kJ",
"R_43vyKDjzHx5D7kJ", "R_43vyKDjzHx5D7kJ", "R_43vyKDjzHx5D7kJ",
"R_43vyKDjzHx5D7kJ", "R_43vyKDjzHx5D7kJ", "R_45Ut4Vj2cnerN2F",
"R_45Ut4Vj2cnerN2F", "R_45Ut4Vj2cnerN2F"), subscale = c("Abstract Reasoning",
"Language Aptitude", "Mechanical Reasoning", "Numerical Aptitude",
"Perceptual Aptitude", "Spatial Aptitude", "Verbal Reasoning",
"Abstract Reasoning", "Language Aptitude", "Mechanical Reasoning"
), score = c(20, 22, 12, 21, 60, 21, 23, 24, 25, 18), grade = c(10,
10, 10, 10, 10, 10, 10, 10, 10, 10), domain = c("AR", "LA", "MR",
"NA", "PA", "SA", "VR", "AR", "LA", "MR")), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
DF #2 contains sten_scores
based on grades and subscales:
# A tibble: 140 × 7
domain min_score max_score sten mean sd grade
<chr> <int> <int> <int> <dbl> <dbl> <dbl>
1 LA 0 6 1 17.8 5.75 10
2 LA 7 9 2 17.8 5.75 10
3 LA 10 12 3 17.8 5.75 10
4 LA 13 14 4 17.8 5.75 10
5 LA 15 17 5 17.8 5.75 10
6 LA 18 20 6 17.8 5.75 10
7 LA 21 23 7 17.8 5.75 10
8 LA 24 26 8 17.8 5.75 10
9 LA 27 28 9 17.8 5.75 10
10 LA 29 30 10 17.8 5.75 10
# dput below ------------------
structure(list(domain = c("LA", "LA", "LA", "LA", "LA", "LA",
"LA", "LA", "LA", "LA", "AR", "AR", "AR", "AR", "AR", "AR", "AR",
"AR", "AR", "AR", "VR", "VR", "VR", "VR", "VR", "VR", "VR", "VR",
"VR", "VR", "MR", "MR", "MR", "MR", "MR", "MR", "MR", "MR", "MR",
"MR", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA",
"SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "SA", "PA",
"PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "LA", "LA",
"LA", "LA", "LA", "LA", "LA", "LA", "LA", "LA", "AR", "AR", "AR",
"AR", "AR", "AR", "AR", "AR", "AR", "AR", "VR", "VR", "VR", "VR",
"VR", "VR", "VR", "VR", "VR", "VR", "MR", "MR", "MR", "MR", "MR",
"MR", "MR", "MR", "MR", "MR", "NA", "NA", "NA", "NA", "NA", "NA",
"NA", "NA", "NA", "NA", "SA", "SA", "SA", "SA", "SA", "SA", "SA",
"SA", "SA", "SA", "PA", "PA", "PA", "PA", "PA", "PA", "PA", "PA",
"PA", "PA"), min_score = c(0L, 7L, 10L, 13L, 15L, 18L, 21L, 24L,
27L, 29L, 0L, 3L, 6L, 9L, 12L, 16L, 19L, 22L, 25L, 28L, 0L, 5L,
8L, 11L, 14L, 17L, 20L, 23L, 26L, 29L, 0L, 4L, 7L, 9L, 11L, 13L,
16L, 18L, 20L, 23L, 0L, 3L, 6L, 10L, 13L, 16L, 20L, 23L, 26L,
29L, 0L, 3L, 5L, 8L, 11L, 13L, 16L, 19L, 22L, 24L, 0L, 12L, 20L,
26L, 32L, 38L, 44L, 51L, 57L, 59L, 0L, 5L, 7L, 10L, 13L, 16L,
19L, 22L, 25L, 28L, 0L, 2L, 5L, 8L, 11L, 14L, 17L, 20L, 23L,
26L, 0L, 4L, 6L, 9L, 12L, 15L, 18L, 21L, 24L, 27L, 0L, 4L, 6L,
8L, 10L, 12L, 14L, 16L, 18L, 21L, 0L, 1L, 4L, 7L, 10L, 13L, 16L,
19L, 23L, 26L, 0L, 2L, 5L, 7L, 10L, 12L, 15L, 17L, 20L, 22L,
0L, 10L, 17L, 24L, 31L, 38L, 44L, 51L, 57L, 59L), max_score = c(6L,
9L, 12L, 14L, 17L, 20L, 23L, 26L, 28L, 30L, 2L, 5L, 8L, 11L,
15L, 18L, 21L, 24L, 27L, 30L, 4L, 7L, 10L, 13L, 16L, 19L, 22L,
25L, 28L, 30L, 3L, 6L, 8L, 10L, 12L, 15L, 17L, 19L, 22L, 30L,
2L, 5L, 9L, 12L, 15L, 19L, 22L, 25L, 28L, 30L, 2L, 4L, 7L, 10L,
12L, 15L, 18L, 21L, 23L, 30L, 11L, 19L, 25L, 31L, 37L, 43L, 50L,
56L, 58L, 60L, 4L, 6L, 9L, 12L, 15L, 18L, 21L, 24L, 27L, 30L,
1L, 4L, 7L, 10L, 13L, 16L, 19L, 22L, 25L, 30L, 3L, 5L, 8L, 11L,
14L, 17L, 20L, 23L, 26L, 30L, 3L, 5L, 7L, 9L, 11L, 13L, 15L,
17L, 20L, 30L, NA, 3L, 6L, 9L, 12L, 15L, 18L, 22L, 25L, 30L,
1L, 4L, 6L, 9L, 11L, 14L, 16L, 19L, 21L, 30L, 9L, 16L, 23L, 30L,
37L, 43L, 50L, 56L, 58L, 60L), sten = c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), mean = c(17.84,
17.84, 17.84, 17.84, 17.84, 17.84, 17.84, 17.84, 17.84, 17.84,
15.16, 15.16, 15.16, 15.16, 15.16, 15.16, 15.16, 15.16, 15.16,
15.16, 16.76, 16.76, 16.76, 16.76, 16.76, 16.76, 16.76, 16.76,
16.76, 16.76, 12.91, 12.91, 12.91, 12.91, 12.91, 12.91, 12.91,
12.91, 12.91, 12.91, 15.79, 15.79, 15.79, 15.79, 15.79, 15.79,
15.79, 15.79, 15.79, 15.79, 12.84, 12.84, 12.84, 12.84, 12.84,
12.84, 12.84, 12.84, 12.84, 12.84, 38.62, 38.62, 38.62, 38.62,
38.62, 38.62, 38.62, 38.62, 38.62, 38.62, 15.68, 15.68, 15.68,
15.68, 15.68, 15.68, 15.68, 15.68, 15.68, 15.68, 13.25, 13.25,
13.25, 13.25, 13.25, 13.25, 13.25, 13.25, 13.25, 13.25, 14.89,
14.89, 14.89, 14.89, 14.89, 14.89, 14.89, 14.89, 14.89, 14.89,
11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 11.8, 12.85,
12.85, 12.85, 12.85, 12.85, 12.85, 12.85, 12.85, 12.85, 12.85,
11.78, 11.78, 11.78, 11.78, 11.78, 11.78, 11.78, 11.78, 11.78,
11.78, 37.07, 37.07, 37.07, 37.07, 37.07, 37.07, 37.07, 37.07,
37.07, 37.07), sd = c(5.75, 5.75, 5.75, 5.75, 5.75, 5.75, 5.75,
5.75, 5.75, 5.75, 6.34, 6.34, 6.34, 6.34, 6.34, 6.34, 6.34, 6.34,
6.34, 6.34, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05,
6.05, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56, 4.56,
6.66, 6.66, 6.66, 6.66, 6.66, 6.66, 6.66, 6.66, 6.66, 6.66, 5.33,
5.33, 5.33, 5.33, 5.33, 5.33, 5.33, 5.33, 5.33, 5.33, 12.76,
12.76, 12.76, 12.76, 12.76, 12.76, 12.76, 12.76, 12.76, 12.76,
5.83, 5.83, 5.83, 5.83, 5.83, 5.83, 5.83, 5.83, 5.83, 5.83, 6.05,
6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 6.05, 5.94, 5.94,
5.94, 5.94, 5.94, 5.94, 5.94, 5.94, 5.94, 5.94, 4.11, 4.11, 4.11,
4.11, 4.11, 4.11, 4.11, 4.11, 4.11, 4.11, 6.12, 6.12, 6.12, 6.12,
6.12, 6.12, 6.12, 6.12, 6.12, 6.12, 4.97, 4.97, 4.97, 4.97, 4.97,
4.97, 4.97, 4.97, 4.97, 4.97, 13.66, 13.66, 13.66, 13.66, 13.66,
13.66, 13.66, 13.66, 13.66, 13.66), grade = c(10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9)), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -140L))
Is there an efficient way to assign sten_scores
given this context?
dplyr
supports overlap joins, e.g. between(x, y_lower, y_upper)
, that can be combined with other join specs:
# "one-to-one" to throw an error if there are other relationships
left_join(
df1, df2,
by = join_by(domain, grade, between(score, min_score, max_score)),
relationship = "one-to-one"
)
#> # A tibble: 10 × 10
#> response_id subscale score grade domain min_score max_score sten mean sd
#> <chr> <chr> <dbl> <dbl> <chr> <int> <int> <int> <dbl> <dbl>
#> 1 R_43vyKDjz… Abstrac… 20 10 AR 19 21 7 15.2 6.34
#> 2 R_43vyKDjz… Languag… 22 10 LA 21 23 7 17.8 5.75
#> 3 R_43vyKDjz… Mechani… 12 10 MR 11 12 5 12.9 4.56
#> 4 R_43vyKDjz… Numeric… 21 10 NA 20 22 7 15.8 6.66
#> 5 R_43vyKDjz… Percept… 60 10 PA 59 60 10 38.6 12.8
#> 6 R_43vyKDjz… Spatial… 21 10 SA 19 21 8 12.8 5.33
#> 7 R_43vyKDjz… Verbal … 23 10 VR 23 25 8 16.8 6.05
#> 8 R_45Ut4Vj2… Abstrac… 24 10 AR 22 24 8 15.2 6.34
#> 9 R_45Ut4Vj2… Languag… 25 10 LA 24 26 8 17.8 5.75
#> 10 R_45Ut4Vj2… Mechani… 18 10 MR 18 19 8 12.9 4.56