library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 3.6.3
#> Warning: package 'ggplot2' was built under R version 3.6.3
#> Warning: package 'tidyr' was built under R version 3.6.3
#> Warning: package 'purrr' was built under R version 3.6.3
#> Warning: package 'dplyr' was built under R version 3.6.3
#> Warning: package 'stringr' was built under R version 3.6.3
#> Warning: package 'forcats' was built under R version 3.6.3
library(gapminder)
#> Warning: package 'gapminder' was built under R version 3.6.2
# Keep only the Asian countries and add `yr1952`, the number of years elapsed
# since 1952 (so yr1952 == 0 corresponds to the year 1952).
gap <- gapminder %>%
  filter(continent == "Asia") %>%
  mutate(yr1952 = year - 1952)
# Collapse each country's rows into a single row whose list-column `data`
# holds that country's tibble; the result remains grouped by `country`.
gap_nested <- gap %>%
  group_by(country) %>%
  nest()
gap_nested
#> # A tibble: 33 x 2
#> # Groups: country [33]
#> country data
#> <fct> <list>
#> 1 Afghanistan <tibble [12 x 6]>
#> 2 Bahrain <tibble [12 x 6]>
#> 3 Bangladesh <tibble [12 x 6]>
#> 4 Cambodia <tibble [12 x 6]>
#> 5 China <tibble [12 x 6]>
#> 6 Hong Kong, China <tibble [12 x 6]>
#> 7 India <tibble [12 x 6]>
#> 8 Indonesia <tibble [12 x 6]>
#> 9 Iran <tibble [12 x 6]>
#> 10 Iraq <tibble [12 x 6]>
#> # ... with 23 more rows
# Inspect the nested tibble stored in the first row's `data` cell.
gap_nested$data[[1]]
#> # A tibble: 12 x 6
#> continent year lifeExp pop gdpPercap yr1952
#> <fct> <int> <dbl> <int> <dbl> <dbl>
#> 1 Asia 1952 28.8 8425333 779. 0
#> 2 Asia 1957 30.3 9240934 821. 5
#> 3 Asia 1962 32.0 10267083 853. 10
#> 4 Asia 1967 34.0 11537966 836. 15
#> 5 Asia 1972 36.1 13079460 740. 20
#> 6 Asia 1977 38.4 14880372 786. 25
#> 7 Asia 1982 39.9 12881816 978. 30
#> 8 Asia 1987 40.8 13867957 852. 35
#> 9 Asia 1992 41.7 16317921 649. 40
#> 10 Asia 1997 41.8 22227415 635. 45
#> 11 Asia 2002 42.1 25268405 727. 50
#> 12 Asia 2007 43.8 31889923 975. 55
# Fit one linear model of `lifeExp` on `yr1952` per row of the `data`
# list-column and store the fitted `lm` objects in a new list-column `fit`.
gap_fitted <- gap_nested %>%
  mutate(fit = map(data, ~ lm(lifeExp ~ yr1952, data = .x)))
gap_fitted
#> # A tibble: 33 x 3
#> # Groups: country [33]
#> country data fit
#> <fct> <list> <list>
#> 1 Afghanistan <tibble [12 x 6]> <lm>
#> 2 Bahrain <tibble [12 x 6]> <lm>
#> 3 Bangladesh <tibble [12 x 6]> <lm>
#> 4 Cambodia <tibble [12 x 6]> <lm>
#> 5 China <tibble [12 x 6]> <lm>
#> 6 Hong Kong, China <tibble [12 x 6]> <lm>
#> 7 India <tibble [12 x 6]> <lm>
#> 8 Indonesia <tibble [12 x 6]> <lm>
#> 9 Iran <tibble [12 x 6]> <lm>
#> 10 Iraq <tibble [12 x 6]> <lm>
#> # ... with 23 more rows
# Print the model fitted for the first row (shows its call and coefficients).
gap_fitted$fit[[1]]
#>
#> Call:
#> lm(formula = lifeExp ~ yr1952, data = .x)
#>
#> Coefficients:
#> (Intercept) yr1952
#> 29.9073 0.2753
# Pull the two coefficients out of every fitted model into plain numeric
# columns; `map_dbl()` yields a double vector rather than a list.
gap_fitted <- gap_fitted %>%
  mutate(
    intercept = map_dbl(fit, ~ coef(.x)[["(Intercept)"]]),
    slope = map_dbl(fit, ~ coef(.x)[["yr1952"]])
  )
gap_fitted
#> # A tibble: 33 x 5
#> # Groups: country [33]
#> country data fit intercept slope
#> <fct> <list> <list> <dbl> <dbl>
#> 1 Afghanistan <tibble [12 x 6]> <lm> 29.9 0.275
#> 2 Bahrain <tibble [12 x 6]> <lm> 52.7 0.468
#> 3 Bangladesh <tibble [12 x 6]> <lm> 36.1 0.498
#> 4 Cambodia <tibble [12 x 6]> <lm> 37.0 0.396
#> 5 China <tibble [12 x 6]> <lm> 47.2 0.531
#> 6 Hong Kong, China <tibble [12 x 6]> <lm> 63.4 0.366
#> 7 India <tibble [12 x 6]> <lm> 39.3 0.505
#> 8 Indonesia <tibble [12 x 6]> <lm> 36.9 0.635
#> 9 Iran <tibble [12 x 6]> <lm> 45.0 0.497
#> 10 Iraq <tibble [12 x 6]> <lm> 50.1 0.235
#> # ... with 23 more rows
Created on 2020-07-29 by the reprex package (v0.3.0)
Can anyone explain what the shortcuts used in the above code, such as ~ and .x, actually stand for? Sometimes a plain "." is used instead. Is there a package that substitutes the actual names for these shortcuts? Does the pronoun .x refer to the data argument in the map function?
The symbol ~ indicates you're dealing with a formula.
In the rlang package (and by extension in the tidyverse packages) a formula can be interpreted as an anonymous function.
Therefore if you write:
map(data, ~ lm(lifeExp ~ yr1952, data = .x))
It's like writing:
map(data, function(x) lm(lifeExp ~ yr1952, data = x))
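The first method is really useful and more readable than an anonymous function. Inside the formula, .x (or simply .) refers to the current element of map's first argument — here, one country's tibble from the data list-column.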
When you deal with more than one input you can also use .y or even ..1, ..2, etc.
The function that allows the magic is purrr::as_mapper, which eventually calls rlang::as_function.
Does this answer your question?