Currently struggling with a problem finding the country where billionaires are the oldest and youngest on average in a given country (data set shown below).
Moreover, I need to remove countries that have less than five observations.
I have code below that shows a table of age, worth in billions, and country code. I have countries sorted, but I am unsure of the best strategy to find which country has the youngest/oldest billionaires.
I have one line struck out where I am trying to spread by country name, but I think that will probably be messy.
Small sample here:
# A tibble: 2,614 x 22
age category citizenship company.name company.type `country code` founded
<int> <chr> <chr> <chr> <chr> <chr> <int>
1 NA Financi… Saudi Arab… Rolaco Trad… new SAU 1968
2 34 Financi… United Sta… Fidelity In… new USA 1946
3 59 Non-Tra… Brazil Companhia B… new BRA 1948
4 61 New Sec… Germany Ratiopharm new DEU 1881
5 NA Financi… Hong Kong Swire new HKG 1816
6 NA Traded … Bahrain YBA Kanoo new BHR 1890
7 NA New Sec… Japan Otsuka Hold… new JPN 1921
8 NA Traded … Japan Sony new JPN 1946
9 66 Financi… Japan Mori Buildi… new JPN 1959
10 NA Traded … France Chanel new FRA 1909
# … with 2,604 more rows, and 15 more variables: `from emerging` <chr>,
# gdp <dbl>, gender <chr>, industry <chr>, inherited <chr>, name <chr>,
# rank <int>, region <chr>, relationship <chr>, sector <chr>, `was
# founder` <chr>, `was political` <chr>, wealth.type <chr>, `worth in
# billions` <dbl>, year <int>
dput(head(bil))
structure(list(age = c(NA, 34L, 59L, 61L, NA, NA), category = c("Financial",
"Financial", "Non-Traded Sectors", "New Sectors", "Financial",
"Traded Sectors"), citizenship = c("Saudi Arabia", "United States",
"Brazil", "Germany", "Hong Kong", "Bahrain"), company.name = c("Rolaco Trading and Contracting Company",
"Fidelity Investments", "Companhia Brasileira de Distribui?ao",
"Ratiopharm", "Swire", "YBA Kanoo"), company.type = c("new",
"new", "new", "new", "new", "new"), country_code = c("SAU", "USA",
"BRA", "DEU", "HKG", "BHR"), founded = c(1968L, 1946L, 1948L,
1881L, 1816L, 1890L), `from emerging` = c("True", "True", "True",
"True", "True", "True"), gdp = c(1.58e+11, 8.1e+12, 8.54e+11,
2.5e+12, 1.6e+11, 6.1e+09), gender = c("male", "female", "male",
"male", "male", "male"), industry = c("Money Management", "Money Management",
"Retail, Restaurant", "Technology-Medical", "Money Management",
"Consumer"), inherited = c("True", "True", "True", "True", "True",
"True"), name = c("Abdul Aziz Al-Sulaiman", "Abigail Johnson",
"Abilio dos Santos Diniz", "Adolf Merckle", "Adrian and John Swire",
"Ahmed Ali Kanoo"), rank = c(404L, 145L, 322L, 388L, 162L, 383L
), region = c("Middle East/North Africa", "North America", "Latin America",
"Europe", "East Asia", "Middle East/North Africa"), relationship = c("founder",
"relation", "relation", "relation", "relation", "relation"),
sector = c("construction", "investment banking", "retail",
"pharmaceuticals", "trading company", "shipping"), `was founder` = c("True",
"True", "True", "True", "True", "True"), `was political` = c("False",
"False", "False", "False", "False", "True"), wealth.type = c("self-made finance",
"inherited", "inherited", "inherited", "inherited", "inherited"
), worth_billions = c(1, 2.5, 1.2, 1, 2.2, 1), year = c(1996L,
1996L, 1996L, 1996L, 1996L, 1996L)), row.names = c(NA, -6L
), spec = structure(list(cols = list(age = structure(list(), class = c("collector_integer",
"collector")), category = structure(list(), class = c("collector_character",
"collector")), citizenship = structure(list(), class = c("collector_character",
"collector")), company.name = structure(list(), class = c("collector_character",
"collector")), company.type = structure(list(), class = c("collector_character",
"collector")), `country code` = structure(list(), class = c("collector_character",
"collector")), founded = structure(list(), class = c("collector_integer",
"collector")), `from emerging` = structure(list(), class = c("collector_character",
"collector")), gdp = structure(list(), class = c("collector_double",
"collector")), gender = structure(list(), class = c("collector_character",
"collector")), industry = structure(list(), class = c("collector_character",
"collector")), inherited = structure(list(), class = c("collector_character",
"collector")), name = structure(list(), class = c("collector_character",
"collector")), rank = structure(list(), class = c("collector_integer",
"collector")), region = structure(list(), class = c("collector_character",
"collector")), relationship = structure(list(), class = c("collector_character",
"collector")), sector = structure(list(), class = c("collector_character",
"collector")), `was founder` = structure(list(), class = c("collector_character",
"collector")), `was political` = structure(list(), class = c("collector_character",
"collector")), wealth.type = structure(list(), class = c("collector_character",
"collector")), `worth in billions` = structure(list(), class = c("collector_double",
"collector")), year = structure(list(), class = c("collector_integer",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector"))), class = "col_spec"), class = c("tbl_df", "tbl",
"data.frame"))
load("bil.RData")
print(bil)
# Renaming a few columns for spacing issues
colnames(bil)[21] <- "worth_billions"
colnames(bil)[6] <- "country_code"
# Finding where billionaires are oldest/youngest on average,
# ... then removing less than five observations
bil %>%
filter(!is.na(age)) %>%
select(age, worth_billions, country_code) %>%
group_by(age, worth_billions, country_code) %>%
mutate(count = n()) %>%
arrange(country_code) %>%
#spread(key = country_code, value = "USA") %>%
print()
I expect to find the country that has the oldest billionaires and youngest billionaires, excluding countries with fewer than five observations. Any help is appreciated!
After removing the NA elements in 'age' (filter
), grouped by 'country_code' and filter
out the groups having less than 5 billionaires, then summarise
the mean
of 'age' and slice
the row having the max
imum value for 'ageMean'
library(dplyr)
bil %>%
filter(!is.na(age)) %>%
group_by(country_code) %>%
filter(sum(worth_billions) > 1.0) > 5) %>%
summarise(ageMean = mean(age)) %>%
slice(which.max(ageMean))