My goal is to use some sort of automated process where I can input a team name and have it substituted for the team across all occurrences in a chunk of code. I have been trying to make a function to do so. I have provided a subset of the manipulations I would like to make, the manual code that performs them, and a function with "team" in place of the example team name "falcons".
Tidied, manual version:
library(dplyr)
# Per-`posg` totals: `cash` is the summed `average` within each `posg`,
# `nump` is the number of rows in that `posg`.
falconscap2022 <- falconscap2022 |>
  mutate(
    cash = sum(average),
    nump = n(),
    .by = posg
  ) |>
  # percent() is not exported by dplyr; qualify it so the script runs with
  # only dplyr attached. The denominator sums `cash` over every row, so a
  # `posg` with several rows contributes its total once per row.
  # NOTE(review): confirm that is the intended denominator.
  mutate(cap_pct = scales::percent(cash / sum(cash)))

# `pick` is character in the sample data: convert before comparing so the
# test is numeric. `.keep = "unused"` drops `pick` once it has been used.
falconspicks2022 <- falconspicks2022 |>
  mutate(top100 = as.numeric(pick) <= 100, .keep = "unused")

# Attach the pick flags to every cap row with the same `posg`.
falcons2022stuff <- falconscap2022 |>
  left_join(falconspicks2022, by = "posg")
Data:
# Sample cap table: ten rows with a numeric `average` column and a `posg`
# label column (some labels repeat).
falconscap2022 <- data.frame(
  average = c(
    18333333, 16823333, 9375000, 8227624, 5500000,
    5383617, 5250000, 4850000, 3681822, 3576437
  ),
  posg = c("OL", "Front7", "QB", "TE", "DB", "WR", "RB", "ST", "OL", "DB"),
  stringsAsFactors = FALSE
)
# Sample picks table: five rows; `pick` is stored as character, `posg` is
# the label used to join against the cap table.
falconspicks2022 <- data.frame(
  pick = c("10", "109", "200", "94", "109"),
  posg = c("OL", "Front7", "DB", "WR", "DB"),
  stringsAsFactors = FALSE
)
Untidied, function version:
#' Clean one team's 2022 cap table and join it to that team's 2022 picks
#'
#' Looks up the data frames named `<team>cap2022` and `<team>picks2022`
#' (for example `falconscap2022` and `falconspicks2022`), strips `$` and `,`
#' from `average`, adds per-`posg` totals, flags each pick as top-100 or
#' later, and left-joins the pick flags onto the cap rows by `posg`.
#'
#' @param team Team prefix. Either a string (`"falcons"`) or a bare name
#'   (`falcons`). A bare name is used literally unless it is a variable in
#'   the calling frame holding a single string, in which case that string is
#'   used (so `for (t in teams) datacleanup(t)` works).
#' @param envir Environment in which the two data frames are looked up.
#'   Defaults to the calling frame (enclosing environments are searched too).
#' @return A data frame with the cap columns plus `cash`, `nump`, `cap_pct`,
#'   `top100` and `latepicks`. Cap rows are repeated once per matching pick;
#'   cap rows without a matching pick get `NA` flags.
datacleanup <- function(team, envir = parent.frame()) {
  caller <- parent.frame()
  team_expr <- substitute(team)
  if (is.symbol(team_expr)) {
    ## accept datacleanup(falcons) as well as datacleanup("falcons")
    bare <- as.character(team_expr)
    held <- get0(bare, envir = caller, ifnotfound = NULL)
    team <- if (is.character(held) && length(held) == 1L) held else bare
  }
  if (!is.character(team) || length(team) != 1L || is.na(team)) {
    stop("`team` must be a single team name, e.g. \"falcons\".", call. = FALSE)
  }

  ## each team has its own precleaned data, e.g. falconscap2022 and
  ## falconspicks2022
  cap_name <- paste0(team, "cap2022")
  picks_name <- paste0(team, "picks2022")
  wanted <- c(cap_name, picks_name)
  found <- vapply(wanted, exists, logical(1), envir = envir)
  if (!all(found)) {
    stop(
      "Cannot find data frame(s): ", paste(wanted[!found], collapse = ", "),
      call. = FALSE
    )
  }
  teamcap2022 <- get(cap_name, envir = envir)
  teampicks2022 <- get(picks_name, envir = envir)

  teamcap2022 <- teamcap2022 |>
    dplyr::mutate(average = as.numeric(gsub("[$,]", "", average))) |>
    dplyr::mutate(cash = sum(average), nump = dplyr::n(), .by = posg) |>
    ## the denominator sums `cash` over every row, as in the manual version
    dplyr::mutate(cap_pct = scales::percent(cash / sum(cash)))

  teampicks2022 <- teampicks2022 |>
    dplyr::mutate(
      ## `pick` may be stored as character; comparing a string with a number
      ## compares as strings ("94" < "101" is FALSE), so convert first
      pick = as.numeric(pick),
      top100 = dplyr::case_when(pick < 101 ~ 1, TRUE ~ 0),
      latepicks = dplyr::case_when(pick > 100 ~ 1, TRUE ~ 0)
    ) |>
    dplyr::select(top100, latepicks, posg)

  ## a `posg` can occur several times on both sides, so the fan-out is
  ## expected; declare it so dplyr does not warn
  dplyr::left_join(
    teamcap2022,
    teampicks2022,
    by = "posg",
    relationship = "many-to-many"
  )
}
I would like to be able to do something like datacleanup(falcons) to get the same output as the manual code
#' Clean a cap table and join it to pick flags
#'
#' @param df Cap data frame with columns `average` (numeric, or text that
#'   may contain `$` and `,`) and `posg`.
#' @param df2 Picks data frame with columns `pick` (numeric or numeric text)
#'   and `posg`.
#' @return `df` with `average` converted to numeric and the added columns
#'   `cash` (summed `average` per `posg`), `nump` (rows per `posg`),
#'   `cap_pct`, `top100` and `latepicks`. Rows are repeated once per matching
#'   pick; rows without a matching pick get `NA` flags.
datacleanup2 <- function(df, df2) {
  pick_flags <- df2 |>
    dplyr::mutate(
      # `pick` may be character; comparing a string with a number compares
      # as strings ("94" < "101" is FALSE), so convert before comparing
      pick = as.numeric(pick),
      top100 = as.numeric(pick < 101),
      latepicks = as.numeric(pick > 100)
    ) |>
    dplyr::select(posg, top100, latepicks)

  df |>
    dplyr::mutate(
      average = as.numeric(stringr::str_remove_all(average, ",|\\$"))
    ) |>
    dplyr::mutate(cash = sum(average), nump = dplyr::n(), .by = posg) |>
    # the denominator sums `cash` over every row of `df`
    dplyr::mutate(cap_pct = scales::percent(cash / sum(cash))) |>
    # a `posg` can occur several times on both sides, so the fan-out is
    # expected; state the key and the relationship explicitly
    dplyr::left_join(pick_flags, by = "posg", relationship = "many-to-many")
}
# Example call: run the cleanup on the sample cap and picks tables.
datacleanup2(df = falconscap2022, df2 = falconspicks2022)
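Result (recorded from a run in which `pick` was still character, so `pick < 101` compared strings; compared as numbers, the WR row, pick 94, would show `top100 = 1` and `latepicks = 0`)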
Joining with `by = join_by(posg)`
average posg cash nump cap_pct top100 latepicks
1 18333333 OL 22015155 2 19.64% 1 0
2 16823333 Front7 16823333 1 15.01% 0 1
3 9375000 QB 9375000 1 8.36% NA NA
4 8227624 TE 8227624 1 7.34% NA NA
5 5500000 DB 9076437 2 8.10% 0 1
6 5500000 DB 9076437 2 8.10% 0 1
7 5383617 WR 5383617 1 4.80% 0 1
8 5250000 RB 5250000 1 4.68% NA NA
9 4850000 ST 4850000 1 4.33% NA NA
10 3681822 OL 22015155 2 19.64% 1 0
11 3576437 DB 9076437 2 8.10% 0 1
12 3576437 DB 9076437 2 8.10% 0 1