There is dataframe raw_df
as below:
library(tidyverse)
detail <- data.frame(cat = c("a","a","a","b","b","b","b","c","c"),
single_amount = c(1,3,7,2,1,4,6,1,6))
total <- data.frame(cat = c("a","b","c"),
total_amount = c(20,10,9))
raw_df <- detail %>% left_join(total, by = 'cat')
I defined self function allocate_data_m
to allocate total_amount
when cumsum(single_data)
is less then total_amount
allocate_data_m <- function(data, single_data, total_data) {
out <- cumsum_single_data <- rep('NA', nrow(data))
for (grouprow in seq_len(nrow(data))) {
cumsum_single_data[grouprow] <-
if_else(grouprow == 1, data[grouprow, single_data],
cumsum_single_data[grouprow-1] + data[grouprow, single_data])
out[grouprow] <- if_else(cumsum_single_data[grouprow] < data[grouprow, total_data], data[grouprow, single_data], 0)
}
out
}
when run the allocate_data_m
in mutate
, the error occur. Anyone can help ? Thanks!
raw_df %>%
group_by(cat) %>%
mutate(amount_x_all = allocate_data_m(cur_data(), single_amount, total_amount))
The wished result as below , I am just wondering how the function allocate_data_m
can't work.
raw_df %>%
group_by(cat) %>%
mutate(amount_x_all = if_else(cumsum(single_amount) < total_amount, single_amount, 0))
A minimal revision to your function:
"NA"
with NA
. The former is literal character string, and the latter refers to the missing value in R.ifelse()
with if (...) {...} else {...}
. Otherwise, cumsum_single_data[grouprow-1]
will throws an error when grouprow
is 1.ifelse()
outside the for-loop, because it can be vectorizedly evaluated.allocate_data_m <- function(single_data, total_data) {
cumsum_single_data <- rep(NA, length(single_data))
for (grouprow in seq_len(length(single_data))) {
cumsum_single_data[grouprow] <- if(grouprow == 1) {
single_data[grouprow]
} else {
cumsum_single_data[grouprow-1] + single_data[grouprow]
}
}
out <- ifelse(cumsum_single_data < total_data, single_data, 0)
out
}
raw_df %>%
group_by(cat) %>%
mutate(amount_x_all = allocate_data_m(single_amount, total_amount)) %>%
ungroup()
# # A tibble: 9 × 4
# cat single_amount total_amount amount_x_all
# <chr> <dbl> <dbl> <dbl>
# 1 a 1 20 1
# 2 a 3 20 3
# 3 a 7 20 7
# 4 b 2 10 2
# 5 b 1 10 1
# 6 b 4 10 4
# 7 b 6 10 0
# 8 c 1 9 1
# 9 c 6 9 6
Use column names instead of column vectors. In this case, you must use column names with double quotes when you call allocate_data_m()
inside mutate()
.
allocate_data_m <- function(data, single_data, total_data) {
if(is(data, "tbl")) data <- as.data.frame(data)
cumsum_single_data <- rep(NA, nrow(data))
for (grouprow in seq_len(nrow(data))) {
cumsum_single_data[grouprow] <- if(grouprow == 1) {
data[grouprow, single_data]
} else {
cumsum_single_data[grouprow-1] + data[grouprow, single_data]
}
}
out <- ifelse(cumsum_single_data < data[, total_data], data[, single_data], 0)
out
}
raw_df %>%
group_by(cat) %>%
mutate(amount_x_all = allocate_data_m(cur_data(), "single_amount", "total_amount")) %>%
ungroup()