I have a data frame formatted like so:
GameId | Eval |
---|---|
1 | 1.00 |
1 | 1.50 |
1 | 0.50 |
2 | -1.00 |
2 | 2.00 |
2 | 1.00 |
3 | 1.50 |
3 | -0.50 |
I would like a new column which accounts for differences between the current row and the previous, but resets at the start of the next GameId so that it looks like so:
GameId | Eval | Diff |
---|---|---|
1 | 1.00 | NA |
1 | 1.50 | 0.50 |
1 | 0.50 | -1.00 |
2 | -1.00 | NA |
2 | 2.00 | 3.00 |
2 | 1.00 | -2.00 |
3 | 1.50 | NA |
3 | -0.50 | -2.00 |
In base R you might do :
data <- data.frame(
GameId = c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L),
Eval = c(1, 1.5, 0.5, -1, 2, 1, 1.5, -0.5)
)
data$Diff <- with(data, ave(Eval, GameId, FUN = function(x) c(NA, diff(x))))
data
#> GameId Eval Diff
#> 1 1 1.0 NA
#> 2 1 1.5 0.5
#> 3 1 0.5 -1.0
#> 4 2 -1.0 NA
#> 5 2 2.0 3.0
#> 6 2 1.0 -1.0
#> 7 3 1.5 NA
#> 8 3 -0.5 -2.0
Or using {dplyr}
library(dplyr)
data %>%
group_by(GameId) %>%
mutate(Diff = c(NA, diff(Eval))) %>%
ungroup()
#> # A tibble: 8 × 3
#> GameId Eval Diff
#> <int> <dbl> <dbl>
#> 1 1 1 NA
#> 2 1 1.5 0.5
#> 3 1 0.5 -1
#> 4 2 -1 NA
#> 5 2 2 3
#> 6 2 1 -1
#> 7 3 1.5 NA
#> 8 3 -0.5 -2