Sample frame with data
df.100 <- data.frame(
Rok = c( rep(2020, 12), rep(2021,12)),
Msc = c(rep(seq(1,12,1),2)),
WB_1 = c(rep(c(1,210,15),2), rep(c(11.3,18.9,210.1),2), rep(c(0.4,1.9,10.1),2), rep(c(25.4,13.9,31.1),2) ),
WB_2 = c(rep(c(110,10,15),2), rep(c(11.3,19.9,410.1),2), rep(c(2.4,19.9,15.1),2), rep(c(45.4,31.9,31.6),2) ),
WB_13 = c(rep(c(15.2,118,15.3),2), rep(c(0.4,1.9,10.1),2), rep(c(11.3,18.9,210.1),2), rep(c(45.4,19.9,37.1),2) )
)
When I needed to get a summary grouped by years, I used this solution:
df.100 <- df.100 %>%
group_by(Rok) %>%
summarise(
across(starts_with('WB'), ~ sum (.x, na.rm = T)),
)
And now I need to get a summary for individual years, but covering only 9 months (January to September)
df.100.9 <- df.100 %>%
group_by(Rok) %>%
summarise(
across(starts_with('WB'), ~ sum (.x, na.rm = T)),
# with condition
across(starts_with('WB'), ~ sum (.[Msc <= 9], na.rm = T), .names = "9_{col}"),
)
Unfortunately, the values for all 12 months are the same as for 9 months:
> df.100.9
# A tibble: 2 × 7
Rok WB_1 WB_2 WB_13 `9_WB_1` `9_WB_2` `9_WB_13`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2020 933. 1153. 322. 933. 1153. 322.
2 2021 166. 293. 685. 166. 293. 685.
>
How do I correctly enter the months condition?
I have figured out it's due to the column names from the first sum you take, but I don't have an explanation for why that messes up the second sum yet. If you throw an "x", for example, at the front of the new names, those columns no longer interfere with the second across(starts_with('WB'), ~ sum(...
. See the addition of .names = "x{col}"
.
df.100 %>%
group_by(Rok) %>%
summarise(
across(starts_with('WB'), ~ sum (.x, na.rm = T), .names = "x{col}"),
# with condition
across(starts_with('WB'),
~ sum(case_when(Msc <= 9 ~ .x, .default = 0), na.rm = T), .names = "9_{col}")
)
# A tibble: 2 × 7
Rok xWB_1 xWB_2 xWB_13 `9_WB_1` `9_WB_2` `9_WB_13`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2020 933. 1153. 322. 692. 711. 309.
2 2021 166. 293. 685. 95.2 184. 583