I've got this DF:
# dput() output for the example data: a 74-row tibble with a Date column
# and four numeric columns V1-V4. Assign the result to `df` to reproduce.
structure(list(Date = structure(c(18605, 18604, 18598, 18597,
18590, 18584, 18583, 18578, 18570, 18569, 18563, 18562, 18557,
18549, 18548, 18542, 18541, 18536, 18534, 18529, 18521, 18520,
18515, 18508, 18500, 18499, 18493, 18492, 18486, 18485, 18479,
18478, 18472, 18471, 18465, 18464, 18458, 18457, 18450, 18445,
18444, 18437, 18436, 18430, 18429, 18424, 18416, 18415, 18410,
18409, 18403, 18402, 18396, 18388, 18387, 18381, 18380, 18374,
18373, 18368, 18367, 18360, 18359, 18354, 18340, 18338, 18331,
18325, 18317, 18312, 18289, 18282, 18275, 18268), class = "Date"),
V1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0.3, 0, 0, 0, 0, 0.4, 0, 0, 0, 0, 0.2, 0, 0, 0, 0, 0.7, 0,
0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0.3, 0, 0, 0, 0, 0, 0.4, 0,
0, 0, 0.3, 0, 0, 0, 0, 0, 0, 0, 0, 0.6, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0), V2 = c(0, 0, 0.1, 0, 0, 0.1, 0, 0.2, 0, 0.2,
0.1, 0, 0.2, 0.2, 0, 0.1, 0, 0, 0.1, 0, 0.2, 0, 0, 0.4, 0.2,
0, 0.3, 0, 0.2, 0, 0.3, 0, 0.6, 0, 0.4, 0, 0, 0.2, 0, 0.4,
0.6, 0, 0.3, 0, 0.2, 0.7, 0, 0.1, 0.3, 0, 0.2, 0, 0, 0, 0.3,
0, 0.1, 0.3, 0, 0, 0.3, 0.2, 0, 0, 0, 0, 0.6, 0, 0.4, 0,
0.2, 0, 0, 0.2), V3 = c(0, 0.3, 0, 0.3, 0.4, 0, 0.2, 0, 0.3,
0, 0, 0.2, 0, 0, 0.2, 0, 0.2, 0, 0, 0.1, 0, 0.2, 0, 0, 0,
0.3, 0, 0, 0, 0.4, 0, 0.3, 0, 0.7, 0, 0.2, 0.5, 0.4, 0, 0.4,
0, 0.8, 0.4, 0, 0.2, 0.6, 0.3, 0.2, 0, 0, 0, 0.4, 0.4, 0,
0.2, 0.3, 0, 0.2, 0.3, 0.4, 0, 0.7, 0, 0, 1.4, 0, 0, 1.4,
0, 1, 0, 0, 0.3, 0), V4 = c(0, 0.4, 0, 0.1, 0.1, 0, 0.1,
0, 0, 0.1, 0, 0.1, 0.2, 0, 0.2, 0, 0.2, 0.3, 0, 0, 0, 0.2,
0.3, 0.3, 0, 0, 0, 0.5, 0, 0.6, 0, 0.7, 0, 0, 0, 1.2, 1,
0, 0.3, 0, 1.1, 0, 0, 0.4, 0, 0, 0, 0, 0.2, 0.2, 0, 0, 0.2,
0, 0, 0.1, 0, 0, 0, 0.2, 0.3, 0, 0.2, 0.3, 0, 1.8, 0, 0,
0, 0, 0, 0.2, 0, 0)), row.names = c(NA, -74L), class = c("tbl_df",
"tbl", "data.frame"))
I'd like to mutate columns V1, V2, V3 and V4 so that, instead of showing the current values, each column shows the difference between each value and that column's mean. For example, the mean of V4 is 0.1635135, so the second value of V4 (0.4) should become 0.4 - 0.1635135 = 0.2364865.
I've tried doing it piecemeal (doing each column individually), by doing the following, but I keep getting computing errors:
df <- df %>% mutate(across(2, x - mean())
How can I finish this?
Solution 1: Use a purrr-style function in across()
# Centre each of V1..V4 by subtracting that column's own mean; the
# purrr-style lambda receives one column at a time as `.x`.
df %>%
  mutate(
    across(
      .cols = V1:V4,
      .fns = ~ .x - mean(.x)
    )
  )
# # A tibble: 74 × 5
# Date V1 V2 V3 V4
# <date> <dbl> <dbl> <dbl> <dbl>
# 1 2020-12-09 -0.05 -0.128 -0.204 -0.164
# 2 2020-12-08 -0.05 -0.128 0.0959 0.236
# 3 2020-12-02 -0.05 -0.0284 -0.204 -0.164
# 4 2020-12-01 -0.05 -0.128 0.0959 -0.0635
# 5 2020-11-24 -0.05 -0.128 0.196 -0.0635
# ...
Solution 2: Select the variables with across() and pass them to scale(x, scale = FALSE)
# Select V1..V4 as a data frame, mean-centre every column with scale()
# (no division by the standard deviation), and convert the resulting
# matrix back to a tibble so mutate() overwrites the same-named columns.
df %>%
  mutate(
    as_tibble(
      scale(across(.cols = V1:V4), center = TRUE, scale = FALSE)
    )
  )
# # A tibble: 74 × 5
# Date V1 V2 V3 V4
# <date> <dbl> <dbl> <dbl> <dbl>
# 1 2020-12-09 -0.05 -0.128 -0.204 -0.164
# 2 2020-12-08 -0.05 -0.128 0.0959 0.236
# 3 2020-12-02 -0.05 -0.0284 -0.204 -0.164
# 4 2020-12-01 -0.05 -0.128 0.0959 -0.0635
# 5 2020-11-24 -0.05 -0.128 0.196 -0.0635
# ...