I have a dataset of temperatures and times from two weather stations that looks like this:
# A tibble: 6 × 7
Station Date Time Temperature Tmin Tmed Tmax
<chr> <date> <time> <dbl> <dbl> <dbl> <dbl>
1 F 2021-10-15 00:11:46 16.8 15.2 17.1 20.4
2 F 2021-10-15 00:41:46 16.5 15.2 17.1 20.4
3 F 2021-10-15 01:11:46 16.2 15.2 17.1 20.4
4 F 2021-10-15 01:41:46 15.6 15.2 17.1 20.4
5 F 2021-10-15 02:11:46 15.9 15.2 17.1 20.4
6 F 2021-10-15 02:41:46 16.1 15.2 17.1 20.4
Here is a reproducible example of the first two days (sorry, I know it's a mess), obtained via `dput()`:
structure(list(Station = c("F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F"), Date = structure(c(18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915, 18915,
18915, 18915, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916, 18916,
18916, 18916, 18916, 18916, 18916), class = "Date"), Time = structure(c(706,
2506, 4306, 6106, 7906, 9706, 11506, 13306, 15106, 16906, 18706,
20506, 22306, 24106, 25906, 27706, 29506, 31306, 33106, 34906,
36706, 38506, 40306, 42106, 43906, 45706, 47506, 49306, 51106,
52906, 54706, 56506, 58306, 60106, 61906, 63706, 65506, 67306,
69106, 70906, 72706, 74506, 76306, 78106, 79906, 81706, 83506,
85306, 706, 2506, 4306, 6106, 7906, 9706, 11506, 13306, 15106,
16906, 18706, 20506, 22306, 24106, 25906, 27706, 29506, 31306,
33106, 34906, 36706, 38506, 40306, 42106, 43906, 45706, 47506,
49306, 51106, 52906, 54706, 56506, 58306, 60106, 61906, 63706,
65506, 67306, 69106, 70906, 72706, 74506, 76306, 78106, 79906,
81706, 83506, 85306), class = c("hms", "difftime"), units = "secs"),
Temperature = c(16.8, 16.5, 16.2, 15.6, 15.9, 16.1, 16.4,
16.2, 16, 16, 16.2, 16.2, 15.9, 16, 16, 16.4, 16.2, 16.5,
16.1, 16.4, 16.8, 16.6, 18.6, 16.9, 18.6, 19.5, 18.5, 18.5,
20.4, 19.1, 19.8, 19.7, 18.1, 17.4, 17.4, 16.9, 15.8, 16.8,
16.9, 16.8, 17, 15.2, 16.2, 17.4, 18.1, 18.3, 18, 17.9, 17.6,
17.9, 17.7, 17.7, 17.7, 17.8, 18.1, 18.3, 18.1, 16.2, 18,
18.8, 18.6, 19.1, 18.9, 17.9, 16.2, 17.3, 19.3, 20.2, 20.7,
20.9, 22.2, 22.3, 21.2, 21.1, 20.1, 23.3, 21.4, 20.2, 19.8,
18.9, 19.8, 20.1, 20.4, 19.5, 18.8, 18, 17.9, 17.9, 17.8,
18, 17.9, 16.5, 16.8, 16.5, 16.7, 16.7), Tmin = c(15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 15.2,
15.2, 15.2, 15.2, 15.2, 15.2, 15.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2, 16.2,
16.2, 16.2, 16.2, 16.2), Tmed = c(17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1, 17.1,
17.1, 17.1, 17.1, 17.1, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333, 18.8083333333333, 18.8083333333333,
18.8083333333333, 18.8083333333333), Tmax = c(20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 20.4,
20.4, 20.4, 20.4, 20.4, 20.4, 20.4, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3, 23.3,
23.3, 23.3, 23.3, 23.3)), row.names = c(NA, -96L), class = c("tbl_df",
"tbl", "data.frame"))
I would like to add a column that tells me whether the temperature at a given time is close to the daily minimum temperature.
The best way to do this seemed to be the `dplyr::between()` function, and I tried to write it like this:
# Attempt: flag readings that lie between the day's Tmin and Tmin + 2.
# Running this raises the "`left` must be length 1" error shown below.
TimeTempReprod %>%
group_by(Date, Station) %>%
mutate(y = between(Temperature, Tmin, Tmin + 2))
What I get in the console when I run this code is:
Error in `mutate()`:
! Problem while computing `y = dplyr::between(Temperature, Tmin, Tmin + 2)`.
ℹ The error occurred in group 1: Date = 2021-10-15, Station = "F".
Caused by error in `dplyr::between()`:
! `left` must be length 1
I looked for answers to this problem, but I couldn't find this error discussed in relation to the `between()` function anywhere else...
I hope this question is understandable, and I am sorry if it has problems. It is the first question I have posted to Stack Exchange after learning from it for two years, so I still have to learn how to use it properly. Thanks to whoever finds the time to help me!
As the error message says, `between()` needs `left` to be a single value (and the same goes for `right`), but inside `mutate()` the name `Tmin` refers to the entire vector of values for each group. To solve the problem, wrap `Tmin` in a function that reduces the vector to one value. Since every value in a group's vector is the same, many functions will work, e.g. `min()` or `first()`:
# Within each Date/Station group, Tmin holds the same value on every row, so
# min(Tmin) reduces it to the single number that between() needs for each bound.
TimeTempReprod %>%
group_by(Date, Station) %>%
mutate(y = between(Temperature, min(Tmin), min(Tmin) + 2))
This gives:
# A tibble: 96 × 8
# Groups: Date, Station [2]
Station Date Time Temperature Tmin Tmed Tmax y
<chr> <date> <time> <dbl> <dbl> <dbl> <dbl> <lgl>
1 F 2021-10-15 00:11:46 16.8 15.2 17.1 20.4 TRUE
2 F 2021-10-15 00:41:46 16.5 15.2 17.1 20.4 TRUE
3 F 2021-10-15 01:11:46 16.2 15.2 17.1 20.4 TRUE
4 F 2021-10-15 01:41:46 15.6 15.2 17.1 20.4 TRUE
5 F 2021-10-15 02:11:46 15.9 15.2 17.1 20.4 TRUE
6 F 2021-10-15 02:41:46 16.1 15.2 17.1 20.4 TRUE
7 F 2021-10-15 03:11:46 16.4 15.2 17.1 20.4 TRUE
8 F 2021-10-15 03:41:46 16.2 15.2 17.1 20.4 TRUE
9 F 2021-10-15 04:11:46 16 15.2 17.1 20.4 TRUE
10 F 2021-10-15 04:41:46 16 15.2 17.1 20.4 TRUE
# … with 86 more rows