I have a weather dataset with observations collected at 15-minute intervals for several weeks. I would like to extract only the last hour
of weather data for each week
and disregard the rest.
In week 15,
for example, I only want to keep rows 147 to 150 (the last four rows; each row represents 15 minutes, so 4 × 15 min = 1 hour) and ignore the rest of that week's weather data.
Reproducible example
# Reproducible example: 26 weeks with 10 observations each (260 rows).
# `week` is a factor with levels "1".."26"; `precipitation` is zero everywhere
# except at the row positions listed below.
df <- structure(
  list(
    week = factor(rep(seq_len(26L), each = 10L)),
    precipitation = replace(
      numeric(260L),
      # Rows carrying a non-zero precipitation reading.
      c(101:110, 144L, 180L, 196L, 242L),
      c(
        1.59999999999997,
        0.800000000000011,
        0.600000000000023,
        0.199999999999989,
        0.399999999999977,
        0.400000000000034,
        1,
        0.799999999999955,
        0.400000000000034,
        0.800000000000011,
        9.60000000000002,
        1,
        0.200000000000045,
        0.200000002980232
      )
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  # Compact "automatic" row names for 260 rows.
  row.names = c(NA, -260L)
)
Since your data has no "time" variable, "the last hour of each week" amounts to "the last 4 rows for each week" (assuming the rows within each week are already in chronological order), in which case:
library(dplyr)
# Keep the final four rows (4 x 15 min = 1 hour) within each week.
# Note: the per-call `by` argument requires dplyr >= 1.1.0.
df %>%
  slice_tail(n = 4, by = week)
# # A tibble: 104 × 2
# week precipitation
# <fct> <dbl>
# 1 1 0
# 2 1 0
# 3 1 0
# 4 1 0
# 5 2 0
# 6 2 0
# 7 2 0
# 8 2 0
# 9 3 0
# 10 3 0
# # ℹ 94 more rows
# # ℹ Use `print(n = ...)` to see more rows