r dataframe dplyr data-manipulation data-munging

How to select only the last hour of weather data from each week in R?

I have a weather dataset with observations collected at 15-minute intervals for several weeks. I would like to extract only the last hour of weather data for each week and disregard the rest.

In week 15, for example, I only want to keep rows 147 to 150 (the last four rows; each row represents 15 minutes, so 4 × 15 min = 1 hour) and ignore the rest of the weather data.

Reproducible example

# Reproducible weather sample: 26 weeks with 10 observations each (260 rows).
# `week` is a factor with levels "1".."26"; `precipitation` is zero everywhere
# except for the 14 readings filled in by row position below.
df <- structure(
  list(
    week = factor(rep(1:26, each = 10), levels = 1:26),
    precipitation = replace(
      numeric(260),
      c(101:110, 144L, 180L, 196L, 242L),
      c(
        # rows 101-110 (every observation of week 11)
        1.59999999999997, 0.800000000000011, 0.600000000000023,
        0.199999999999989, 0.399999999999977, 0.400000000000034,
        1, 0.799999999999955, 0.400000000000034, 0.800000000000011,
        # rows 144, 180, 196 and 242
        9.60000000000002, 1, 0.200000000000045, 0.200000002980232
      )
    )
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -260L)
)

Solution

Since you don't have a "time" variable, you're asking for the "last 4 rows for each week", in which case:

library(dplyr)

# Keep the final four rows of every week (4 x 15 min = the last hour).
# The per-call `by` argument of slice_tail() requires dplyr >= 1.1.0.
df %>%
  slice_tail(n = 4, by = week)
# # A tibble: 104 × 2
#    week  precipitation
#    <fct>         <dbl>
#  1 1                 0
#  2 1                 0
#  3 1                 0
#  4 1                 0
#  5 2                 0
#  6 2                 0
#  7 2                 0
#  8 2                 0
#  9 3                 0
# 10 3                 0
# # ℹ 94 more rows
# # ℹ Use `print(n = ...)` to see more rows