Search code examples
r, dplyr, data-manipulation, data-mining, data-munging

How to select a certain period based on a condition in R?


I have conducted several experiments that each lasted about a week. For each week, I'd like to keep only the rows from the moment precipitation began until the end of that week.

I know how to discard dry weeks, but I'm not sure how to select periods from the beginning of rain until the end of wet weeks.

My data looks like this

enter image description here

In week 15, I only want to keep rows 144 to 150 and discard the rest. Week 16 can be discarded entirely because it is a completely dry week.

Note: It is fine if several dry and wet periods alternate between the first precipitation and the end of the week. The key is to ignore only the dry period before the first occurrence of rain in each week. Thanks for any assistance.

Here is a reproducible example

# Reproducible example: 26 weeks with 10 observations each (260 rows).
# Only weeks 11, 15, 18, 20 and 25 contain any precipitation; every other
# observation is exactly 0.
df <- local({
  n_weeks <- 26L
  obs_per_week <- 10L

  # Factor with levels "1".."26"; each level covers its 10 consecutive rows.
  week <- factor(rep(seq_len(n_weeks), each = obs_per_week))

  # Start from an all-dry series, then fill in the wet observations by row.
  precipitation <- numeric(n_weeks * obs_per_week)

  # Week 11 (rows 101-110): rain on every observation.
  precipitation[101:110] <- c(
    1.59999999999997,
    0.800000000000011,
    0.600000000000023,
    0.199999999999989,
    0.399999999999977,
    0.400000000000034,
    1,
    0.799999999999955,
    0.400000000000034,
    0.800000000000011
  )
  precipitation[144L] <- 9.60000000000002  # week 15, 4th observation
  precipitation[180L] <- 1                 # week 18, last observation
  precipitation[196L] <- 0.200000000000045 # week 20, 6th observation
  precipitation[242L] <- 0.200000002980232 # week 25, 2nd observation

  # Assemble the tibble by hand so the example does not depend on any package.
  structure(
    list(week = week, precipitation = precipitation),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -260L)
  )
})

Solution

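  • We can use filter(cumany(.)) for this. Within each week, cumany() becomes TRUE at the first row with rain and stays TRUE for the rest of the week, so only the leading dry rows (and weeks that are dry throughout) are dropped: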

    library(dplyr)

    # Per week, keep every row from the first wet observation onward.
    # cumany() is a cumulative "any": FALSE until precipitation > 0 is first
    # seen within the group, TRUE from that row on.
    out <- df |>
      group_by(week) |>
      filter(cumany(precipitation > 0)) |>
      ungroup()

    # Inspect week 15: the three dry rows before the rain are gone.
    out |> filter(week == "15")
    # # A tibble: 7 × 2
    #   week  precipitation
    #   <fct>         <dbl>
    # 1 15             9.60
    # 2 15             0
    # 3 15             0
    # 4 15             0
    # 5 15             0
    # 6 15             0
    # 7 15             0