Search code examples
r, dplyr, rle

Count runs in dplyr within groups


I have

so.df<-structure(list(yeard = c(2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 2011, 
2011, 2011, 2011, 2011, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 
2012), ydayd = c(3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 18, 19, 20, 
21, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 38, 39, 40, 41, 42, 
45, 46, 47, 48, 49, 53, 54, 55, 56, 59, 60, 61, 62, 63, 66, 67, 
68, 69, 70, 73, 74, 75, 76, 77, 80, 81, 82, 83, 84, 87, 88, 89, 
90, 91, 94, 95, 96, 97, 98, 101, 102, 103, 104, 105, 108, 109, 
110, 111, 115, 116, 117, 118, 119, 122, 123, 124, 125, 126, 129, 
130, 131, 132, 133, 136, 137, 138, 139, 140, 143, 144, 145, 146, 
147, 151, 152, 153, 154, 157, 158, 159, 160, 161, 164, 165, 166, 
167, 168, 171, 172, 173, 174, 175, 178, 179, 180, 181, 182, 186, 
187, 188, 189, 192, 193, 194, 195, 196, 199, 200, 201, 202, 203, 
206, 207, 208, 209, 210, 213, 214, 215, 216, 217, 220, 221, 222, 
223, 224, 227, 228, 229, 230, 231, 234, 235, 236, 237, 238, 241, 
242, 243, 244, 245, 249, 250, 251, 252, 255, 256, 257, 258, 259, 
262, 263, 264, 265, 266, 269, 270, 271, 272, 273, 276, 277, 278, 
279, 280, 283, 284, 285, 286, 287, 290, 291, 292, 293, 294, 297, 
298, 299, 300, 301, 304, 305, 306, 307, 308, 311, 312, 313, 314, 
315, 318, 319, 320, 321, 322, 325, 326, 327, 329, 332, 333, 334, 
335, 336, 339, 340, 341, 342, 343, 346, 347, 348, 349, 350, 353, 
354, 355, 356, 357, 361, 362, 363, 364, 3, 4, 5, 6, 9, 10, 11, 
12, 13, 17, 18, 19, 20, 23, 24, 25, 26, 27, 30, 31, 32, 33, 34, 
37, 38, 39, 40, 41, 44, 45, 46, 47, 48, 52, 53, 54, 55, 58, 59, 
60, 61, 62, 65, 66, 67, 68, 69, 72, 73, 74, 75, 76, 79, 80, 81, 
82, 83, 86, 87, 88, 89, 90, 93, 94, 95, 96, 100, 101, 102, 103, 
104, 107, 108, 109, 110, 111, 114, 115, 116, 117, 118, 121, 122, 
123, 124, 125, 128, 129, 130, 131, 132, 135, 136, 137, 138, 139, 
142, 143, 144, 145, 146, 150, 151, 152, 153, 156, 157, 158, 159, 
160, 163, 164, 165, 166, 167, 170, 171, 172, 173, 174, 177, 178, 
179, 180, 181, 184, 185, 187, 188, 191, 192, 193, 194, 195, 198, 
199, 200, 201, 202, 205, 206, 207, 208, 209, 212, 213, 214, 215, 
216, 219, 220, 221, 222, 223, 226, 227, 228, 229, 230, 233, 234, 
235, 236, 237, 240, 241, 242, 243, 244, 248, 249, 250, 251, 254, 
255, 256, 257, 258, 261, 262, 263, 264, 265, 268, 269, 270, 271, 
272, 275, 276, 277, 278, 279, 282, 283, 284, 285, 286, 289, 290, 
291, 292, 293, 296, 297, 298, 299, 300, 305, 306, 307, 310, 311, 
312, 313, 314, 317, 318, 319, 320, 321, 324, 325, 326, 328, 331, 
332, 333, 334, 335, 338, 339, 340, 341, 342, 345, 346, 347, 348, 
349, 352, 353, 354, 355, 356, 359, 361, 362, 363, 366), new.high = c(0, 
0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 
1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), class = "data.frame", row.names = c(NA, 
-502L))


# Convert to a tibble and sort chronologically (year first, then day of year)
so.df <- arrange(tibble(so.df), yeard, ydayd)

# A tibble: 502 × 3
   yeard ydayd new.high
   <dbl> <dbl>    <dbl>
 1  2011     3        0
 2  2011     4        0
 3  2011     5        0
 4  2011     6        1
 5  2011     7        1
 6  2011    10        1
 7  2011    11        1
 8  2011    12        0
 9  2011    13        0
10  2011    14        0
# … with 492 more rows

I would like to calculate, for every yeard, the maximum number of consecutive 0s in new.high within that year.

Basically, something like what

# Run-length encode the whole new.high column (years are not separated here)
so.df.rle <- rle(so.df[["new.high"]])
# Keep only the lengths of the runs whose value is 0
with(so.df.rle, lengths[values == 0])

does, but computed separately for each year.


Solution

  • You can create a function that calculates the maximum length of a run of consecutive 0s within a vector:

    # Length of the longest run of consecutive 0s in `vec`.
    #
    # vec: an atomic vector (typically a numeric 0/1 indicator), already in
    #      the order in which runs should be counted.
    # Returns a single integer: the longest zero-run length, or 0L when `vec`
    # contains no 0 at all (including when it is empty).
    maxLength <- function(vec){
      res <- rle(vec)
      # which() drops NA comparisons, so NA entries are never counted as 0s
      # and cannot turn the result into NA
      zero_runs <- res[["lengths"]][which(res[["values"]] == 0)]
      # max() on an empty vector warns and returns -Inf; "no zero run" is 0
      if (length(zero_runs) == 0L) {
        return(0L)
      }
      max(zero_runs)
    }
    

    and use it within the aggregate function:

    # Rows must be in chronological order before aggregating, otherwise the
    # runs seen by maxLength would not correspond to consecutive days.
    aggregate(
      new.high ~ yeard,
      data = so.df[with(so.df, order(yeard, ydayd)), ],
      FUN = maxLength
    )
    

    or using the dplyr package:

    # Sort chronologically, then compute the longest zero-run of new.high
    # separately for each year (result column is named `l`).
    arrange(so.df, yeard, ydayd) %>%
      group_by(yeard) %>%
      summarize(l = maxLength(new.high))