Calculate median grouping in multiple year increments R

I'm trying to use dplyr to calculate medians by grouping 3 different columns and in 3 year increments.

My data looks like this:

data <- data.frame("Year" = c("1990","1990", "1992", "1993", "1994", "1990", "1991", "1990", 
"1991", "1992", "1994", "1995"),"Type" = c("Al", "Al", "Al", "Al", "Al", "Al", "Al", "Cu", 
"Cu", "Cu", "Cu", "Cu"), "Frac" = c("F", "F", "F", "F", "F", "UF", "UF", "F", "F", "UF", 
"UF", "UF"), "Value" = c(0.1, 0.2, 0.3, 0.6, 0.7, 1.3, 1.5, 0.4, 0.2, 0.9, 2.3, 2.9))

I would like to calculate the median of "Value" in 3 year groupings and also grouping by "Type" and "Frac".

The problem is that sometimes there is a missing year, so I want it to group in 3 year increments based on the data that I have. Showing what I mean with my example data it would be grouped like this: (1990, 1992, 1993) for Al and F. Then just (1994) for Al and F since there's no more data for Al and F. Then (1990, 1991) for Al and UF since there's only 2 years worth of data. So basically I want it to be grouped by 3 years if possible, but if not, then do whatever is left over.

This is the end table I would like to have:

stats_wanted <- data.frame("Year" = c("1990, 1992, 1993", "1994", "1990, 1991", 
"1990, 1991", "1992, 1994, 1995"), "Type" = c("Al", "Al", "Al", "Cu", "Cu"), "Frac" = 
c("F", "F", "UF", "F", "UF"), "Median" = c(0.25, 0.7, 1.4, 0.3, 2.3))

Hopefully this makes sense... let me know if you have any questions :)!

Solution

I do not know dplyr, but here is a data.table solution.

library(data.table)
setDT(data)
data = data[order(Type,Frac,Year)]
# data = data[order(Year)] also works fine
data[
  !duplicated(.SD,by=c('Year','Type','Frac')),
  yeargroup:=0:(.N-1) %/% 3,
  .(Type,Frac)]
# !duplicated... selects only the first unique row by year,type,frac
# 0:(.N-1) gives 0 to N-1 for each Type,Frac group
# %/% 3 gives the remainder when divided by 3

> data
    Year Type Frac Value yeargroup
 1: 1990   Al    F   0.1         0
 2: 1990   Al    F   0.2        NA <- NA because dupe Year,Type,Frac
 3: 1992   Al    F   0.3         0
 4: 1993   Al    F   0.6         0
 5: 1994   Al    F   0.7         1
 6: 1990   Al   UF   1.3         0
 7: 1991   Al   UF   1.5         0
 8: 1990   Cu    F   0.4         0
 9: 1991   Cu    F   0.2         0
10: 1992   Cu   UF   0.9         0
11: 1994   Cu   UF   2.3         0
12: 1995   Cu   UF   2.9         0

# handle dupe Year,Type,Frac rows:
data[,yeargroup:=max(yeargroup,na.rm=T),.(Year,Type,Frac)]

> data
    Year Type Frac Value yeargroup
 1: 1990   Al    F   0.1         0
 2: 1990   Al    F   0.2         0 <- fixed NA
 3: 1992   Al    F   0.3         0
 4: 1993   Al    F   0.6         0
 5: 1994   Al    F   0.7         1
 6: 1990   Al   UF   1.3         0
 7: 1991   Al   UF   1.5         0
 8: 1990   Cu    F   0.4         0
 9: 1991   Cu    F   0.2         0
10: 1992   Cu   UF   0.9         0
11: 1994   Cu   UF   2.3         0
12: 1995   Cu   UF   2.9         0

stats_wanted = data[,
  .(Year=paste0(unique(Year),collapse=', '),Median=median(Value)),
  .(Type,Frac,yeargroup)]

> stats_wanted
   Type Frac yeargroup             Year Median
1:   Al    F         0 1990, 1992, 1993   0.25
2:   Al    F         1             1994   0.70
3:   Al   UF         0       1990, 1991   1.40
4:   Cu    F         0       1990, 1991   0.30
5:   Cu   UF         0 1992, 1994, 1995   2.30

PS: @ronak-shah posted a concise dplyr solution, which inspired me to post another data.table solution which is even conciser:

> data[
  order(Year),
  .(Year,Value,group=(rleid(Year)-1)%/%3),
  .(Type,Frac)
][,
  .(Year=paste0(unique(Year),collapse=', '),Median=median(Value)),
  .(Type,Frac,group)
]