Search code examples
r cut

How to keep the groups in `cut` order while using `complete`


I am using `cut` to generate subtotals of the number of people in each age group. The sample data and code are:

# Fix the RNG seed so the sampled ages are reproducible.
set.seed(12345)

# Create a numeric variable AGE: 100 integer ages drawn with replacement from 0..110.
AGE <- sample(0:110, 100, replace = TRUE)

# Wrap the ages in a one-column data frame.
Sample.data <-data.frame(AGE)

# Count the number of people in each age group.
# The pipeline relies on %>%, group_by(), summarise() and n(); presumably these
# come from dplyr, which must be attached before this runs.
# NOTE(review): the breaks define seven right-closed intervals
# (-Inf,0], (0,0.001], (0.001,0.083], (0.083,2], (2,13], (13,65], (65,1000],
# but the labels describe different boundaries (e.g. there is no break at 18,
# yet the labels mention 17.999 / 18) - confirm which one is intended.
summary_data<- Sample.data %>% 
  group_by(grp =  cut(
    AGE,
    breaks=c(-Inf, 0,0.001, 0.083, 2, 13, 65,1000),
    right=TRUE,
    labels = c("Foetus(0 yr)","Neonate (0.001 - 0.082 yr)","Infant(0.083-1.999 yrs)","Child(2-12.999 yrs)", "Adolescent(13-17.999 yrs)","Adult(18-64.999 yrs.)","Elderly(65-199 yrs)")
  )) %>% summarise(TotalPeople = n())

Here is what I get:

enter image description here

I would like to keep the age groups that don't have any observations in the table, so I added `complete` with `fill`. I was able to fill in 0 for the empty age groups, but the order of the age groups also changed, which I do not want. See the code and results:

# Count people per age group, then use complete() to add the age groups that
# have no rows, filling their TotalPeople with 0.
# NOTE(review): levels(grp) is a character vector, so grp presumably stops
# being a factor after complete() and the rows come back in alphabetical
# order instead of cut() order - this is the behaviour the question reports.
summary_data<- Sample.data %>% 
  group_by(grp =  cut(
    AGE,
    breaks=c(-Inf, 0,0.001, 0.083, 2, 13, 65,1000),
    right=TRUE,
    labels = c("Foetus(0 yr)","Neonate (0.001 - 0.082 yr)","Infant(0.083-1.999 yrs)","Child(2-12.999 yrs)", "Adolescent(13-17.999 yrs)","Adult(18-64.999 yrs.)","Elderly(65-199 yrs)")
  )) %>% summarise(TotalPeople = n(),)%>% complete(grp = levels(grp), fill = list(TotalPeople = 0)) 

enter image description here

Is there a way to keep the age groups in the order produced by `cut`? Or is there another way to keep the age groups with 0 observations so that their order does not change? The ideal result should look like this:

enter image description here


Solution

  • You can add ordered_result = TRUE. With table you can get the counts and with as.data.frame you get a data.frame of the counts per group.

    # Bin the ages into ordered life-stage groups. The intervals are
    # right-closed, as the "(a-b]" labels indicate.
    grp <- cut(
      Sample.data$AGE,
      breaks = c(-Inf, 0, 0.082, 2, 13, 18, 65, Inf),
      labels = c(
        "Foetus(0 yr)", "Neonate (0 - 0.082] yrs", "Infant (0.082-2] yrs",
        "Child (2-13] yrs", "Adolescent (13-18] yrs", "Adult (18-65] yrs",
        "Elderly >65 yrs"
      ),
      ordered_result = TRUE
    )
    # Tabulate the factor: one row per level, in level order, including
    # levels without any observation.
    as.data.frame(table(grp))
    #                      grp Freq
    #1            Foetus(0 yr)    1
    #2 Neonate (0 - 0.082] yrs    0
    #3    Infant (0.082-2] yrs    2
    #4        Child (2-13] yrs   14
    #5  Adolescent (13-18] yrs    5
    #6       Adult (18-65] yrs   32
    #7         Elderly >65 yrs   46
    

    To also get the mean age, you can use xtabs: it sums AGE within each group, and dividing those sums by the group counts gives the mean.

    # Group sizes; empty age groups are kept with a count of 0.
    x <- table(grp)
    # xtabs() with AGE on the left-hand side sums AGE within each group;
    # dividing by the counts gives the mean (NaN for empty groups, 0 / 0).
    # Pass a proper data frame so that both AGE and grp are resolved from
    # `data`: a cbind() matrix would reduce the factor to integer codes under
    # an unnamed column, leaving `grp` to be looked up outside `data`.
    cbind(
      TotalPeople = x,
      meanAge = xtabs(AGE ~ grp, data = data.frame(AGE, grp)) / x
    )
    #                        TotalPeople   meanAge
    #Foetus(0 yr)                      1  0.000000
    #Neonate (0 - 0.082] yrs           0       NaN
    #Infant (0.082-2] yrs              2  1.500000
    #Child (2-13] yrs                 14  9.071429
    #Adolescent (13-18] yrs            5 15.000000
    #Adult (18-65] yrs                32 41.093750
    #Elderly >65 yrs                  46 87.434783
    

    Or you can use aggregate

    # One row per age group with its head count and mean age.
    # drop = FALSE keeps the groups without observations (reported as NA).
    # The data is a proper data frame so that both AGE and grp are resolved
    # from `data`: a cbind() matrix would reduce the factor to integer codes
    # under an unnamed column, leaving `grp` to be looked up outside `data`.
    aggregate(
      AGE ~ grp,
      data = data.frame(AGE, grp),
      FUN = function(x) c(TotalPeople = length(x), meanAge = mean(x)),
      drop = FALSE
    )
    #                      grp AGE.TotalPeople AGE.meanAge
    #1            Foetus(0 yr)        1.000000    0.000000
    #2 Neonate (0 - 0.082] yrs              NA          NA
    #3    Infant (0.082-2] yrs        2.000000    1.500000
    #4        Child (2-13] yrs       14.000000    9.071429
    #5  Adolescent (13-18] yrs        5.000000   15.000000
    #6       Adult (18-65] yrs       32.000000   41.093750
    #7         Elderly >65 yrs       46.000000   87.434783
    

    or you can use by

    # Summarise the ages of each group separately; groups without any
    # observation show up as NULL in the printed result.
    by(
      data = AGE,
      INDICES = grp,
      FUN = function(x) c(TotalPeople = length(x), meanAge = mean(x))
    )
    #grp: Foetus(0 yr)
    #TotalPeople     meanAge 
    #          1           0 
    #------------------------------------------------------------ 
    #grp: Neonate (0 - 0.082] yrs
    #NULL
    #------------------------------------------------------------ 
    #grp: Infant (0.082-2] yrs
    #TotalPeople     meanAge 
    #        2.0         1.5 
    #------------------------------------------------------------ 
    #grp: Child (2-13] yrs
    #TotalPeople     meanAge 
    #  14.000000    9.071429 
    #------------------------------------------------------------ 
    #grp: Adolescent (13-18] yrs
    #TotalPeople     meanAge 
    #          5          15 
    #------------------------------------------------------------ 
    #grp: Adult (18-65] yrs
    #TotalPeople     meanAge 
    #   32.00000    41.09375 
    #------------------------------------------------------------ 
    #grp: Elderly >65 yrs
    #TotalPeople     meanAge 
    #   46.00000    87.43478