Search code examples
rlabel

label cut() output with hyphen


I want a way to automatically edit the label output from the following cut:

library(dplyr)
library(janitor)

# Reproducible sample: 200 integers drawn with replacement from 0..50.
set.seed(1)
df <- data.frame(a = sample(0:50, 200, replace = TRUE))

# Bin `a` into left-closed intervals (right = FALSE): width 1 from 0 to 10,
# width 5 from 10 to 40, then one open-ended bin [40, Inf).
# No `labels` are supplied, so cut() generates interval-notation labels.
df <- df %>%
  mutate(
    a_cut = cut(
      a,
      breaks = c(0:9, seq(10, 40, by = 5), Inf),
      include.lowest = FALSE,
      right = FALSE
    )
  )

# Frequency table of the binned column.
tabyl(df, a_cut)
#     a_cut  n percent
#     [0,1)  5   0.025
#     [1,2)  3   0.015
#     [2,3)  1   0.005
#     [3,4)  1   0.005
#     [4,5)  0   0.000
#     [5,6)  7   0.035
#     [6,7)  3   0.015
#     [7,8)  0   0.000
#     [8,9)  3   0.015
#    [9,10)  4   0.020
#   [10,15) 15   0.075
#   [15,20) 17   0.085
#   [20,25) 24   0.120
#   [25,30) 17   0.085
#   [30,35) 21   0.105
#   [35,40) 27   0.135
#  [40,Inf) 52   0.260

into the following cut labels (a_cut):

# a_cut   n percent
#     0   5   0.025
#     1   3   0.015
#     2   1   0.005
#     3   1   0.005
#     4   0   0.000
#     5   7   0.035
#     6   3   0.015
#     7   0   0.000
#     8   3   0.015
#     9   4   0.020
# 10-14  15   0.075
# 15-19  17   0.085
# 20-24  24   0.120
# 25-29  17   0.085
# 30-34  21   0.105
# 35-39  27   0.135
# 40+    52   0.260

I think the cutr or kimisc packages could do it, but I can't figure out the correct configuration.

# e.g., this attempt comes close: "-" as the separator and all four
# bracket/parenthesis strings blanked out
kimisc::cut_format(
  df$a,
  c(1:9, seq(10, 40, by = 5)),
  sep = "-",
  paren = rep("", 4)
)

thanks


Solution

  • You can add a labels argument to cut:

    library(dplyr)
    library(janitor)

    # Reproducible sample: 200 integers drawn with replacement from 0..50.
    set.seed(1)
    df <- data.frame(a = sample(0:50, 200, replace = TRUE))

    # Bin edges for left-closed intervals: width 1 from 0 to 10, width 5 from
    # 10 to 40, then one open-ended bin starting at 40.
    cut_breaks <- c(0:9, seq(10, 40, by = 5), Inf)

    # Derive the labels from the breaks so the two cannot drift apart.
    # For integer data the bin [lo, hi) covers lo..hi - 1, so:
    #   open-ended bin   -> "lo+"
    #   single-value bin -> "lo"
    #   otherwise        -> "lo-(hi - 1)"
    bin_lower <- head(cut_breaks, -1)
    bin_upper <- tail(cut_breaks, -1) - 1
    cut_labels <- ifelse(
      is.infinite(bin_upper),
      paste0(bin_lower, "+"),
      ifelse(
        bin_lower == bin_upper,
        as.character(bin_lower),
        paste(bin_lower, bin_upper, sep = "-")
      )
    )

    df <- df %>%
      mutate(
        a_cut = cut(
          a,
          breaks = cut_breaks,
          labels = cut_labels,
          include.lowest = FALSE,
          right = FALSE
        )
      )

    # Frequency table of the relabelled bins.
    tabyl(df, a_cut)
    #>  a_cut  n percent
    #>      0  5   0.025
    #>      1  3   0.015
    #>      2  1   0.005
    #>      3  1   0.005
    #>      4  0   0.000
    #>      5  7   0.035
    #>      6  3   0.015
    #>      7  0   0.000
    #>      8  3   0.015
    #>      9  4   0.020
    #>  10-14 15   0.075
    #>  15-19 17   0.085
    #>  20-24 24   0.120
    #>  25-29 17   0.085
    #>  30-34 21   0.105
    #>  35-39 27   0.135
    #>    40+ 52   0.260