Search code examples
r dataframe intervals cut

How to create multiple columns of dummies for intervals in R


# Build one 0/1 dummy column per 100-unit distance band.
# Bands are closed on the right -- (100,200], (200,300], ... -- which matches
# the default intervals of cut(data$Distance, breaks = c(0,100,...,500)).
# The earlier bounds (`< 100`, then `>= 101 & < 200`, ...) left every boundary
# value (100, 200, ...) and every fractional value such as 100.5 in no band.
# The first band keeps no lower bound, so values at or below 0 still fall in it.
# Each column starts at 0 and only rows matching the band are switched to 1.
data$Distance_100 <- 0
data$Distance_100[data$Distance <= 100] <- 1

data$Distance_200 <- 0
data$Distance_200[data$Distance > 100 & data$Distance <= 200] <- 1

data$Distance_300 <- 0
data$Distance_300[data$Distance > 200 & data$Distance <= 300] <- 1

data$Distance_400 <- 0
data$Distance_400[data$Distance > 300 & data$Distance <= 400] <- 1

data$Distance_500 <- 0
data$Distance_500[data$Distance > 400 & data$Distance <= 500] <- 1

The outcome must be multiple columns. This code creates just one column: data$DistanceCut5 = cut(data$Distance, breaks=c(0,100,200,300,400,500))


Solution

  • cut will create a single column, but if you want 1 column for each cut level you could do something like this:

    Example

    Libraries

    library(tidyverse)
    

    Code

    # Break points for the bands: 0, 100, ..., 500
    seq_0_500 <- seq(0, 500, 100)

    # Example data.frame
    tibble(
      # Variable distance = sequence from 1 to 500 by 1
      distance = 1:500
    ) %>%
      mutate(
        # Categorical variable in steps of 100:
        # `(0,100]` `(100,200]` `(200,300]` `(300,400]` `(400,500]`,
        # labelled Distance_100 ... Distance_500
        distance_cut = cut(
          distance,
          seq_0_500,
          labels = paste0("Distance_", seq_0_500[-1])
        ),
        # Auxiliary variable: the value written into the matching dummy column
        aux = 1,
        # Unique row key: without it, rows that share the same distance are
        # treated as one observation by pivot_wider() and collapse into
        # list-columns
        row_id = row_number()
      ) %>%
      # Pivot data to make one column for each cut level; cells for the
      # non-matching levels are filled with 0 here, so NAs in other columns
      # are left untouched
      pivot_wider(
        names_from = distance_cut,
        values_from = aux,
        values_fill = 0
      ) %>%
      # Drop the helper key so only distance and the dummy columns remain
      select(-row_id)
    

    Output

    # A tibble: 500 x 6
       distance Distance_100 Distance_200 Distance_300 Distance_400 Distance_500
          <int>        <dbl>        <dbl>        <dbl>        <dbl>        <dbl>
     1        1            1            0            0            0            0
     2        2            1            0            0            0            0
     3        3            1            0            0            0            0
     4        4            1            0            0            0            0
     5        5            1            0            0            0            0
     6        6            1            0            0            0            0
     7        7            1            0            0            0            0
     8        8            1            0            0            0            0
     9        9            1            0            0            0            0
    10       10            1            0            0            0            0
    # ... with 490 more rows