Search code examples
r machine-learning decision-tree

Error: C5.0 models require a factor outcome


I was testing this code to see if R is set up properly, but I kept receiving the error in the title.

My code:

# Load C50, which provides the C5.0() decision-tree function used below.
require(C50)
# gmodels is loaded here as well, but nothing in this script calls it.
require(gmodels)

print("Choose the data file when prompted")
# Read the comma-separated file picked interactively; its first line is the
# header row.
dataset = read.table(file.choose(), header = T, sep=",")

# Drop column 1 (Day), which only labels the rows.
dataset = dataset[,-1]
# Train a C5.0 model: every remaining column except the 4th is a feature and
# the 4th column (Play) is the outcome.
# NOTE: this is the call that stops with "C5.0 models require a factor
# outcome" when Play has been read as a character column instead of a factor.
model = C5.0(dataset[, -4], dataset[, 4])
# Plot the fitted tree. The title string spans two source lines, so it
# contains a literal line break in addition to the "\n" escape.
plot(model, type="s", main="Decision Tree 1\n[%100 data 
used to train the model]")

Input data:

Day,Outlook,Humidity,Wind,Play
D1,Sunny,High,Weak,No
D2,Sunny,High,Strong,No
D3,Overcast,High,Weak,Yes
D4,Rain,High,Weak,Yes
D5,Rain,Normal,Weak,Yes
D6,Rain,Normal,Strong,No
D7,Overcast,Normal,Strong,Yes
D8,Sunny,High,Weak,No
D9,Sunny,Normal,Weak,Yes
D10,Rain,Normal,Weak,Yes
D11,Sunny,Normal,Strong,Yes
D12,Overcast,High,Strong,Yes
D13,Overcast,Normal,Weak,Yes
D14,Rain,High,Strong,No

Solution

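  • The Play column must be a 'factor' for C5.0. Since R 4.0.0, read.table()
    defaults to stringsAsFactors = FALSE, so Play is read as a character
    column and has to be converted explicitly.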

    # Reproducible fix for "C5.0 models require a factor outcome":
    # read the sample data, convert the outcome column to a factor, fit a
    # C5.0 decision tree and plot it.

    # library() stops immediately if a package is missing, whereas require()
    # only returns FALSE and lets the script fail later with a less clear error.
    library(C50)     # provides C5.0()
    library(gmodels) # kept from the question; not used by this snippet

    # The data is embedded as a string so the answer is self-contained.
    # The quotes hug the first and last rows so that no blank or
    # whitespace-only line reaches read.table().
    csv_text <- "Day,Outlook,Humidity,Wind,Play
    D1,Sunny,High,Weak,No
    D2,Sunny,High,Strong,No
    D3,Overcast,High,Weak,Yes
    D4,Rain,High,Weak,Yes
    D5,Rain,Normal,Weak,Yes
    D6,Rain,Normal,Strong,No
    D7,Overcast,Normal,Strong,Yes
    D8,Sunny,High,Weak,No
    D9,Sunny,Normal,Weak,Yes
    D10,Rain,Normal,Weak,Yes
    D11,Sunny,Normal,Strong,Yes
    D12,Overcast,High,Strong,Yes
    D13,Overcast,Normal,Weak,Yes
    D14,Rain,High,Strong,No"

    # strip.white = TRUE removes any indentation that is carried into the
    # fields when the rows above are pasted with leading spaces.
    dataset <- read.table(
      text = csv_text,
      header = TRUE,
      sep = ",",
      strip.white = TRUE
    )

    # To load the data from a file instead, as in the question:
    # dataset <- read.table(file.choose(), header = TRUE, sep = ",")

    # Day only labels the rows, so it is excluded from the model. Columns are
    # addressed by name so the script keeps working if their order changes.
    dataset <- dataset[, names(dataset) != "Day"]

    # The outcome must be a 'factor' (category); read.table() returns it as
    # character, which is what triggers the error.
    dataset$Play <- as.factor(dataset$Play)

    # Fit the decision tree: every column except Play is a feature, Play is
    # the class to predict.
    predictors <- dataset[, setdiff(names(dataset), "Play")]
    model <- C5.0(x = predictors, y = dataset$Play)

    # Plot the fitted tree. The title is kept on one source line so that no
    # stray line break or indentation ends up inside the string.
    plot(
      model,
      type = "s",
      main = "Decision Tree 1\n[100% of data used to train the model]"
    )
    

    c50 decision tree