Search code examples
rmachine-learningclassificationr-caretadaboost

How to use the 'adaboost' method to Build Classification Trees wthin the Caret and fastAdaboost Packages in R


Issue

I'm attempting to use the 'adaboost' method within the Caret and fastAdaboost packages. My objective is to build a classification tree using `machine learning techniques in R for an upcoming project at university and I am following this tutorial here.

For this model (see below), I have downloaded the libraries caret and fastAdaboost and whenever I try to run my model, I get the message.

Error: object 'model_adaboost' not found

I don't understand what's wrong with this code (see below) as it's identical to my other models and I don't know why R cannot find my model.

Many thanks if anyone can lend a hand.

These models are running just fine:

#Random Forest
**# Train the model using rf
model_rf = train(Country ~., data=train.data, method='rf', metric=metric, tuneLength= tuneLength, trControl = fitControl)

model_rf

#Naive Bayes
nb_tune <- data.frame(usekernel = TRUE, fL = 0, adjust=seq(0, 5, by = 1))

model.nb1 = train(Country ~., data=train.data,'nb', trControl=fitControl, metric=metric, tuneLength=tuneLength, tuneGrid = nb_tune, laplace = 0:3)
model.nb1

Structure of my data frame

data.frame':    367 obs. of  10 variables:
 $ Country    : Factor w/ 3 levels "Italy","Turkey",..: 2 3 1 3 2 3 3 2 3 3 ...
 $ Low.Freq   : num  -0.1 0.381 0.705 0.441 -0.603 ...
 $ High.Freq  : num  -0.503 0.96 -0.371 0.207 -0.336 ...
 $ Peak.Freq  : num  -0.4751 -0.0966 -0.2089 -0.1952 -0.3184 ...
 $ Delta.Freq : num  -0.334 0.122 -0.567 -0.148 -0.132 ...
 $ Delta.Time : num  -0.445 1.565 -1.145 0.131 0.666 ...
 $ Peak.Time  : num  0.0289 0.1897 -0.4765 -0.029 0.1492 ...
 $ Center.Freq: num  -0.5294 -0.0507 -0.1589 -0.0819 -0.405 ...
 $ Start.Freq : num  0.672 1.787 0.403 0.388 -1.068 ...
 $ End.Freq   : num  -0.5393 -0.8247 -0.0148 -0.9138 0.0482 ...

R CODE

library(caret)
library(fastAdaboost)

#Data is 'Clusters_Dummy_2'

##Produce a new version of the dataframe 'Clusters_Dummy' with the rows shuffled
NewClusters=Cluster_Dummy_2[sample(1:nrow(Cluster_Dummy_2)),]

#Produce a dataframe
NewCluster<-as.data.frame(NewClusters)

#display
print(NewCluster)

#Check the structure of the data
str(NewCluster)

#Number of rows
nrow(NewCluster)

#Split the data frame into 70% to 30% train and test data
training.parameters <- Cluster_Dummy_2$Country %>% 
createDataPartition(p = 0.7, list = FALSE)
train.data <- NewClusters[training.parameters, ]
test.data <- NewClusters[-training.parameters, ]


##Auxiliary function for controlling model fitting      

fitControl <- trainControl(## 10-fold CV
                          method = "repeatedcv",
                          number = 10,
                          ## repeated ten times
                          repeats = 10,
                          classProbs = TRUE,
                          verbose = TRUE)


fitGrid_2 <- expand.grid(mfinal = (1:3)*3,         # This is new!
                         maxdepth = c(1, 3),       # ...and this
                         coeflearn = c("Breiman"),
                         iter=100) # ...and this

model_adaboost = train(Country ~ ., data=train.data, method='adaboost', tuneLength = tuneLength, metric=metric, trControl = fitControl,
                       tuneGrid=fitGrid_2, verbose=TRUE)
model_adaboost

Data

 structure(list(Low.Freq = c(435L, 94103292L, 1L, 2688L, 8471L, 
    28818L, 654755585L, 468628164L, 342491L, 2288474L, 3915L, 411L, 
    267864894L, 3312618L, 5383L, 8989443L, 1894L, 534981L, 9544861L, 
    3437614L, 475386L, 7550764L, 48744L, 2317845L, 5126197L, 2445L, 
    8L, 557450L, 450259742L, 21006647L, 9L, 7234027L, 59L, 9L, 605L, 
    9199L, 3022L, 30218156L, 46423L, 38L, 88L, 396396244L, 28934316L, 
    7723L, 95688045L, 679354L, 716352L, 76289L, 332826763L, 6L, 90975L, 
    83103577L, 9529L, 229093L, 42810L, 5L, 18175302L, 1443751L, 5831L, 
    8303661L, 86L, 778L, 23947L, 8L, 9829740L, 2075838L, 7434328L, 
    82174987L, 2L, 94037071L, 9638653L, 5L, 3L, 65972L, 0L, 936779338L, 
    4885076L, 745L, 8L, 56456L, 125140L, 73043989L, 516476L, 7L, 
    4440739L, 612L, 3966L, 8L, 9255L, 84127L, 96218L, 5690L, 56L, 
    3561L, 78738L, 1803363L, 809369L, 7131L, 0L), High.Freq = c(6071L, 
    3210L, 6L, 7306092L, 6919054L, 666399L, 78L, 523880161L, 4700783L, 
    4173830L, 30L, 811L, 341014L, 780L, 44749L, 91L, 201620707L, 
    74L, 1L, 65422L, 595L, 89093186L, 946520L, 6940919L, 655350L, 
    4L, 6L, 618L, 2006697L, 889L, 1398L, 28769L, 90519642L, 984L, 
    0L, 296209525L, 487088392L, 5L, 894L, 529L, 5L, 99106L, 2L, 926017L, 
    9078L, 1L, 21L, 88601017L, 575770L, 48L, 8431L, 194L, 62324996L, 
    5L, 81L, 40634727L, 806901520L, 6818173L, 3501L, 91780L, 36106039L, 
    5834347L, 58388837L, 34L, 3280L, 6507606L, 19L, 402L, 584L, 76L, 
    4078684L, 199L, 6881L, 92251L, 81715L, 40L, 327L, 57764L, 97668898L, 
    2676483L, 76L, 4694L, 817120L, 51L, 116712L, 666L, 3L, 42841L, 
    9724L, 21L, 4L, 359L, 2604L, 22L, 30490L, 5640L, 34L, 51923625L, 
    35544L), Peak.Freq = c(87005561L, 9102L, 994839015L, 42745869L, 
    32840L, 62737133L, 2722L, 24L, 67404881L, 999242982L, 3048L, 
    85315406L, 703037627L, 331264L, 8403609L, 3934064L, 50578953L, 
    370110665L, 3414L, 12657L, 40L, 432L, 7707L, 214L, 68588962L, 
    69467L, 75L, 500297L, 704L, 1L, 102659072L, 60896923L, 4481230L, 
    94124925L, 60164619L, 447L, 580L, 8L, 172L, 9478521L, 20L, 53L, 
    3072127L, 2160L, 27301893L, 8L, 4263L, 508L, 712409L, 50677L, 
    522433683L, 112844L, 193385L, 458269L, 93578705L, 22093131L, 
    6L, 9L, 1690461L, 0L, 4L, 652847L, 44767L, 21408L, 5384L, 304L, 
    721L, 651147L, 2426L, 586L, 498289375L, 945L, 6L, 816L, 46207L, 
    39135L, 6621028L, 66905L, 26905085L, 4098L, 0L, 14L, 88L, 530L, 
    97809006L, 90L, 6L, 260792844L, 9L, 833205723L, 99467321L, 5L, 
    8455640L, 54090L, 2L, 309L, 299161148L, 4952L, 454824L), Delta.Freq = c(5L, 
    78L, 88553L, 794L, 5L, 3859122L, 782L, 36L, 8756801L, 243169338L, 
    817789L, 8792384L, 7431L, 626921743L, 9206L, 95789L, 7916L, 8143453L, 
    6L, 4L, 6363L, 181125L, 259618L, 6751L, 33L, 37960L, 0L, 2L, 
    599582228L, 565585L, 19L, 48L, 269450424L, 70676581L, 7830566L, 
    4L, 86484313L, 21L, 90899794L, 2L, 72356L, 574280L, 869544L, 
    73418L, 6468164L, 2259L, 5938505L, 31329L, 1249L, 354L, 8817L, 
    3L, 2568L, 82809L, 29836269L, 5230L, 37L, 33752014L, 79307L, 
    1736L, 8522076L, 40L, 2289135L, 862L, 801448L, 8026L, 5L, 15L, 
    4393771L, 405914L, 71098L, 950288L, 8319L, 1396973L, 832L, 70L, 
    1746L, 61907L, 8709547L, 300750537L, 45862L, 91417085L, 79892L, 
    47765L, 5477L, 18L, 4186L, 2860L, 754038591L, 375L, 53809223L, 
    72L, 136L, 509L, 232325L, 13128104L, 1692L, 8581L, 23L), Delta.Time = c(1361082L, 
    7926L, 499L, 5004L, 3494530L, 213L, 64551179L, 70L, 797L, 5L, 
    72588L, 86976L, 5163L, 635080L, 3L, 91L, 919806257L, 81443L, 
    3135427L, 4410972L, 5810L, 8L, 46603718L, 422L, 1083626L, 48L, 
    15699890L, 7L, 90167635L, 446459879L, 2332071L, 761660L, 49218442L, 
    381L, 46L, 493197L, 46L, 798597155L, 45342274L, 6265842L, 6L, 
    3445819L, 351L, 1761227L, 214L, 959L, 908996387L, 6L, 3855L, 
    9096604L, 152664L, 7970052L, 32366926L, 31L, 5201618L, 114L, 
    7806411L, 70L, 239L, 5065L, 2L, 1L, 14472831L, 122042249L, 8L, 
    495604L, 29L, 8965478L, 2875L, 959L, 39L, 9L, 690L, 933626665L, 
    85294L, 580093L, 95934L, 982058L, 65244056L, 137508L, 29L, 7621L, 
    7527L, 72L, 2L, 315L, 6L, 2413L, 8625150L, 51298109L, 851L, 890460L, 
    160736L, 6L, 850842734L, 2L, 7L, 76969113L, 190536L), Peak.Time = c(1465265L, 
    452894L, 545076172L, 8226275L, 5040875L, 700530L, 1L, 3639L, 
    20141L, 71712131L, 686L, 923L, 770569738L, 69961L, 737458636L, 
    122403L, 199502046L, 6108L, 907L, 108078263L, 7817L, 4L, 6L, 
    69L, 721L, 786353L, 87486L, 1563L, 876L, 47599535L, 79295722L, 
    53L, 7378L, 591L, 6607935L, 954L, 6295L, 75514344L, 5742050L, 
    25647276L, 449L, 328566184L, 4L, 2L, 2703L, 21367543L, 63429043L, 
    708L, 782L, 909820L, 478L, 50L, 922L, 579882L, 7850L, 534L, 2157492L, 
    96L, 6L, 716L, 5L, 653290336L, 447854237L, 2L, 31972263L, 645L, 
    7L, 609909L, 4054695L, 455631L, 4919894L, 9L, 72713L, 9997L, 
    84090765L, 89742L, 5L, 5028L, 4126L, 23091L, 81L, 239635020L, 
    3576L, 898597785L, 6822L, 3798L, 201999L, 19624L, 20432923L, 
    18944093L, 930720236L, 1492302L, 300122L, 143633L, 5152743L, 
    417344L, 813L, 55792L, 78L), Center_Freq = c(61907L, 8709547L, 
    300750537L, 45862L, 91417085L, 79892L, 47765L, 5477L, 18L, 4186L, 
    2860L, 754038591L, 375L, 53809223L, 72L, 136L, 4700783L, 4173830L, 
    30L, 811L, 341014L, 780L, 44749L, 91L, 201620707L, 74L, 1L, 65422L, 
    595L, 89093186L, 946520L, 6940919L, 48744L, 2317845L, 5126197L, 
    2445L, 8L, 557450L, 450259742L, 21006647L, 9L, 7234027L, 59L, 
    9L, 651547554L, 45554L, 38493L, 91055218L, 38L, 1116474L, 2295482L, 
    3001L, 9L, 3270L, 141L, 53644L, 667983L, 565598L, 84L, 971L, 
    555498297L, 60431L, 6597L, 856943893L, 607815536L, 4406L, 79L, 
    4885076L, 745L, 8L, 56456L, 125140L, 73043989L, 516476L, 7L, 
    4440739L, 754038591L, 375L, 53809223L, 72L, 136L, 509L, 232325L, 
    13128104L, 1692L, 8581L, 23L, 5874213L, 4550L, 644668065L, 3712371L, 
    5928L, 8833L, 7L, 2186023L, 61627221L, 37297L, 716427989L, 21387L
    ), Start.Freq = c(426355L, 22073538L, 680374L, 41771L, 54L, 6762844L, 
    599171L, 108L, 257451851L, 438814L, 343045L, 4702L, 967787L, 
    1937L, 18L, 89301735L, 366L, 90L, 954L, 7337732L, 70891703L, 
    4139L, 10397931L, 940000382L, 7L, 38376L, 878528819L, 6287L, 
    738366L, 31L, 47L, 5L, 6L, 77848L, 2366508L, 45L, 3665842L, 7252260L, 
    6L, 61L, 3247L, 448348L, 1L, 705132L, 144L, 7423637L, 2L, 497L, 
    844927639L, 78978L, 914L, 131L, 7089563L, 927L, 9595581L, 2774463L, 
    1651L, 73509280L, 7L, 35L, 18L, 96L, 1L, 92545512L, 27354947L, 
    7556L, 65019L, 7480L, 71835L, 8249L, 64792L, 71537L, 349389666L, 
    280244484L, 82L, 6L, 40L, 353872L, 0L, 103L, 1255L, 4752L, 29L, 
    76L, 81185L, 14L, 9L, 470775630L, 818361265L, 57947209L, 44L, 
    24L, 41295L, 4L, 261449L, 9931404L, 773556640L, 930717L, 65007421L
    ), End.Freq = c(71000996L, 11613579L, 71377155L, 1942738L, 8760748L, 
    79L, 455L, 374L, 8L, 5L, 2266932L, 597833L, 155488L, 3020L, 4L, 
    554L, 4L, 16472L, 1945649L, 668181101L, 649780L, 22394365L, 93060602L, 
    172146L, 20472L, 23558847L, 190513L, 22759044L, 44L, 78450L, 
    205621181L, 218L, 69916344L, 23884L, 66L, 312148L, 7710564L, 
    4L, 422L, 744572L, 651547554L, 45554L, 38493L, 91055218L, 38L, 
    1116474L, 2295482L, 3001L, 9L, 3270L, 141L, 55595L, 38451L, 8660867L, 
    14L, 96L, 345L, 6L, 44L, 8235824L, 910517L, 1424326L, 87102566L, 
    53644L, 667983L, 565598L, 84L, 971L, 555498297L, 60431L, 6597L, 
    856943893L, 607815536L, 4406L, 79L, 7L, 28978746L, 7537295L, 
    6L, 633L, 345860066L, 802L, 1035131L, 602L, 2740L, 8065L, 61370968L, 
    429953765L, 981507L, 8105L, 343787257L, 44782L, 64184L, 12981359L, 
    123367978L, 818775L, 123745614L, 25345654L, 3L), Country = c("Holland", 
    "Holland", "Holland", "Holland", "Holland", "Holland", "Spain", 
    "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", 
    "Spain", "Spain", "Spain", "Spain", "Holland", "Holland", "Holland", 
    "Holland", "Holland", "Holland", "France", "France", "France", 
    "France", "France", "France", "France", "France", "France", "France", 
    "France", "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", 
    "Spain", "Spain", "France", "France", "France", "France", "Holland", 
    "Holland", "Holland", "Holland", "Holland", "Holland", "Holland", 
    "Holland", "Holland", "Holland", "Holland", "Holland", "Holland", 
    "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", 
    "Holland", "Holland", "Holland", "Holland", "France", "France", 
    "France", "France", "France", "France", "France", "Spain", "Spain", 
    "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", 
    "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", "Spain", 
    "Spain", "Spain", "France", "France", "France")), row.names = c(NA, 
    99L), class = "data.frame")
    

  [1]: https://www.machinelearningplus.com/machine-learning/caret-package/https://

Solution

  • There are a few things going on here. First, you hadn't defined metric, and executing the code says as much. I'll just use the default metric in what's below. Second, as I suggested in the comments, there are only two tuning parameters in the adaboost model, nIter and method ("Adaboost.MI" or "Real Adaboost"). So, we could change the tuning grid to be the following:

    fitGrid_2 <- expand.grid(nIter = seq(10, 100, by=10), 
                             method=c("Adaboost.MI", "Real Adaboost"))
    

    Now, when we run the model:

    model_adaboost = train(Country ~ ., 
                           data=train.data, 
                           method='adaboost', 
                           tuneLength = tuneLength, 
                           trControl = fitControl,
                           tuneGrid=fitGrid_2, 
                           verbose=TRUE)
    
    

    we get a load of warnings and empty results. The warnings are all like this:

    50: model fit failed for Fold03.Rep01: nIter=100, method=Adaboost.MI Error : Dependent variables must have two levels
    

    This suggests that adaboost requires the classification task to only have two possibilities. If we looked at just two countries, it works:

    NewClusters2 <- subset(NewClusters, Country %in% c("France", "Spain"))
    
    training.parameters <- NewClusters2$Country %>% 
      createDataPartition(p = 0.7, list = FALSE)
    train.data <- NewClusters2[training.parameters, ]
    test.data <- NewClusters2[-training.parameters, ]
    
    model_adaboost = train(Country ~ ., 
                           data=train.data, 
                           method='adaboost', 
                           tuneLength = tuneLength, 
                           trControl = fitControl,
                           tuneGrid=fitGrid_2, 
                           verbose=TRUE)