caretEnsemble Classification Example
Here’s a quick demo of how to fit a binary classification model with caretEnsemble. Please note that I haven’t spent as much time debugging caretEnsemble for classification models, so there are probably more bugs here than in my last post. Also note that multiclass models are not yet supported.
#Setup
rm(list = ls(all = TRUE))
gc(reset=TRUE)
#> used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
#> Ncells 483861 25.9 1032254 55.2 NA 483861 25.9
#> Vcells 915310 7.0 8388608 64.0 98304 915310 7.0
set.seed(1234)
#Libraries
library(caret)
library(caretEnsemble)
#Data
library(mlbench)
dat <- mlbench.xor(500, 2)
X <- data.frame(dat$x)
Y <- factor(ifelse(dat$classes=='1', 'Yes', 'No'))
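Before modeling, a quick (optional) plot shows why this is a hard problem: the XOR classes are not linearly separable, so models restricted to linear decision boundaries shouldn't be able to beat random guessing here.
#Plot the XOR data
plot(X[,1], X[,2], col=Y, pch=19, xlab='X1', ylab='X2', main='XOR data')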
#Split train/test
train <- runif(nrow(X)) <= .66
#Setup CV Folds
#returnData=FALSE saves some space
folds=5
repeats=1
myControl <- trainControl(method='cv', number=folds, repeats=repeats,
                          returnResamp='none', classProbs=TRUE,
                          returnData=FALSE, savePredictions=TRUE,
                          verboseIter=FALSE, allowParallel=TRUE,
                          summaryFunction=twoClassSummary,
                          index=createMultiFolds(Y[train], k=folds, times=repeats))
PP <- c('center', 'scale')
#Train some models
model1 <- train(X[train,], Y[train], method='gbm', verbose=FALSE, trControl=myControl,
                tuneGrid=expand.grid(.n.trees=500, .interaction.depth=15, .shrinkage=0.01, .n.minobsinnode=10))
model2 <- train(X[train,], Y[train], method='blackboost', trControl=myControl)
model3 <- train(X[train,], Y[train], method='parRF', tuneLength=1, trControl=myControl)
model4 <- train(X[train,], Y[train], method='mlpWeightDecay', trControl=myControl, trace=FALSE, preProcess=PP)
model5 <- train(X[train,], Y[train], method='knn', trControl=myControl, preProcess=PP)
model6 <- train(X[train,], Y[train], method='earth', trControl=myControl, preProcess=PP)
model7 <- train(X[train,], Y[train], method='glm', trControl=myControl, preProcess=PP)
model8 <- train(X[train,], Y[train], method='svmRadial', trControl=myControl, preProcess=PP)
model9 <- train(X[train,], Y[train], method='gam', trControl=myControl, preProcess=PP)
model10 <- train(X[train,], Y[train], method='glmnet', trControl=myControl, preProcess=PP)
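Stacking depends on every base model having been fit on exactly the same cross-validation folds, which the shared index in myControl guarantees. A quick optional sanity check (not part of the original workflow):
#Verify the models share identical resampling indexes
stopifnot(identical(model1$control$index, model10$control$index))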
#Make a list of all the models
all.models <- list(model1, model2, model3, model4, model5, model6, model7, model8, model9, model10)
names(all.models) <- sapply(all.models, function(x) x$method)
all.models <- as.caretList(all.models)
#Compare resampled AUCs (the minimum ROC across each model's tuning grid)
sort(sapply(all.models, function(x) min(x$results$ROC)))
#> mlpWeightDecay glm glmnet gam earth blackboost knn svmRadial
#> 0.3856384 0.4099975 0.4147880 0.4346771 0.4882353 0.5000000 0.9915315 0.9964432
#> parRF gbm
#> 0.9994703 0.9998268
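The minimum is the worst tuning candidate for each model, so the numbers above are pessimistic for the tuned models. As an optional extra, each model's best resampled ROC and its fold-to-fold standard deviation (both stored in the train object's results) give a fairer picture:
#Optional: best resampled ROC and its SD per model
sapply(all.models, function(x) {
  best <- which.max(x$results$ROC)
  c(ROC=x$results$ROC[best], ROCSD=x$results$ROCSD[best])
})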
#Make a greedy ensemble - currently can only use RMSE
greedy <- caretEnsemble(all.models, iter=1000L)
print(greedy$ens_model$finalModel$model_weights)
#> No Yes
#> gbm_No 1 0
#> gbm_Yes 0 1
#> blackboost_No 0 0
#> blackboost_Yes 0 0
#> parRF_No 0 0
#> parRF_Yes 0 0
#> mlpWeightDecay_No 0 0
#> mlpWeightDecay_Yes 0 0
#> knn_No 0 0
#> knn_Yes 0 0
#> earth_No 0 0
#> earth_Yes 0 0
#> glm_No 0 0
#> glm_Yes 0 0
#> svmRadial_No 0 0
#> svmRadial_Yes 0 0
#> gam_No 0 0
#> gam_Yes 0 0
#> glmnet_No 0 0
#> glmnet_Yes 0 0
greedy$error
#> max_iter ROC Sens Spec ROCSD SensSD SpecSD
#> 1 100 0.9998217 0.9884034 0.9878788 0.0003985861 0.01588213 0.01659765
#Make a linear regression ensemble
linear <- caretStack(all.models, method='glm')
print(round(coef(linear$ens_model$finalModel), 2))
#> (Intercept) gbm blackboost parRF mlpWeightDecay knn earth glm
#> 64968.70 642.80 -92459.55 6.92 218.60 481.55 -35355.58 13868.35
#> svmRadial gam glmnet
#> 10.76 -1110.63 -19911.04
linear$error
#> parameter ROC Sens Spec ROCSD SensSD SpecSD
#> 1 none 0.9806543 0.9768067 0.9575758 0.011322 0.02400921 0.02710385
#Predict for test set:
library(caTools)
preds <- predict(all.models, newdata=X[!train,])
preds$ENS_greedy <- predict(greedy, newdata=X[!train,])[,'Yes']
preds$ENS_linear <- predict(linear, newdata=X[!train,])[,'Yes']
sort(colAUC(preds, Y[!train])[1,])
#> blackboost earth gam glmnet glm knn mlpWeightDecay svmRadial
#> 0.5000000 0.5000000 0.5490459 0.5638044 0.5642516 0.9950805 0.9992546 0.9997018
#> gbm parRF ENS_greedy ENS_linear
#> 1.0000000 1.0000000 1.0000000 1.0000000
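If you need hard class labels rather than rankings, one optional follow-up is to threshold the ensemble's predicted probabilities at 0.5 and inspect a confusion matrix:
#Optional: confusion matrix for the greedy ensemble at a 0.5 cutoff
class.preds <- factor(ifelse(preds$ENS_greedy > 0.5, 'Yes', 'No'), levels=levels(Y))
confusionMatrix(class.preds, Y[!train], positive='Yes')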
Right now, this code fails for me if I try a model like nnet or an SVM for stacking, so there are clearly bugs left to fix.
The greedy ensemble puts 100% of its weight on the gbm, which makes sense, as the gbm has an AUC of 1 on the training set. The linear stack uses all of the models and also achieves a test-set AUC of 1, but it contains some very large positive and negative coefficients, which makes it a much harder model to interpret.
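Those huge coefficients are a classic symptom of collinearity: the strong base models (gbm, parRF, svmRadial) make nearly identical predictions, so an unpenalized glm can't pin down their individual weights. One fix I'd try (a sketch only; I haven't verified it works here, given the stacking bugs noted above) is a regularized stack:
#Sketch: regularize the stack with glmnet to shrink the coefficients
penalized <- caretStack(all.models, method='glmnet')
coef(penalized$ens_model$finalModel, s=penalized$ens_model$bestTune$lambda)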