Using the PASWR2 package and its HSWRESTLER data set
library(PASWR2)
# iffy <- c(22, 27, 32, 35, 60)
noco <- c(8, 9)
# hsw <- HSWRESTLER[-iffy, -noco]
hsw <- HSWRESTLER[,-noco]
library(caret)
set.seed(589)
trainIndex <- createDataPartition(y = hsw$hwfat,
p = 0.75,
list = FALSE,
times = 1
)
train <- hsw[trainIndex, ]
test <- hsw[-trainIndex, ]
dim(train)
[1] 60 7
dim(test)
[1] 18 7
Stepwise selection with caret
# Resampling scheme: plain 5-fold cross-validation, used to pick the
# tuning parameter nvmax (maximum number of predictors in the subset).
fitControl <- trainControl(## 5-fold CV
method = "cv",
number = 5
)
set.seed(5)  # reproducible CV fold assignment
# Stepwise (sequential-replacement) subset selection via the leaps backend;
# caret tunes nvmax by minimizing cross-validated RMSE.
stepMod <- train(hwfat ~ ., data = train,
method = "leapSeq",
trControl = fitControl,
verbose = FALSE)
stepMod
Linear Regression with Stepwise Selection
60 samples
6 predictor
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 48, 48, 48, 48, 48
Resampling results across tuning parameters:
nvmax RMSE Rsquared MAE
2 3.164883 0.8841640 2.492955
3 4.286929 0.7916281 3.307002
4 3.341006 0.8665645 2.841942
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was nvmax = 2.
# Inspect which variables enter at each subset size in the winning model.
summary(stepMod$finalModel)
Subset selection object
6 Variables (and intercept)
Forced in Forced out
age FALSE FALSE
ht FALSE FALSE
wt FALSE FALSE
abs FALSE FALSE
triceps FALSE FALSE
subscap FALSE FALSE
1 subsets of each size up to 2
Selection Algorithm: 'sequential replacement'
age ht wt abs triceps subscap
1 ( 1 ) " " " " " " "*" " " " "
2 ( 1 ) " " " " " " "*" "*" " "
# Coefficients of the selected size-2 model (predictors: abs, triceps).
coef(stepMod$finalModel, id = 2)
(Intercept) abs triceps
2.5159873 0.4120287 0.3987979
Test-set performance (stepwise model)
# Predict on the held-out test set and compute the test RMSE.
yhat <- predict(stepMod, newdata = test)
RMSE <- sqrt(mean((test$hwfat - yhat)^2))
RMSE
[1] 3.011079
Forward selection with caret
# Same 5-fold CV control, re-created for the forward-selection run.
fitControl <- trainControl(## 5-fold CV
method = "cv",
number = 5
)
set.seed(1)  # different seed -> different CV folds than the stepwise run
# Forward selection via leaps; nvmax again tuned by CV RMSE.
fsMod <- train(hwfat ~ ., data = train,
method = "leapForward",
trControl = fitControl,
verbose = FALSE)
fsMod
Linear Regression with Forward Selection
60 samples
6 predictor
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 48, 48, 48, 48, 48
Resampling results across tuning parameters:
nvmax RMSE Rsquared MAE
2 3.355908 0.8619088 2.749712
3 3.152095 0.8727356 2.635969
4 3.166971 0.8676960 2.632354
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was nvmax = 3.
# Variables entering at each subset size under forward selection.
summary(fsMod$finalModel)
Subset selection object
6 Variables (and intercept)
Forced in Forced out
age FALSE FALSE
ht FALSE FALSE
wt FALSE FALSE
abs FALSE FALSE
triceps FALSE FALSE
subscap FALSE FALSE
1 subsets of each size up to 3
Selection Algorithm: forward
age ht wt abs triceps subscap
1 ( 1 ) " " " " " " "*" " " " "
2 ( 1 ) " " " " " " "*" "*" " "
3 ( 1 ) "*" " " " " "*" "*" " "
# Coefficients of the selected size-3 model (age, abs, triceps).
coef(fsMod$finalModel, id = 3)
(Intercept) age abs triceps
12.2629092 -0.6092815 0.4330800 0.3581533
Test-set performance (forward-selection model)
# Test RMSE for the forward-selection model.
yhat <- predict(fsMod, newdata = test)
RMSE <- sqrt(mean((test$hwfat - yhat)^2))
RMSE
[1] 3.03234
Backward elimination with caret
# Same 5-fold CV control, re-created for the backward-elimination run.
fitControl <- trainControl(## 5-fold CV
method = "cv",
number = 5
)
set.seed(7)  # reproducible CV folds for this run
# Backward elimination via leaps; nvmax tuned by CV RMSE.
beMod <- train(hwfat ~ ., data = train,
method = "leapBackward",
trControl = fitControl,
verbose = FALSE)
beMod
Linear Regression with Backwards Selection
60 samples
6 predictor
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 48, 48, 48, 48, 48
Resampling results across tuning parameters:
nvmax RMSE Rsquared MAE
2 3.077262 0.8104051 2.504921
3 3.207873 0.7943391 2.637599
4 3.398228 0.7942529 2.857311
RMSE was used to select the optimal model using the smallest value.
The final value used for the model was nvmax = 2.
# Variables entering at each subset size under backward elimination.
summary(beMod$finalModel)
Subset selection object
6 Variables (and intercept)
Forced in Forced out
age FALSE FALSE
ht FALSE FALSE
wt FALSE FALSE
abs FALSE FALSE
triceps FALSE FALSE
subscap FALSE FALSE
1 subsets of each size up to 2
Selection Algorithm: backward
age ht wt abs triceps subscap
1 ( 1 ) " " " " " " "*" " " " "
2 ( 1 ) " " " " " " "*" "*" " "
# Coefficients of the selected size-2 model — identical to the stepwise
# result (abs + triceps), so the test RMSE below matches it as well.
coef(beMod$finalModel, id = 2)
(Intercept) abs triceps
2.5159873 0.4120287 0.3987979
Test-set performance (backward-elimination model)
# Test RMSE for the backward-elimination model.
yhat <- predict(beMod, newdata = test)
RMSE <- sqrt(mean((test$hwfat - yhat)^2))
RMSE
[1] 3.011079