1 Using PASWR2 and HSWRESTLER

# PASWR2 supplies the HSWRESTLER data (high-school wrestler body measurements).
library(PASWR2)
# Rows once flagged as questionable — kept in this run, left for reference:
# iffy <- c(22, 27, 32, 35, 60)
# Column indexes 8 and 9 are dropped below; presumably the alternative
# body-fat measurements, leaving hwfat as the sole response — TODO confirm
# against ?HSWRESTLER.
noco <- c(8, 9)
# hsw <- HSWRESTLER[-iffy, -noco]
hsw <- HSWRESTLER[,-noco]
# caret provides createDataPartition(), trainControl(), train(), predict().
library(caret)
set.seed(589)  # reproducible train/test split
# Stratified 75/25 split on the response hwfat; list = FALSE returns a
# matrix of row indexes rather than a list.
trainIndex <- createDataPartition(y = hsw$hwfat,
                              p = 0.75,
                              list = FALSE,
                              times = 1
                              )
train <- hsw[trainIndex, ]
test <- hsw[-trainIndex, ]
# Sanity check of split sizes (console output captured below): 60/18 rows.
dim(train)
[1] 60  7
dim(test)
[1] 18  7

1.1 Stepwise selection with caret

# Resampling scheme: 5-fold cross-validation, used to pick nvmax below.
fitControl <- trainControl(## 5-fold CV
                           method = "cv",
                           number = 5
                           )
set.seed(5)  # reproducible CV fold assignment
# "leapSeq" = sequential-replacement subset selection (leaps package).
# caret tunes nvmax, the maximum number of predictors in the model; the
# printed output shows it tried nvmax = 2, 3, 4.
stepMod <- train(hwfat ~ ., data = train, 
                 method = "leapSeq", 
                 trControl = fitControl,
                 verbose = FALSE)
# Printing the train object shows CV RMSE/R^2/MAE per candidate nvmax.
stepMod
Linear Regression with Stepwise Selection 

60 samples
 6 predictor

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 48, 48, 48, 48, 48 
Resampling results across tuning parameters:

  nvmax  RMSE      Rsquared   MAE     
  2      3.164883  0.8841640  2.492955
  3      4.286929  0.7916281  3.307002
  4      3.341006  0.8665645  2.841942

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was nvmax = 2.
# Which predictors enter the best subset of each size (up to nvmax = 2).
summary(stepMod$finalModel)
Subset selection object
6 Variables  (and intercept)
        Forced in Forced out
age         FALSE      FALSE
ht          FALSE      FALSE
wt          FALSE      FALSE
abs         FALSE      FALSE
triceps     FALSE      FALSE
subscap     FALSE      FALSE
1 subsets of each size up to 2
Selection Algorithm: 'sequential replacement'
         age ht  wt  abs triceps subscap
1  ( 1 ) " " " " " " "*" " "     " "    
2  ( 1 ) " " " " " " "*" "*"     " "    
# Coefficients of the chosen two-predictor model (abs and triceps).
coef(stepMod$finalModel, id = 2)
(Intercept)         abs     triceps 
  2.5159873   0.4120287   0.3987979 

1.2 Test-set performance of the stepwise model

# Predict hwfat on the held-out test set with the tuned stepwise model.
yhat <- predict(stepMod, newdata = test)
# Test-set root-mean-square error.
# NOTE(review): assigning to RMSE masks caret::RMSE() for the rest of the
# session — a lower-case name (e.g. rmse) would avoid the shadowing.
RMSE <- sqrt(mean((test$hwfat - yhat)^2))
RMSE
[1] 3.011079

1.3 Forward selection with caret

# Same 5-fold CV control as the stepwise fit (re-created for this section).
fitControl <- trainControl(## 5-fold CV
                           method = "cv",
                           number = 5
                           )
set.seed(1)  # different seed, so CV folds differ from the stepwise run
# "leapForward" = forward subset selection (leaps package); caret again
# tunes nvmax over 2:4 as shown in the output.
fsMod <- train(hwfat ~ ., data = train, 
                 method = "leapForward", 
                 trControl = fitControl,
                 verbose = FALSE)
# CV results per candidate nvmax; smallest RMSE picks nvmax = 3 here.
fsMod
Linear Regression with Forward Selection 

60 samples
 6 predictor

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 48, 48, 48, 48, 48 
Resampling results across tuning parameters:

  nvmax  RMSE      Rsquared   MAE     
  2      3.355908  0.8619088  2.749712
  3      3.152095  0.8727356  2.635969
  4      3.166971  0.8676960  2.632354

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was nvmax = 3.
# Which predictors enter the best subset of each size (up to nvmax = 3).
summary(fsMod$finalModel)
Subset selection object
6 Variables  (and intercept)
        Forced in Forced out
age         FALSE      FALSE
ht          FALSE      FALSE
wt          FALSE      FALSE
abs         FALSE      FALSE
triceps     FALSE      FALSE
subscap     FALSE      FALSE
1 subsets of each size up to 3
Selection Algorithm: forward
         age ht  wt  abs triceps subscap
1  ( 1 ) " " " " " " "*" " "     " "    
2  ( 1 ) " " " " " " "*" "*"     " "    
3  ( 1 ) "*" " " " " "*" "*"     " "    
# Coefficients of the chosen three-predictor model (age, abs, triceps).
coef(fsMod$finalModel, id = 3)
(Intercept)         age         abs     triceps 
 12.2629092  -0.6092815   0.4330800   0.3581533 

1.4 Test-set performance of the forward-selection model

# Predict hwfat on the held-out test set with the forward-selection model.
yhat <- predict(fsMod, newdata = test)
# Test-set root-mean-square error (slightly worse than the stepwise model).
RMSE <- sqrt(mean((test$hwfat - yhat)^2))
RMSE
[1] 3.03234

1.5 Backward elimination with caret

# Same 5-fold CV control as the earlier fits (re-created for this section).
fitControl <- trainControl(## 5-fold CV
                           method = "cv",
                           number = 5
                           )
set.seed(7)  # yet another seed, so folds differ from the previous runs
# "leapBackward" = backward-elimination subset selection (leaps package);
# nvmax tuned over 2:4 as shown in the output.
beMod <- train(hwfat ~ ., data = train, 
                 method = "leapBackward", 
                 trControl = fitControl,
                 verbose = FALSE)
# CV results per candidate nvmax; smallest RMSE picks nvmax = 2 here.
beMod
Linear Regression with Backwards Selection 

60 samples
 6 predictor

No pre-processing
Resampling: Cross-Validated (5 fold) 
Summary of sample sizes: 48, 48, 48, 48, 48 
Resampling results across tuning parameters:

  nvmax  RMSE      Rsquared   MAE     
  2      3.077262  0.8104051  2.504921
  3      3.207873  0.7943391  2.637599
  4      3.398228  0.7942529  2.857311

RMSE was used to select the optimal model using the smallest value.
The final value used for the model was nvmax = 2.
# Which predictors survive elimination at each size (up to nvmax = 2).
summary(beMod$finalModel)
Subset selection object
6 Variables  (and intercept)
        Forced in Forced out
age         FALSE      FALSE
ht          FALSE      FALSE
wt          FALSE      FALSE
abs         FALSE      FALSE
triceps     FALSE      FALSE
subscap     FALSE      FALSE
1 subsets of each size up to 2
Selection Algorithm: backward
         age ht  wt  abs triceps subscap
1  ( 1 ) " " " " " " "*" " "     " "    
2  ( 1 ) " " " " " " "*" "*"     " "    
# Same final model as the stepwise fit: abs and triceps, identical
# coefficients.
coef(beMod$finalModel, id = 2)
(Intercept)         abs     triceps 
  2.5159873   0.4120287   0.3987979 

1.6 Test-set performance of the backward-elimination model

# Predict hwfat on the held-out test set with the backward-elimination model.
yhat <- predict(beMod, newdata = test)
# Test-set RMSE — identical to the stepwise result because both procedures
# selected the same two-predictor (abs + triceps) model.
RMSE <- sqrt(mean((test$hwfat - yhat)^2))
RMSE
[1] 3.011079