name: model-tuning
description: Hyperparameter tuning in tidymodels with grids, Bayesian optimization, racing, and workflow finalization.
Model Tuning Patterns
Overview
Comprehensive hyperparameter optimization using the tune package. Covers grid search, iterative search, racing methods, and Bayesian optimization.
Tunable Parameters
Marking Parameters for Tuning
library(tidymodels)
# Flag mtry and min_n for tuning with tune(); trees stays fixed at 1000
rf_spec <- rand_forest(mtry = tune(), trees = 1000, min_n = tune())
rf_spec <- set_engine(rf_spec, "ranger")
rf_spec <- set_mode(rf_spec, "classification")
# Tune recipe steps.
# Normalize before PCA: principal components are driven by variance, so
# predictors measured on larger scales would otherwise dominate them.
rec <- recipe(outcome ~ ., data = train_data) |>
  step_normalize(all_numeric_predictors()) |>
  step_pca(all_numeric_predictors(), num_comp = tune())
Parameter Objects (dials)
library(dials)
# Print parameter objects to inspect them
mtry()
min_n()
trees()
learn_rate()
penalty()

# Narrow the search range of individual parameters
mtry(range = c(2, 20))
min_n(range = c(5, 50))
# With a log10 transformation the range is given in log10 units,
# i.e. learning rates between 10^-3 and 10^-1
learn_rate(
  range = c(-3, -1),
  trans = log10_trans()
)
# Update based on data.
# finalize() derives mtry's upper bound from the columns it is given, so pass
# the predictors only; including the outcome column inflates the bound by one.
predictor_cols <- setdiff(names(train_data), "outcome")
mtry_final <- finalize(mtry(), train_data[, predictor_cols, drop = FALSE])
Grid Search
Regular Grid
# Full factorial grid with the same resolution for every parameter:
# 5 values each, so 5 x 5 = 25 candidate combinations
regular_grid <- grid_regular(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  levels = 5
)

# Per-parameter resolution, given as a vector named after the parameters
regular_grid <- grid_regular(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  levels = c(mtry = 5, min_n = 3)
)
Random Grid
# Random sampling of parameter space.
# Seed the RNG first so the sampled grid is the same on every run.
set.seed(123)
random_grid <- grid_random(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 50 # duplicate draws are dropped, so fewer rows may be returned
)
Space-Filling Designs
# Space-filling designs cover the parameter space more evenly than independent
# random draws. grid_space_filling() (dials >= 1.3.0) replaces the deprecated
# grid_latin_hypercube() and grid_max_entropy(); the design is chosen via `type`.
# Both designs involve random numbers, so set a seed for reproducibility.
set.seed(123)

# Latin hypercube (better coverage than random)
lhs_grid <- grid_space_filling(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30,
  type = "latin_hypercube"
)

# Maximum entropy grid
maxent_grid <- grid_space_filling(
  mtry(range = c(2, 10)),
  min_n(range = c(2, 20)),
  size = 30,
  type = "max_entropy"
)
Running Grid Search
# Grid search where tune builds the candidate grid itself:
# an integer `grid` requests that many candidates
tune_results <- tune_grid(
  workflow,
  resamples = cv_folds,
  grid = 20,
  metrics = metric_set(roc_auc, accuracy),
  control = control_grid(verbose = TRUE)
)

# Grid search over a grid that was built beforehand
tune_results <- tune_grid(
  workflow,
  resamples = cv_folds,
  grid = my_grid,
  metrics = metric_set(roc_auc, accuracy)
)
Bayesian Optimization
Basic Bayesian Tuning
# Bayesian search: 10 initial candidates, then at most 50 search iterations,
# stopping early once 20 iterations in a row bring no improvement
tune_results <- tune_bayes(
  workflow,
  resamples = cv_folds,
  iter = 50,
  initial = 10,
  metrics = metric_set(roc_auc),
  control = control_bayes(verbose = TRUE, no_improve = 20)
)
Bayesian with Custom Initial Points
# Stage 1: a small grid search provides the starting points
initial_grid <- tune_grid(
  workflow,
  resamples = cv_folds,
  grid = 10,
  metrics = metric_set(roc_auc)
)

# Stage 2: the Bayesian search is seeded with the stage-1 results
# passed through `initial`
bayes_results <- tune_bayes(
  workflow,
  resamples = cv_folds,
  iter = 40,
  initial = initial_grid,
  metrics = metric_set(roc_auc)
)
Acquisition Functions
# The acquisition function is selected with the `objective` argument of
# tune_bayes(); it is not a control_bayes() option, and only one can be given.
bayes_results <- workflow |>
  tune_bayes(
    resamples = cv_folds,
    iter = 40,
    metrics = metric_set(roc_auc),
    # Expected improvement (default)
    objective = exp_improve()
    # Alternatives (use instead of the line above):
    #   objective = prob_improve()          # probability of improvement
    #   objective = conf_bound(kappa = 2)   # confidence bound
  )
Racing Methods (finetune)
ANOVA Racing
library(finetune)
# ANOVA racing: every candidate is evaluated on the first 3 resamples
# (burn_in), after which poorly performing candidates are dropped
race_results <- tune_race_anova(
  workflow,
  resamples = cv_folds,
  grid = 50,
  metrics = metric_set(roc_auc),
  control = control_race(verbose = TRUE, burn_in = 3)
)

# Visualize the race
plot_race(race_results)
Win/Loss Racing
# Win/loss racing with the default race controls
race_results <- tune_race_win_loss(
  workflow,
  resamples = cv_folds,
  grid = 50,
  metrics = metric_set(roc_auc),
  control = control_race()
)
Simulated Annealing
library(finetune)
# Simulated annealing: 5 initial candidates followed by 50 search iterations
sa_results <- tune_sim_anneal(
  workflow,
  resamples = cv_folds,
  iter = 50,
  initial = 5,
  metrics = metric_set(roc_auc),
  control = control_sim_anneal(verbose = TRUE, cooling_coef = 0.1)
)
Analyzing Tuning Results
Best Parameters
# Candidate with the best ROC AUC
best_params <- select_best(tune_results, metric = "roc_auc")

# One-standard-error rule: candidates are sorted by mtry, then min_n, and the
# first one whose ROC AUC is within one standard error of the best is kept
best_params <- select_by_one_std_err(
  tune_results,
  mtry,
  min_n,
  metric = "roc_auc"
)
# Best by percent loss from optimal.
# A sorting expression is required: it orders the candidates from simplest to
# most complex, and the simplest one within `limit` percent of the best result
# is returned. desc(min_n) ranks larger minimum node sizes first.
best_params <- select_by_pct_loss(
  tune_results,
  desc(min_n),
  metric = "roc_auc",
  limit = 2 # within 2% of best
)
Visualizing Results
# Performance across parameters
autoplot(tune_results)
# Each tuning parameter against performance ("marginals" is the default type)
autoplot(tune_results, type = "marginals")
# The next two plot types apply only to iterative search results, such as
# those returned by tune_bayes():
# parameter values against search iteration
autoplot(bayes_results, type = "parameters")
# performance against search iteration
autoplot(bayes_results, type = "performance")
# Collect all metrics
metrics_df <- collect_metrics(tune_results)
Finalizing Model
# Replace the tune() placeholders with the selected parameter values
final_wf <- finalize_workflow(workflow, best_params)

# Run the final fit on the initial train/test split
final_fit <- last_fit(final_wf, data_split)

# Metrics from the final fit
collect_metrics(final_fit)
Parallel Processing
library(doParallel)
# Register parallel backend.
# Leave one core free, but never request fewer than one worker:
# detectCores() may return NA or 1, and makePSOCKcluster() fails on both
# NA and 0 workers.
n_cores <- parallel::detectCores()
n_workers <- if (is.na(n_cores)) 1L else max(1L, n_cores - 1L)
cl <- makePSOCKcluster(n_workers)
registerDoParallel(cl)
# Tuning picks up the registered backend automatically.
# NOTE(review): recent tune releases favour the future framework over foreach
# backends such as doParallel - confirm against the installed tune version.
tune_results <- workflow |>
  tune_grid(
    resamples = cv_folds,
    grid = 100
  )
# Stop cluster
stopCluster(cl)
Using future
library(future)
# Set up parallel plan
plan(multisession, workers = 4)
# With finetune racing
race_results <- workflow |>
  tune_race_anova(
    resamples = cv_folds,
    grid = 100,
    control = control_race(parallel_over = "everything")
  )
# Switch back to sequential processing when done so the background
# R sessions started by multisession are released
plan(sequential)
Tuning XGBoost Example
# Boosted-tree specification with all seven main arguments marked for tuning
xgb_spec <- boost_tree(
  trees = tune(),
  tree_depth = tune(),
  min_n = tune(),
  loss_reduction = tune(),
  sample_size = tune(),
  mtry = tune(),
  learn_rate = tune()
)
xgb_spec <- set_engine(xgb_spec, "xgboost")
xgb_spec <- set_mode(xgb_spec, "classification")
# Define parameter grid.
# Each tune() placeholder in xgb_spec needs a grid column of the same name.
# sample_prop() is therefore passed as `sample_size = ...`; unnamed, its column
# would be called `sample_prop` and would not match the spec.
# mtry's upper bound is finalized from the predictors only, so the outcome
# column is removed before calling finalize().
set.seed(123) # the Latin hypercube design is random
xgb_predictors <- train_data[, setdiff(names(train_data), "outcome"), drop = FALSE]
xgb_grid <- grid_latin_hypercube(
  trees(range = c(100, 1500)),
  tree_depth(range = c(3, 15)),
  min_n(range = c(2, 30)),
  loss_reduction(),
  sample_size = sample_prop(range = c(0.5, 1)),
  finalize(mtry(), xgb_predictors),
  learn_rate(range = c(-3, -1)),
  size = 50
)
# Use racing for efficiency.
# Every tune() placeholder in the workflow must have a column in `grid`.
# xgb_grid only covers the xgb_spec parameters, so a recipe with a tuned step
# (such as `rec`, which tunes num_comp) cannot be combined with it; a plain
# formula preprocessor is used instead.
xgb_results <- workflow(outcome ~ ., xgb_spec) |>
  tune_race_anova(
    resamples = cv_folds,
    grid = xgb_grid,
    metrics = metric_set(roc_auc)
  )
Control Options Summary
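| Function | Key Control Options |
|---|---|
| `tune_grid` | `save_pred`, `verbose`, `allow_par` |
| `tune_bayes` | `no_improve`, `uncertain`, `time_limit` |
| `tune_race_*` | `burn_in`, `num_ties`, `alpha` |
| `tune_sim_anneal` | `cooling_coef`, `restart` |

Note: `objective` (the acquisition function) is an argument of `tune_bayes()` itself, not of `control_bayes()`.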
Best Practices
- Start with grid search to understand parameter space
- Use racing methods for large grids (>50 combinations)
- Use Bayesian optimization for expensive models
- Always set a seed for reproducibility
- Use stratified resampling for imbalanced outcomes
- Consider `select_by_one_std_err()` for simpler models
- Monitor for overfitting during iterative search