r-ml-frameworks

star 5

R machine learning frameworks. Use for unified ML workflows with tidymodels, caret, mlr3, and h2o.

LeoLin990405 By LeoLin990405 schedule Updated 1/30/2026

name: r-ml-frameworks description: R machine learning frameworks. Use for unified ML workflows with tidymodels, caret, mlr3, and h2o.

R ML Frameworks

Unified machine learning workflows.

tidymodels

library(tidymodels)

# 1. Split data
set.seed(123)
split <- initial_split(df, prop = 0.8, strata = target)
train <- training(split)
test <- testing(split)

# 2. Create recipe (preprocessing)
recipe <- recipe(target ~ ., data = train) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_dummy(all_nominal_predictors()) %>%
  step_zv(all_predictors()) %>%
  step_impute_median(all_numeric_predictors())

# 3. Specify model
model_spec <- rand_forest(
  mtry = tune(),
  trees = 500,
  min_n = tune()
) %>%
  set_engine("ranger") %>%
  set_mode("classification")

# 4. Create workflow
wf <- workflow() %>%
  add_recipe(recipe) %>%
  add_model(model_spec)

# 5. Cross-validation
folds <- vfold_cv(train, v = 5, strata = target)

# 6. Tune hyperparameters
tune_results <- tune_grid(
  wf,
  resamples = folds,
  grid = 20,
  metrics = metric_set(roc_auc, accuracy)
)

# 7. Select best model
best_params <- select_best(tune_results, metric = "roc_auc")
final_wf <- finalize_workflow(wf, best_params)

# 8. Final fit
final_fit <- last_fit(final_wf, split)
collect_metrics(final_fit)

# 9. Predictions
predictions <- predict(final_fit$.workflow[[1]], test)

caret

library(caret)

# Train control
ctrl <- trainControl(
  method = "cv",
  number = 5,
  classProbs = TRUE,
  summaryFunction = twoClassSummary
)

# Train model
model <- train(
  target ~ .,
  data = train,
  method = "rf",
  trControl = ctrl,
  tuneLength = 10,
  metric = "ROC"
)

# Results
print(model)
plot(model)

# Predictions
pred <- predict(model, test)
pred_prob <- predict(model, test, type = "prob")

# Confusion matrix
confusionMatrix(pred, test$target)

# Variable importance
varImp(model)

mlr3

library(mlr3)
library(mlr3learners)
library(mlr3tuning)

# Task
task <- TaskClassif$new(id = "my_task", backend = df, target = "target")

# Learner
learner <- lrn("classif.ranger", predict_type = "prob")

# Resampling
resampling <- rsmp("cv", folds = 5)

# Benchmark
design <- benchmark_grid(
  tasks = task,
  learners = list(
    lrn("classif.ranger"),
    lrn("classif.xgboost"),
    lrn("classif.log_reg")
  ),
  resamplings = resampling
)
bmr <- benchmark(design)
bmr$aggregate(msr("classif.auc"))

# Tuning
learner <- lrn("classif.ranger",
  mtry = to_tune(1, 10),
  num.trees = to_tune(100, 500)
)
instance <- tune(
  tuner = tnr("grid_search"),
  task = task,
  learner = learner,
  resampling = resampling,
  measure = msr("classif.auc")
)

h2o

library(h2o)
h2o.init()

# Convert to H2O frame
train_h2o <- as.h2o(train)
test_h2o <- as.h2o(test)

# AutoML
aml <- h2o.automl(
  x = predictors,
  y = "target",
  training_frame = train_h2o,
  max_runtime_secs = 300,
  nfolds = 5
)

# Best model
leader <- aml@leader
h2o.performance(leader, test_h2o)

# Predictions
pred <- h2o.predict(leader, test_h2o)

# Shutdown
h2o.shutdown(prompt = FALSE)

Model Comparison

Framework Strengths Use Case
tidymodels Tidy syntax, modern General ML
caret Simple, many models Quick prototyping
mlr3 Flexible, extensible Research
h2o AutoML, scalable Production
Install via CLI
npx skills add https://github.com/LeoLin990405/r-analytics-skill --skill r-ml-frameworks
Repository Details
star Stars 5
call_split Forks 0
navigation Branch main
article Path SKILL.md
More from Creator
LeoLin990405
LeoLin990405 Explore all skills →