Optuna — Bayesian Hyperparameter Optimization
Optuna uses Bayesian optimization with the Tree-structured Parzen Estimator (TPE) to learn from past trials: if n_estimators=200 with lr=0.05 worked well, it samples nearby values in subsequent trials. This informed search tends to find better parameters with fewer evaluations than random search. Optuna also supports early pruning (stopping unpromising trials partway through), parallel execution, and visualization of the optimization landscape.
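Before the full example, here is a self-contained numpy/scipy sketch of the density-ratio idea behind TPE. The toy history, the optimum at x = 0.7, and the 0.75 quantile split are illustrative assumptions; real TPE uses adaptive Parzen estimators rather than plain Gaussian KDEs, but the mechanism is the same: propose where good trials are dense and bad trials are sparse.

```python
import numpy as np
from scipy.stats import gaussian_kde

rng = np.random.default_rng(42)

# Hypothetical history: 30 past trials of a 1-D parameter and their scores
xs = rng.uniform(0.0, 1.0, 30)
scores = -(xs - 0.7) ** 2            # higher is better; optimum at x = 0.7

# TPE idea: split trials into "good" (top quantile) and "bad",
# model each group with a density, and propose the candidate that
# maximizes the ratio l(x) / g(x)
cut = np.quantile(scores, 0.75)
good, bad = xs[scores >= cut], xs[scores < cut]
l_kde, g_kde = gaussian_kde(good), gaussian_kde(bad)

candidates = rng.uniform(0.0, 1.0, 200)
ratio = l_kde(candidates) / (g_kde(candidates) + 1e-12)
next_x = candidates[np.argmax(ratio)]
print(next_x)  # typically lands near the optimum at 0.7
```

This is why TPE spends fewer trials than random search: each proposal is biased toward regions that already produced good scores.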
Optuna with Pipeline Integration and Pruning
import optuna
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold, train_test_split
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
optuna.logging.set_verbosity(optuna.logging.WARNING)
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# OPTUNA OBJECTIVE FUNCTION
from sklearn.metrics import roc_auc_score

def objective(trial: optuna.Trial) -> float:
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 500),
        "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.5, log=True),
        "max_depth": trial.suggest_int("max_depth", 2, 8),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 20),
        "max_features": trial.suggest_float("max_features", 0.4, 1.0),
        "random_state": 42,
    }
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("model", GradientBoostingClassifier(**params)),
    ])
    # Report per-fold AUC so the MedianPruner can stop unpromising trials early;
    # without trial.report(), the pruner configured below would never fire
    fold_aucs = []
    for step, (tr_idx, va_idx) in enumerate(cv.split(X_train, y_train)):
        pipe.fit(X_train[tr_idx], y_train[tr_idx])
        auc = roc_auc_score(y_train[va_idx], pipe.predict_proba(X_train[va_idx])[:, 1])
        fold_aucs.append(auc)
        trial.report(auc, step)
        if trial.should_prune():
            raise optuna.TrialPruned()
    return float(np.mean(fold_aucs))
# RUN STUDY (TPE sampler = Bayesian)
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
    pruner=optuna.pruners.MedianPruner(n_startup_trials=10, n_warmup_steps=5),
)
study.optimize(objective, n_trials=50, show_progress_bar=True)
print(f"\nBest trial: #{study.best_trial.number}")
print(f"Best CV AUC: {study.best_value:.4f}")
print(f"Best params: {study.best_params}")
# COMPARE OPTUNA vs OTHER METHODS
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, randint, uniform
rand_search = RandomizedSearchCV(
    Pipeline([("scaler", StandardScaler()), ("model", GradientBoostingClassifier(random_state=42))]),
    {
        "model__n_estimators": randint(50, 500),
        "model__learning_rate": loguniform(0.005, 0.5),
        "model__max_depth": randint(2, 8),
        "model__subsample": uniform(0.6, 0.4),  # uniform(loc, scale) -> range [0.6, 1.0]
    },
    n_iter=50, cv=cv, scoring="roc_auc", n_jobs=-1, random_state=42,  # same CV splitter as Optuna for a fair comparison
)
rand_search.fit(X_train, y_train)
print(f"\nComparison (50 trials each, 5-fold CV):")
print(f" RandomizedSearchCV: {rand_search.best_score_:.4f}")
print(f"  Optuna (TPE):       {study.best_value:.4f} (often reaches a comparable or better score in fewer trials)")
# TRAIN FINAL MODEL WITH OPTUNA PARAMS
from sklearn.metrics import roc_auc_score
best_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("model", GradientBoostingClassifier(**study.best_params, random_state=42)),
])
best_pipe.fit(X_train, y_train)
# Evaluate once on the held-out test set. Cross-validating on the test split
# (as a naive version might) would refit on test data and inflate the estimate.
test_auc = roc_auc_score(y_test, best_pipe.predict_proba(X_test)[:, 1])
print(f"  Test AUC: {test_auc:.4f}")
# VISUALIZE OPTIMIZATION HISTORY
try:
    optuna.visualization.matplotlib.plot_optimization_history(study)
    plt.title("Optuna Optimization History -- AUC improves as trials progress")
    plt.tight_layout()
    plt.savefig("optuna_history.png", dpi=100, bbox_inches="tight")
    plt.show()
except Exception:
    # matplotlib backend issues -- plot manually.
    # Pruned trials have value None, so filter them out first.
    aucs = [t.value for t in study.trials if t.value is not None]
    best_so_far = np.maximum.accumulate(aucs)
    plt.figure(figsize=(9, 4))
    plt.plot(aucs, "o", alpha=0.4, color="steelblue", label="Trial AUC")
    plt.plot(best_so_far, "r-", linewidth=2, label="Best so far")
    plt.xlabel("Trial")
    plt.ylabel("AUC-ROC")
    plt.title("Optuna Optimization History")
    plt.legend()
    plt.tight_layout()
    plt.savefig("optuna_history.png", dpi=100, bbox_inches="tight")
    plt.show()
Tip
Practice Optuna Bayesian Hyperparameter Optimization in small, isolated examples before integrating into larger projects. Breaking concepts into small experiments builds genuine understanding faster than reading alone.
Optuna is a strong default for hyperparameter search, though tools like Hyperopt, Ray Tune, and scikit-optimize cover similar ground.
Practice Task
Note
Practice Task — (1) Write a working example of Optuna Bayesian Hyperparameter Optimization from scratch without looking at notes. (2) Modify it to handle an edge case (empty input, null value, or error state). (3) Share your solution in the Priygop community for feedback.
Common Mistake
Warning
A common mistake with Optuna Bayesian Hyperparameter Optimization is skipping edge-case testing: empty inputs, null values, and unexpected data types. Always validate boundary conditions to write robust, production-ready ML code.
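As one way to act on that warning, a minimal sketch of guard clauses to run before launching an expensive study; the function name and the specific checks are illustrative assumptions, not part of Optuna's API:

```python
import numpy as np

def validate_inputs(X, y):
    """Basic boundary checks before starting a hyperparameter search."""
    X = np.asarray(X, dtype=float)
    y = np.asarray(y)
    if X.size == 0 or y.size == 0:
        raise ValueError("empty input: nothing to optimize on")
    if X.ndim != 2:
        raise ValueError(f"expected a 2-D feature matrix, got ndim={X.ndim}")
    if len(X) != len(y):
        raise ValueError(f"length mismatch: {len(X)} rows vs {len(y)} labels")
    if np.isnan(X).any():
        raise ValueError("NaN values in features; impute or drop them first")
    return X, y

X_ok, y_ok = validate_inputs([[1.0, 2.0], [3.0, 4.0]], [0, 1])
```

Failing fast here is much cheaper than discovering a data problem 30 trials into a study.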