Optimizes forecaster hyperparameters using grid search, random search, or Bayesian search (Optuna). Covers single-series and multi-series search, cross-validation configuration, and search space definition. Use when the user wants to find the best model configuration.
See references/search-parameters.md for
the complete parameter comparison across all 9 search functions, function
routing by forecaster type, and lags_grid / search_space / param_grid
usage details.
Use hyperparameter search after establishing a baseline forecaster to improve prediction accuracy. Skforecast supports three strategies:
| Strategy | When to Use | Speed |
|---|---|---|
| Bayesian Search | Recommended default. Smart exploration via Optuna | Fastest to converge |
| Random Search | Large parameter space, limited compute budget | Medium |
| Grid Search | Small parameter space, exhaustive exploration | Slowest |
Always prefer Bayesian search as the default strategy. It uses Optuna to intelligently explore the search space.
from skforecast.recursive import ForecasterRecursive
from skforecast.model_selection import bayesian_search_forecaster, TimeSeriesFold
from lightgbm import LGBMRegressor
# Baseline recursive forecaster: a LightGBM regressor with 24 lags.
base_regressor = LGBMRegressor(random_state=123)
forecaster = ForecasterRecursive(
    estimator=base_regressor,
    lags=24,
)

# Backtesting folds: predict 12 steps per fold, hold out the last 100
# observations for validation, and keep the fitted model fixed across folds.
validation_size = 100
cv = TimeSeriesFold(
    steps=12,
    initial_train_size=len(data) - validation_size,
    refit=False,
)
# The search space is a function of an Optuna trial — unlike grid/random
# search, 'lags' can be tuned here as just another hyperparameter.
def search_space(trial):
    """Sample one candidate hyperparameter configuration from the trial."""
    candidate = {}
    candidate['lags'] = trial.suggest_categorical('lags', [12, 24, [1, 2, 3, 23, 24]])
    candidate['n_estimators'] = trial.suggest_int('n_estimators', 50, 500)
    candidate['max_depth'] = trial.suggest_int('max_depth', 3, 15)
    candidate['learning_rate'] = trial.suggest_float('learning_rate', 0.01, 0.3, log=True)
    candidate['reg_alpha'] = trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True)
    return candidate
# Run the Bayesian (Optuna) search.
# n_trials=20 is the default. Increase for better results (50-200 recommended).
results, best_trial = bayesian_search_forecaster(
    forecaster=forecaster,
    y=data['target'],
    exog=exog,  # optional exogenous variables aligned with y
    cv=cv,
    search_space=search_space,
    metric='mean_absolute_error',
    n_trials=20,
    random_state=123,  # makes the Optuna sampling reproducible
    return_best=True,  # Automatically updates forecaster with best params
    n_jobs='auto',
    show_progress=True,
    output_file='search_results.csv',  # Save results incrementally
)
# results is a DataFrame sorted by metric (best first);
# best_trial is the Optuna trial for the best configuration found.
from skforecast.model_selection import grid_search_forecaster
# Candidate lag configurations to evaluate (plain ints or explicit lag lists).
lags_grid = [3, 10, 24, [1, 2, 3, 23, 24]]

# Cartesian grid of regressor hyperparameters (3 * 3 * 2 = 18 combinations).
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[5, 10, 15],
    learning_rate=[0.01, 0.1],
)
# Exhaustive grid search: every lags_grid x param_grid combination is
# backtested with the cv folds — can be slow for large grids.
results = grid_search_forecaster(
    forecaster=forecaster,
    y=data['target'],
    exog=exog,
    cv=cv,
    lags_grid=lags_grid,
    param_grid=param_grid,
    metric='mean_absolute_error',
    return_best=True,  # refit forecaster with the best configuration found
    n_jobs='auto',
    show_progress=True,
)
from skforecast.model_selection import random_search_forecaster
# Note: random search uses param_distributions (not param_grid) and n_iter.
param_distributions = dict(
    n_estimators=[50, 100, 200, 500],
    max_depth=[3, 5, 10, 15],
    learning_rate=[0.01, 0.05, 0.1, 0.3],
)
# Random search: evaluates n_iter randomly sampled parameter combinations.
results = random_search_forecaster(
    forecaster=forecaster,
    y=data['target'],
    exog=exog,
    cv=cv,
    lags_grid=lags_grid,  # candidate lag configurations (defined above)
    param_distributions=param_distributions,
    n_iter=10,  # Number of random parameter combinations to try
    random_state=123,  # makes the sampled combinations reproducible
    metric='mean_absolute_error',
    return_best=True,
    n_jobs='auto',
    show_progress=True,
)
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.model_selection import bayesian_search_forecaster_multiseries
# Multi-series forecaster: one LightGBM model shared across all series,
# with each series identified via an ordinal encoding.
shared_regressor = LGBMRegressor(random_state=123)
forecaster = ForecasterRecursiveMultiSeries(
    estimator=shared_regressor,
    lags=24,
    encoding='ordinal',
)

# Same fold layout as the single-series case, sized on the series frame.
holdout = 100
cv = TimeSeriesFold(
    steps=12,
    initial_train_size=len(series) - holdout,
    refit=False,
)
# Multi-series Bayesian search — reuses the same search_space function.
results, best_trial = bayesian_search_forecaster_multiseries(
    forecaster=forecaster,
    series=series,
    exog=exog,
    cv=cv,
    search_space=search_space,
    metric='mean_absolute_error',
    aggregate_metric=['weighted_average', 'average', 'pooling'],  # Default
    levels=None,  # None = evaluate all series; or list of series names
    n_trials=20,
    return_best=True,
    n_jobs='auto',
    show_progress=True,
)
from skforecast.recursive import ForecasterStats
from skforecast.stats import Arima
from skforecast.model_selection import grid_search_stats
# Statistical (ARIMA-family) forecaster — tuned with grid_search_stats
# rather than grid_search_forecaster.
forecaster = ForecasterStats(estimator=Arima(order=(1, 1, 1)))

# ARIMA (p, d, q) order candidates, seasonal orders, and seasonal period m.
param_grid = {
    'order': [(1, 0, 0), (1, 1, 0), (1, 1, 1), (2, 1, 1)],
    'seasonal_order': [(0, 0, 0), (1, 1, 1)],
    'm': [12],
}

results = grid_search_stats(
    forecaster=forecaster,
    y=data['target'],
    cv=cv,
    param_grid=param_grid,
    metric='mean_absolute_error',
    return_best=True,  # refit the forecaster with the best order found
)
from skforecast.model_selection import OneStepAheadFold
# Much faster than TimeSeriesFold — no recursive predictions needed;
# each candidate is scored on one-step-ahead predictions only. Good for
# screening many trials cheaply before a final TimeSeriesFold validation.
cv_fast = OneStepAheadFold(
    initial_train_size=len(data) - 100,
)

results, best_trial = bayesian_search_forecaster(
    forecaster=forecaster,
    y=data['target'],
    cv=cv_fast,
    search_space=search_space,
    metric='mean_absolute_error',
    n_trials=100,  # cheap folds make many more trials affordable
    return_best=True,
)
- return_best=True: the forecaster is not updated with the best parameters unless this is True.
- Use OneStepAheadFold first for fast screening, then validate the top candidates with TimeSeriesFold.
- Include 'lags' in search_space() — this is often the most impactful parameter.