tinkoff-ai · alex-hse-repository · Oct 10, 2022 · Dec 1, 2021 · Dec 1, 2021 · Dec 30, 2021
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,7 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 -
 -
 -
--
+- Add wandb sweeps and optuna examples ([#338](https://github.com/tinkoff-ai/etna/pull/338))
 -
 -
 ### Changed

diff --git a/Makefile b/Makefile
@@ -1,13 +1,19 @@
-lint: isort-check black-check flake8-check mypy-check spell-check imported-deps-check notebooks-check
+lint: isort-check black-check flake8-check mypy-check spell-check imported-deps-check notebooks-check isort-check-examples black-check-examples
 
 isort-check:
 	isort --skip etna/libs --sl -c etna/
 	isort --skip etna/libs --sl -c tests/
 
+# Pass the directory instead of `examples/**/*.py`: make runs recipes with /bin/sh,
+# where `**` does not recurse, so files nested deeper than one level were skipped.
+isort-check-examples:
+	isort --sl -c examples/
+
 black-check:
 	black --check etna/
 	black --check tests/
 
+# Let black walk the tree instead of `examples/**/*.py`: make runs recipes with /bin/sh,
+# where `**` does not recurse. --include keeps this target limited to .py files.
+black-check-examples:
+	black --check --include '\.py$$' examples/
+
 flake8-check:
 	flake8 --exclude etna/libs etna/
 	flake8 --exclude etna/libs tests/ --select E,W,C,F401,N
@@ -25,6 +31,10 @@ imported-deps-check:
 notebooks-check:
 	black --check examples/*.ipynb
 
+# Directories are passed instead of `examples/**/*.py`: make runs recipes with /bin/sh,
+# where `**` does not recurse. --include keeps black limited to .py files.
+format-examples:
+	isort --sl examples/
+	black --include '\.py$$' examples/
+
 format:
 	isort --skip etna/libs --sl etna/
 	isort --skip etna/libs --sl tests/

diff --git a/examples/README.md b/examples/README.md
@@ -49,3 +49,7 @@ We have prepared a set of tutorials for an easy introduction:
 
 #### 08. [Auto](https://github.com/tinkoff-ai/etna/tree/master/examples/auto.py)
 - Auto pipeline search
+
+#### 09. Hyperparameter search
+- [Optuna](https://github.com/tinkoff-ai/etna/tree/master/examples/optuna)
+- [WandB sweeps](https://github.com/tinkoff-ai/etna/tree/master/examples/wandb/sweeps) example based on [Hydra](https://hydra.cc/)
diff --git a/examples/optuna/.gitignore b/examples/optuna/.gitignore
@@ -0,0 +1,2 @@
+wandb
+optuna.db
diff --git a/examples/optuna/README.md b/examples/optuna/README.md
@@ -0,0 +1,15 @@
+# Optuna TPE hyperparameter tuning example
+
+Define your pipeline and hyperparameters in `optuna_example.py`. In this example we optimize the number of `iterations`, the `depth`, and the number of `lags` for `CatBoostModelMultiSegment`.
+
+Run optimization:
+
+```bash
+    python optuna_example.py --n-trials=100 --metric-name=MAE
+```
+
+You can use [optuna dashboard](https://github.com/optuna/optuna-dashboard) to visualize the results:
+
+```bash
+    optuna-dashboard sqlite:///<storage-name>
+```
diff --git a/examples/optuna/optuna_example.py b/examples/optuna/optuna_example.py
@@ -0,0 +1,111 @@
+import random
+from functools import partial
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import optuna
+import pandas as pd
+import typer
+
+from etna.datasets import TSDataset
+from etna.datasets import generate_ar_df
+from etna.loggers import WandbLogger
+from etna.loggers import tslogger
+from etna.metrics import MAE
+from etna.metrics import MSE
+from etna.metrics import SMAPE
+from etna.metrics import Sign
+from etna.models import CatBoostModelMultiSegment
+from etna.pipeline import Pipeline
+from etna.transforms import LagTransform
+from etna.transforms import SegmentEncoderTransform
+from etna.transforms import StandardScalerTransform
+
+# Location of this script; the default dataset path of the CLI is built relative to it.
+FILE_PATH = Path(__file__)
+
+# Typer application on which the CLI command of this script is registered.
+app = typer.Typer()
+
+
+def set_seed(seed: int = 42):
+    """Seed Python's ``random`` module and NumPy's global generator with ``seed``."""
+    for seeder in (random.seed, np.random.seed):
+        seeder(seed)
+
+
+def init_logger(config: dict, project: str = "wandb-sweeps", tags: Optional[list] = None):
+    """Replace all loggers registered on ``tslogger`` with a fresh ``WandbLogger``.
+
+    Parameters
+    ----------
+    config:
+        Passed to ``WandbLogger`` as ``config``.
+    project:
+        Passed to ``WandbLogger`` as ``project``.
+    tags:
+        Passed to ``WandbLogger`` as ``tags``; ``None`` selects ``["test", "sweeps"]``.
+    """
+    # Build the default list per call: a list literal in the signature is created
+    # once and would be shared by every logger constructed without explicit tags.
+    if tags is None:
+        tags = ["test", "sweeps"]
+    # Drop loggers registered by previous calls before adding the new one.
+    tslogger.loggers = []
+    tslogger.add(WandbLogger(project=project, tags=tags, config=config))
+
+
+def dataloader(file_path: Path, freq: str = "D") -> TSDataset:
+    """Read the CSV at ``file_path`` and wrap it in a ``TSDataset`` with frequency ``freq``."""
+    raw = pd.read_csv(file_path)
+    return TSDataset(df=TSDataset.to_dataset(raw), freq=freq)
+
+
+def objective(trial: optuna.Trial, metric_name: str, ts: TSDataset, horizon: int, lags: int, seed: int):
+    """Optuna objective: backtest one sampled pipeline and return the mean of ``metric_name``.
+
+    ``iterations`` (10 to 100), ``depth`` (1 to 12) and ``lags`` (1 to the ``lags``
+    argument) are suggested by ``trial``.
+    """
+    # Reseed on every trial for reproducibility
+    set_seed(seed)
+
+    # Sample the hyperparameters of this trial
+    n_iterations = trial.suggest_int("iterations", 10, 100)
+    tree_depth = trial.suggest_int("depth", 1, 12)
+    n_lags = trial.suggest_int("lags", 1, lags)
+
+    # Assemble model and features
+    model = CatBoostModelMultiSegment(iterations=n_iterations, depth=tree_depth)
+    transforms = [
+        StandardScalerTransform("target"),
+        SegmentEncoderTransform(),
+        LagTransform(in_column="target", lags=list(range(horizon, horizon + n_lags))),
+    ]
+    pipeline = Pipeline(model=model, transforms=transforms, horizon=horizon)
+
+    # Register a WandB logger configured with this trial's pipeline
+    init_logger(pipeline.to_dict())
+
+    # Backtest and score
+    metrics_df, _, _ = pipeline.backtest(ts=ts, metrics=[MAE(), SMAPE(), Sign(), MSE()])
+    return metrics_df[metric_name].mean()
+
+
+@app.command()
+def run_optuna(
+    horizon: int = 14,
+    metric_name: str = "MAE",
+    storage: str = "sqlite:///optuna.db",
+    study_name: Optional[str] = None,
+    n_trials: int = 200,
+    file_path: Path = FILE_PATH.parents[1] / "data" / "example_dataset.csv",
+    direction: str = "minimize",
+    freq: str = "D",
+    lags: int = 24,
+    seed: int = 11,
+):
+    """
+    Run optuna optimization for CatBoostModelMultiSegment.
+    """
+    # Dataset that every trial is backtested on
+    ts = dataloader(file_path, freq=freq)
+
+    # Objective with everything except the trial bound in advance
+    trial_objective = partial(objective, metric_name=metric_name, ts=ts, horizon=horizon, lags=lags, seed=seed)
+
+    # Set up the study in the given storage
+    tpe_sampler = optuna.samplers.TPESampler(multivariate=True, group=True)
+    study = optuna.create_study(
+        storage=storage,
+        study_name=study_name,
+        sampler=tpe_sampler,
+        load_if_exists=True,
+        direction=direction,
+    )
+
+    # Launch the search
+    study.optimize(trial_objective, n_trials=n_trials)
+
+
+if __name__ == "__main__":
+    # Invoke the module-level Typer app that `run_optuna` is registered on via
+    # `@app.command()`, instead of wrapping the function in a second ad-hoc app.
+    app()
diff --git a/examples/wandb/sweeps/.gitignore b/examples/wandb/sweeps/.gitignore
@@ -0,0 +1,2 @@
+outputs
+wandb
diff --git a/examples/wandb/sweeps/README.md b/examples/wandb/sweeps/README.md
@@ -1,24 +1,21 @@
-# Using WandB with ETNA library
-
-## Colab example
-
-[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1EBSqqBPaYgLWCRdpC5vMy9RiLBsCEd7I?usp=sharing)  
-
+# Using Weights & Biases with ETNA library
 ![](assets/etna-wandb.png)
 
 [Sweep Dashboard](https://wandb.ai/martins0n/wandb-etna-sweep/sweeps/c7e0r8sq/overview?workspace=user-martins0n)
 
 ## Steps to start
 
-- Define your pipeline and hyperparameters in `pipeline.yaml`, in example we will optimize number of iterations `iterations` and `learning-rate`
+- We will use [Hydra](https://hydra.cc/) to manage our command-line application.
+
+- Define your pipeline and hyperparameters in `config.yaml`. In this example we optimize the number of `iterations`, the `depth`, and the number of `lags` for `CatBoostModelMultiSegment`.
 
 - Define WandB sweeps config `sweep.yaml` and push it to cloud:
 
 ```bash
-WANDB_PROJECT=<project_name> WandB sweep sweep.yaml 
+WANDB_PROJECT=<project_name> wandb sweep sweep.yaml 
 ```
 
-- You may change `dataloader` function and add additional parameters for WandB logger like tags for example in `run.py`
+- You may change the `dataloader` function and pass additional parameters (for example, tags) to the WandB logger in `sweeps_example.py`.
 
 - Run WandB agent for hyperparameters optimization start:
 

diff --git a/examples/wandb/sweeps/config.yaml b/examples/wandb/sweeps/config.yaml
@@ -0,0 +1,32 @@
+backtest:
+  metrics:
+  - _target_: etna.metrics.MAE
+  - _target_: etna.metrics.MSE
+  - _target_: etna.metrics.MAPE
+  - _target_: etna.metrics.SMAPE
+  - _target_: etna.metrics.Sign
+
+pipeline:
+  _target_: etna.pipeline.Pipeline
+  horizon: 14
+  model:
+    _target_: etna.models.CatBoostModelMultiSegment
+    iterations: ${iterations}
+    depth: ${depth}
+  transforms:
+    - _target_: etna.transforms.StandardScalerTransform
+      in_column: target
+    - _target_: etna.transforms.SegmentEncoderTransform
+    - _target_: etna.transforms.LagTransform
+      in_column: target
+      lags: ${range:${pipeline.horizon},${sum:${pipeline.horizon},${lags}}}
+
+dataset:
+  file_path: ${oc.env:PWD}/../../data/example_dataset.csv
+  freq: D
+
+iterations: ???
+depth: ???
+lags: ???
+
+seed: 11
diff --git a/examples/wandb/sweeps/pipeline.yaml b/examples/wandb/sweeps/pipeline.yaml
diff --git a/examples/wandb/sweeps/run.py b/examples/wandb/sweeps/run.py
diff --git a/examples/wandb/sweeps/sweep.yaml b/examples/wandb/sweeps/sweep.yaml
@@ -1,18 +1,23 @@
 program:
-  run.py
+  sweeps_example.py
 method: bayes
 parameters:
-  learning-rate:
-    min: 0.0001
-    max: 0.1
   iterations:
     distribution: int_uniform
-    min: 2
-    max: 30
+    min: 10
+    max: 100
+  depth:
+    distribution: int_uniform
+    min: 1
+    max: 12  
+  lags:
+    distribution: int_uniform
+    min: 1
+    max: 24
 metric:
-  name: MAE_median
+  name: MAE_mean
   goal: minimize
 command:
   - python
-  - run.py
-  - ${args}
+  - sweeps_example.py
+  - ${args_no_hyphens}