Source code for etna.commands.backtest_command

from pathlib import Path
from typing import Any
from typing import Dict
from typing import Optional

import hydra_slayer
import pandas as pd
import typer
from omegaconf import OmegaConf

from etna.datasets import TSDataset
from etna.pipeline import Pipeline


[docs]def backtest(
    config_path: Path = typer.Argument(..., help="path to yaml config with desired pipeline"),
    backtest_config_path: Path = typer.Argument(..., help="path to backtest config file"),
    target_path: Path = typer.Argument(..., help="path to csv with data to forecast"),
    freq: str = typer.Argument(..., help="frequency of timestamp in files in pandas format"),
    output_path: Path = typer.Argument(..., help="where to save forecast"),
    exog_path: Optional[Path] = typer.Argument(default=None, help="path to csv with exog data"),
):
    """Command to run backtest with etna without coding.

    Expected format of csv with target timeseries:

    \b
    =============  ===========  ==========
      timestamp      segment      target
    =============  ===========  ==========
    2020-01-01     segment_1         1
    2020-01-02     segment_1         2
    2020-01-03     segment_1         3
    2020-01-04     segment_1         4
    ...
    2020-01-10     segment_2        10
    2020-01-11     segment_2        20
    =============  ===========  ==========

    Expected format of csv with exogenous timeseries:

    \b
    =============  ===========  ===============  ===============
      timestamp      segment      regressor_1      regressor_2
    =============  ===========  ===============  ===============
    2020-01-01     segment_1          11               12
    2020-01-02     segment_1          22               13
    2020-01-03     segment_1          31               14
    2020-01-04     segment_1          42               15
    ...
    2020-02-10     segment_2         101               61
    2020-02-11     segment_2         205               54
    =============  ===========  ===============  ===============
    """
    pipeline_configs = OmegaConf.to_object(OmegaConf.load(config_path))
    backtest_configs = OmegaConf.to_object(OmegaConf.load(backtest_config_path))

    df_timeseries = pd.read_csv(target_path, parse_dates=["timestamp"])

    df_timeseries = TSDataset.to_dataset(df_timeseries)

    df_exog = None
    if exog_path:
        df_exog = pd.read_csv(exog_path, parse_dates=["timestamp"])
        df_exog = TSDataset.to_dataset(df_exog)

    tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog)

    pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs)
    backtest_configs_hydra_slayer: Dict[str, Any] = hydra_slayer.get_from_params(**backtest_configs)

    metrics, forecast, info = pipeline.backtest(ts=tsdataset, **backtest_configs_hydra_slayer)

    (metrics.to_csv(output_path / "metrics.csv", index=False))
    (TSDataset.to_flatten(forecast).to_csv(output_path / "forecast.csv", index=False))
    (info.to_csv(output_path / "info.csv", index=False))


if __name__ == "__main__":
    typer.run(backtest)