from pathlib import Path
from typing import Any
from typing import Dict
from typing import Optional
import hydra_slayer
import pandas as pd
import typer
from omegaconf import OmegaConf
from etna.datasets import TSDataset
from etna.pipeline import Pipeline
def backtest(
    config_path: Path = typer.Argument(..., help="path to yaml config with desired pipeline"),
    backtest_config_path: Path = typer.Argument(..., help="path to backtest config file"),
    target_path: Path = typer.Argument(..., help="path to csv with data to forecast"),
    freq: str = typer.Argument(..., help="frequency of timestamp in files in pandas format"),
    output_path: Path = typer.Argument(..., help="where to save forecast"),
    exog_path: Optional[Path] = typer.Argument(default=None, help="path to csv with exog data"),
) -> None:
    """Command to run backtest with etna without coding.

    Expected format of csv with target timeseries:

    \b
    =============  ===========  ==========
    timestamp      segment      target
    =============  ===========  ==========
    2020-01-01     segment_1    1
    2020-01-02     segment_1    2
    2020-01-03     segment_1    3
    2020-01-04     segment_1    4
    ...
    2020-01-10     segment_2    10
    2020-01-11     segment_2    20
    =============  ===========  ==========

    Expected format of csv with exogenous timeseries:

    \b
    =============  ===========  ===============  ===============
    timestamp      segment      regressor_1      regressor_2
    =============  ===========  ===============  ===============
    2020-01-01     segment_1    11               12
    2020-01-02     segment_1    22               13
    2020-01-03     segment_1    31               14
    2020-01-04     segment_1    42               15
    ...
    2020-02-10     segment_2    101              61
    2020-02-11     segment_2    205              54
    =============  ===========  ===============  ===============
    """
    # NOTE: the docstring above doubles as the CLI help text (the function is
    # handed to ``typer.run``). The ``\b`` lines are intentionally a non-raw
    # backspace escape on a paragraph of their own; presumably this is the
    # click/typer "do not rewrap the next paragraph" marker that keeps the
    # tables intact -- do not turn the docstring into a raw string.
    #
    # Arguments:
    #   config_path          -- yaml file describing the pipeline to build.
    #   backtest_config_path -- yaml file with keyword arguments for the backtest.
    #   target_path          -- csv with ``timestamp``/``segment``/``target`` columns.
    #   freq                 -- timestamp frequency passed to ``TSDataset``.
    #   output_path          -- directory that receives ``metrics.csv``,
    #                           ``forecast.csv`` and ``info.csv``.
    #   exog_path            -- optional csv with exogenous data.

    # Both configs are loaded as plain Python containers so they can be
    # unpacked into ``hydra_slayer.get_from_params``.
    pipeline_configs = OmegaConf.to_object(OmegaConf.load(config_path))
    backtest_configs = OmegaConf.to_object(OmegaConf.load(backtest_config_path))

    # Target series: read the flat csv, then convert to the dataset layout.
    df_timeseries = pd.read_csv(target_path, parse_dates=["timestamp"])
    df_timeseries = TSDataset.to_dataset(df_timeseries)

    # Exogenous data is optional; it goes through the same conversion.
    df_exog = None
    if exog_path is not None:
        df_exog = pd.read_csv(exog_path, parse_dates=["timestamp"])
        df_exog = TSDataset.to_dataset(df_exog)

    tsdataset = TSDataset(df=df_timeseries, freq=freq, df_exog=df_exog)

    pipeline: Pipeline = hydra_slayer.get_from_params(**pipeline_configs)
    backtest_configs_hydra_slayer: Dict[str, Any] = hydra_slayer.get_from_params(**backtest_configs)

    # Make sure the destination directory exists *before* the backtest runs,
    # so a missing directory cannot discard the results of the run when the
    # csv files are written below.
    output_path.mkdir(parents=True, exist_ok=True)

    metrics, forecast, info = pipeline.backtest(ts=tsdataset, **backtest_configs_hydra_slayer)

    # Persist the three backtest outputs side by side in ``output_path``.
    metrics.to_csv(output_path / "metrics.csv", index=False)
    TSDataset.to_flatten(forecast).to_csv(output_path / "forecast.csv", index=False)
    info.to_csv(output_path / "info.csv", index=False)
# Script entry point: expose ``backtest`` as a command-line program via typer
# when this module is executed directly (not when it is imported).
if __name__ == "__main__":
    typer.run(backtest)