import functools
import inspect
from abc import ABC
from abc import abstractmethod
from copy import deepcopy
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Sequence
from typing import Union
import numpy as np
import pandas as pd
from etna.core.mixins import BaseMixin
from etna.datasets.tsdataset import TSDataset
from etna.loggers import tslogger
# TODO: make PyCharm see signature of decorated method
def log_decorator(f):
    """Add logging for method of the model.

    Parameters
    ----------
    f:
        Method to wrap; it is invoked as ``f(self, *args, **kwargs)``.

    Returns
    -------
    :
        Wrapper that logs the call through ``tslogger`` and then returns the result of ``f``.
    """
    # Location metadata is gathered once, at decoration time, and reused for every call.
    # Decoration happens while the class body is executed, so a failure here would break
    # the import of the whole module; fall back gracefully when source is unavailable
    # (e.g. dynamically created functions or source-less distributions).
    try:
        line = inspect.getsourcelines(f)[1]
    except (OSError, TypeError):
        # NOTE(review): ``line`` may end up as None here - confirm the logger tolerates it.
        line = getattr(getattr(f, "__code__", None), "co_firstlineno", None)

    module = inspect.getmodule(f)
    module_name = module.__name__ if module is not None else getattr(f, "__module__", None)

    patch_dict = {"function": f.__name__, "line": line, "name": module_name}

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        tslogger.log(f"Calling method {f.__name__} of {self.__class__.__name__}", **patch_dict)
        return f(self, *args, **kwargs)

    return wrapper
class Model(ABC, BaseMixin):
    """Class for holding specific models - autoregression and simple regressions."""

    def __init__(self):
        # Storage for fitted models; None until something assigns it.
        self._models = None

    @abstractmethod
    def fit(self, ts: TSDataset) -> "Model":
        """Fit model.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Model after fit
        """

    @abstractmethod
    def forecast(
        self, ts: TSDataset, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975)
    ) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features
        prediction_interval:
            If True returns prediction interval for forecast
        quantiles:
            Levels of prediction distribution. By default 2.5% and 97.5% are taken to form a 95% prediction interval

        Returns
        -------
        :
            Dataset with predictions
        """

    @staticmethod
    def _forecast_segment(model, segment: Union[str, List[str]], ts: TSDataset) -> pd.DataFrame:
        """Make predictions for the given segment with the given model.

        The segment slice is taken from ``ts``, the "segment" column level is dropped and the
        index is turned into a regular column before the frame is handed to ``model.predict``.
        The returned frame has "target", "segment" and "timestamp" columns.
        """
        features = ts[:, segment, :].droplevel("segment", axis=1).reset_index()

        timestamps = features["timestamp"]
        timestamps.reset_index(drop=True, inplace=True)

        forecast_df = pd.DataFrame({"target": model.predict(df=features)})
        forecast_df["segment"] = segment
        forecast_df["timestamp"] = timestamps
        return forecast_df
class FitAbstractModel(ABC):
    """Interface for model with fit method."""

    @abstractmethod
    def fit(self, ts: TSDataset) -> "FitAbstractModel":
        """Fit model.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Model after fit
        """

    @abstractmethod
    def get_model(self) -> Union[Any, Dict[str, Any]]:
        """Get internal model/models that are used inside etna class.

        Internal model is a model that is used inside etna to forecast segments,
        e.g. :py:class:`catboost.CatBoostRegressor` or :py:class:`sklearn.linear_model.Ridge`.

        Returns
        -------
        :
            The result can be of two types:

            * if model is multi-segment, then the result is internal model

            * if model is per-segment, then the result is dictionary where key is segment and value is internal model
        """
class ForecastAbstractModel(ABC):
    """Interface for model with forecast method."""

    @abstractmethod
    def forecast(self, ts: TSDataset) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Dataset with predictions
        """
class PredictIntervalAbstractModel(ABC):
    """Interface for model with forecast method that creates prediction interval."""

    @abstractmethod
    def forecast(
        self, ts: TSDataset, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975)
    ) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features
        prediction_interval:
            If True returns prediction interval for forecast
        quantiles:
            Levels of prediction distribution. By default 2.5% and 97.5% are taken to form a 95% prediction interval

        Returns
        -------
        :
            Dataset with predictions
        """
class PerSegmentBaseModel(FitAbstractModel, BaseMixin):
    """Base class for holding specific models for per-segment prediction."""

    def __init__(self, base_model: Any):
        """
        Init PerSegmentBaseModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        self._base_model = base_model
        # Mapping segment -> fitted copy of the base model; None until ``fit`` succeeds.
        self._models: Optional[Dict[str, Any]] = None

    @log_decorator
    def fit(self, ts: TSDataset) -> "PerSegmentBaseModel":
        """Fit model.

        A separate deep copy of the base model is fitted for every segment of ``ts``.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Model after fit
        """
        # Drop any previous state first and publish the new models only after every segment
        # has been fitted: a failure part-way through must leave the instance "not fitted"
        # instead of exposing unfitted copies through ``_get_model``.
        self._models = None
        fitted_models: Dict[str, Any] = {}
        for segment in ts.segments:
            model = deepcopy(self._base_model)
            segment_features = ts[:, segment, :]
            segment_features = segment_features.dropna()  # TODO: https://github.com/tinkoff-ai/etna/issues/557
            segment_features = segment_features.droplevel("segment", axis=1)
            segment_features = segment_features.reset_index()
            model.fit(df=segment_features, regressors=ts.regressors)
            fitted_models[segment] = model
        self._models = fitted_models
        return self

    def _get_model(self) -> Dict[str, Any]:
        """Get internal etna base models that are used inside etna class.

        Returns
        -------
        :
            dictionary where key is segment and value is internal model

        Raises
        ------
        ValueError:
            If the model is not fitted
        """
        if self._models is None:
            raise ValueError("Can not get the dict with base models, the model is not fitted!")
        return self._models

    def get_model(self) -> Dict[str, Any]:
        """Get internal models that are used inside etna class.

        Internal model is a model that is used inside etna to forecast segments,
        e.g. :py:class:`catboost.CatBoostRegressor` or :py:class:`sklearn.linear_model.Ridge`.

        Returns
        -------
        :
            dictionary where key is segment and value is internal model

        Raises
        ------
        ValueError:
            If the model is not fitted
        NotImplementedError:
            If a per-segment base model does not provide ``get_model``
        """
        internal_models = {}
        for segment, base_model in self._get_model().items():
            if not hasattr(base_model, "get_model"):
                raise NotImplementedError(
                    f"get_model method is not implemented for {self._base_model.__class__.__name__}"
                )
            internal_models[segment] = base_model.get_model()
        return internal_models

    @staticmethod
    def _forecast_segment(model: Any, segment: str, ts: TSDataset, *args, **kwargs) -> pd.DataFrame:
        """Make predictions for one segment.

        Extra positional and keyword arguments are forwarded to ``model.predict``.
        The returned frame gets "segment" and "timestamp" columns; when ``predict`` returns
        a numpy array it is first wrapped into a frame with a single "target" column.
        """
        segment_features = ts[:, segment, :]
        segment_features = segment_features.droplevel("segment", axis=1)
        segment_features = segment_features.reset_index()
        dates = segment_features["timestamp"]
        dates.reset_index(drop=True, inplace=True)
        # Positional extras are spelled before the keyword: the binding is the same as in
        # ``predict(df=..., *args)``, but the call order now matches how Python evaluates it.
        segment_predict = model.predict(*args, df=segment_features, **kwargs)
        if isinstance(segment_predict, np.ndarray):
            segment_predict = pd.DataFrame({"target": segment_predict})
        segment_predict["segment"] = segment
        segment_predict["timestamp"] = dates
        return segment_predict
class PerSegmentModel(PerSegmentBaseModel, ForecastAbstractModel):
    """Class for holding specific models for per-segment prediction."""

    def __init__(self, base_model: Any):
        """
        Init PerSegmentModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        super().__init__(base_model=base_model)

    @log_decorator
    def forecast(self, ts: TSDataset) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Dataset with predictions
        """
        # One prediction frame per fitted segment model.
        per_segment = [
            self._forecast_segment(model=model, segment=segment, ts=ts)
            for segment, model in self._get_model().items()
        ]
        predicted = pd.concat(per_segment, ignore_index=True).set_index(["timestamp", "segment"])

        # Values already present in the dataset are kept; predictions fill the gaps.
        current = ts.to_pandas(flatten=True).set_index(["timestamp", "segment"])
        merged = current.combine_first(predicted).reset_index()

        ts.df = TSDataset.to_dataset(merged)
        ts.inverse_transform()
        return ts
class PerSegmentPredictionIntervalModel(PerSegmentBaseModel, PredictIntervalAbstractModel):
    """Class for holding specific models for per-segment prediction which are able to build prediction intervals."""

    def __init__(self, base_model: Any):
        """
        Init PerSegmentPredictionIntervalModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        super().__init__(base_model=base_model)

    @log_decorator
    def forecast(
        self, ts: TSDataset, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975)
    ) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features
        prediction_interval:
            If True returns prediction interval for forecast
        quantiles:
            Levels of prediction distribution. By default 2.5% and 97.5% are taken to form a 95% prediction interval

        Returns
        -------
        :
            Dataset with predictions
        """
        # One prediction frame per fitted segment model; interval settings are forwarded as is.
        per_segment = [
            self._forecast_segment(
                model=model, segment=segment, ts=ts, prediction_interval=prediction_interval, quantiles=quantiles
            )
            for segment, model in self._get_model().items()
        ]
        predicted = pd.concat(per_segment, ignore_index=True).set_index(["timestamp", "segment"])

        # Values already present in the dataset are kept; predictions fill the gaps.
        current = ts.to_pandas(flatten=True).set_index(["timestamp", "segment"])
        merged = current.combine_first(predicted).reset_index()

        ts.df = TSDataset.to_dataset(merged)
        ts.inverse_transform()
        return ts
class MultiSegmentModel(FitAbstractModel, ForecastAbstractModel, BaseMixin):
    """Class for holding specific models for multi-segment prediction."""

    def __init__(self, base_model: Any):
        """
        Init MultiSegmentModel.

        Parameters
        ----------
        base_model:
            Internal model which will be used to forecast segments, expected to have fit/predict interface
        """
        self._base_model = base_model

    @log_decorator
    def fit(self, ts: TSDataset) -> "MultiSegmentModel":
        """Fit model.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Model after fit
        """
        flat = ts.to_pandas(flatten=True)
        # TODO: https://github.com/tinkoff-ai/etna/issues/557
        train = flat.dropna().drop(columns="segment")
        self._base_model.fit(df=train, regressors=ts.regressors)
        return self

    @log_decorator
    def forecast(self, ts: TSDataset) -> TSDataset:
        """Make predictions.

        Parameters
        ----------
        ts:
            Dataset with features

        Returns
        -------
        :
            Dataset with predictions
        """
        n_timestamps = len(ts.df)
        features = ts.to_pandas(flatten=True).drop(columns="segment")
        flat_prediction = self._base_model.predict(features)
        # Fold the flat prediction vector into one column per segment.
        # NOTE(review): presumably the flattened frame stacks segments one after another in
        # the same order as the dataset columns - confirm against ``TSDataset.to_pandas``.
        ts.loc[:, pd.IndexSlice[:, "target"]] = flat_prediction.reshape(-1, n_timestamps).T
        ts.inverse_transform()
        return ts

    def get_model(self) -> Any:
        """Get internal model that is used inside etna class.

        Internal model is a model that is used inside etna to forecast segments,
        e.g. :py:class:`catboost.CatBoostRegressor` or :py:class:`sklearn.linear_model.Ridge`.

        Returns
        -------
        :
            Internal model
        """
        if hasattr(self._base_model, "get_model"):
            return self._base_model.get_model()
        raise NotImplementedError(f"get_model method is not implemented for {self._base_model.__class__.__name__}")
class BaseAdapter(ABC):
    """Base class for models adapter."""

    @abstractmethod
    def get_model(self) -> Any:
        """Get internal model that is used inside etna class.

        Internal model is a model that is used inside etna to forecast segments,
        e.g. :py:class:`catboost.CatBoostRegressor` or :py:class:`sklearn.linear_model.Ridge`.

        Returns
        -------
        :
            Internal model
        """
# Type alias covering the concrete model base classes defined in this module.
BaseModel = Union[
    PerSegmentModel,
    PerSegmentPredictionIntervalModel,
    MultiSegmentModel,
]