Source code for etna.transforms.decomposition.detrend
import numpy as np
import pandas as pd
from sklearn.base import RegressorMixin
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import TheilSenRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from etna.transforms.base import PerSegmentWrapper
from etna.transforms.base import Transform
from etna.transforms.utils import match_target_quantiles
class _OneSegmentLinearTrendBaseTransform(Transform):
    """LinearTrendBaseTransform is a base class that implements trend subtraction and reconstruction feature."""

    def __init__(self, in_column: str, regressor: RegressorMixin, poly_degree: int = 1):
        """
        Create instance of _OneSegmentLinearTrendBaseTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        regressor:
            instance of sklearn :py:class:`sklearn.base.RegressorMixin` to predict trend
        poly_degree:
            degree of polynomial to fit trend on
        """
        self.in_column = in_column
        self.poly_degree = poly_degree
        self._pipeline = Pipeline(
            [("polynomial", PolynomialFeatures(degree=self.poly_degree, include_bias=False)), ("regressor", regressor)]
        )
        # verification that this variable is fitted isn't needed because this class isn't used by the user
        self._x_median = None

    @staticmethod
    def _get_x(df) -> np.ndarray:
        """
        Convert the index of ``df`` into a column of numeric timestamps.

        Parameters
        ----------
        df:
            dataframe whose index elements provide a ``timestamp()`` method

        Returns
        -------
        np.ndarray
            array of shape ``(len(df), 1)`` holding ``timestamp()`` of every index element

        Raises
        ------
        ValueError
            if the index elements do not provide a ``timestamp()`` method
        """
        series_len = len(df)
        timestamps = df.index.to_series()
        # The previous guard, ``isinstance(type(x.dtype), pd.Timestamp)``, could never be true, so a
        # non-datetime index surfaced as an opaque AttributeError. Translate that failure into the
        # intended ValueError instead; every index that used to work is still accepted.
        try:
            x = timestamps.apply(lambda ts: ts.timestamp())
        except AttributeError as err:
            raise ValueError(
                "Your timestamp column has wrong format. Need np.datetime64 or datetime.datetime"
            ) from err
        return x.to_numpy().reshape(series_len, 1)

    def _predict_trend(self, df: pd.DataFrame) -> np.ndarray:
        """Predict trend values for the index of ``df`` with the fitted pipeline."""
        x = self._get_x(df)
        # apply the same centering that was used in ``fit``
        x -= self._x_median
        return self._pipeline.predict(x)

    def fit(self, df: pd.DataFrame) -> "_OneSegmentLinearTrendBaseTransform":
        """
        Fit regression detrend_model with data from df.

        Parameters
        ----------
        df:
            data that regressor should be trained with

        Returns
        -------
        _OneSegmentLinearTrendBaseTransform
            instance with trained regressor
        """
        # rows without a value in the processed column cannot be used for training
        df = df.dropna(subset=[self.in_column])
        x = self._get_x(df)
        # Center timestamps around their median before building polynomial features;
        # NOTE(review): presumably this keeps powers of epoch-sized values numerically manageable.
        self._x_median = np.median(x)
        x -= self._x_median
        y = df[self.in_column].tolist()
        self._pipeline.fit(x, y)
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Transform data from df: subtract linear trend found by regressor.

        Parameters
        ----------
        df:
            data to subtract trend from

        Returns
        -------
        pd.DataFrame
            residue after trend subtraction
        """
        result = df.copy()
        trend = self._predict_trend(df)
        result[self.in_column] = df[self.in_column].values - trend
        return result

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Fit regression detrend_model with data from df and subtract the trend from df.

        Parameters
        ----------
        df:
            data to train regressor and transform

        Returns
        -------
        pd.DataFrame
            residue after trend subtraction
        """
        return self.fit(df).transform(df)

    def inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Inverse transformation for trend subtraction: add trend to prediction.

        Parameters
        ----------
        df:
            data to transform

        Returns
        -------
        pd.DataFrame
            data with reconstructed trend
        """
        result = df.copy()
        trend = self._predict_trend(df)
        result[self.in_column] = df[self.in_column].values + trend
        if self.in_column == "target":
            # columns matched by match_target_quantiles receive the same trend as the target itself
            quantiles = match_target_quantiles(set(result.columns))
            for quantile_column_nm in quantiles:
                result.loc[:, quantile_column_nm] += trend
        return result
class LinearTrendTransform(PerSegmentWrapper):
    """
    Transform that finds linear or polynomial trend in data with :py:class:`sklearn.linear_model.LinearRegression`.

    It wraps :py:class:`_OneSegmentLinearTrendBaseTransform` into ``PerSegmentWrapper``.

    Warning
    -------
    This transform can suffer from look-ahead bias. For transforming data at some timestamp
    it uses information from the whole train part.
    """

    def __init__(self, in_column: str, poly_degree: int = 1, **regression_params):
        """Create instance of LinearTrendTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        poly_degree:
            degree of polynomial to fit trend on
        regression_params:
            keyword arguments forwarded to the constructor of
            :py:class:`sklearn.linear_model.LinearRegression`
        """
        self.in_column = in_column
        self.poly_degree = poly_degree
        self.regression_params = regression_params
        # build the wrapped transform first, then hand it over to the wrapper
        trend_regressor = LinearRegression(**regression_params)
        one_segment_transform = _OneSegmentLinearTrendBaseTransform(
            in_column=in_column, regressor=trend_regressor, poly_degree=poly_degree
        )
        super().__init__(transform=one_segment_transform)
class TheilSenTrendTransform(PerSegmentWrapper):
    """
    Transform that finds linear or polynomial trend in data with :py:class:`sklearn.linear_model.TheilSenRegressor`.

    It wraps :py:class:`_OneSegmentLinearTrendBaseTransform` into ``PerSegmentWrapper``.

    Warning
    -------
    This transform can suffer from look-ahead bias. For transforming data at some timestamp
    it uses information from the whole train part.

    Notes
    -----
    Setting parameter ``n_subsamples`` manually might cause the error. It should be at least the number
    of features (plus 1 if ``fit_intercept=True``) and the number of samples in the shortest segment as a maximum.
    """

    def __init__(self, in_column: str, poly_degree: int = 1, **regression_params):
        """Create instance of TheilSenTrendTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        poly_degree:
            degree of polynomial to fit trend on
        regression_params:
            keyword arguments forwarded to the constructor of
            :py:class:`sklearn.linear_model.TheilSenRegressor`
        """
        self.in_column = in_column
        self.poly_degree = poly_degree
        self.regression_params = regression_params
        # build the wrapped transform first, then hand it over to the wrapper
        trend_regressor = TheilSenRegressor(**regression_params)
        one_segment_transform = _OneSegmentLinearTrendBaseTransform(
            in_column=in_column, regressor=trend_regressor, poly_degree=poly_degree
        )
        super().__init__(transform=one_segment_transform)