Source code for etna.transforms.outliers.point_outliers
from typing import Callable
from typing import Dict
from typing import List
from typing import Type
from typing import Union
import pandas as pd
from etna import SETTINGS
from etna.analysis import absolute_difference_distance
from etna.analysis import get_anomalies_density
from etna.analysis import get_anomalies_median
from etna.analysis import get_anomalies_prediction_interval
from etna.datasets import TSDataset
from etna.models import SARIMAXModel
from etna.transforms.outliers.base import OutliersTransform
if SETTINGS.prophet_required:
from etna.models import ProphetModel
class MedianOutliersTransform(OutliersTransform):
    """Outliers transform backed by :py:func:`~etna.analysis.outliers.median_outliers.get_anomalies_median`.

    Warning
    -------
    This transform can suffer from look-ahead bias. For transforming data at some timestamp
    it uses information from the whole train part.
    """

    def __init__(self, in_column: str, window_size: int = 10, alpha: float = 3):
        """Create instance of MedianOutliersTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        window_size:
            number of points in the window
        alpha:
            coefficient for determining the threshold
        """
        # Keep the detector settings on the instance; ``detect_outliers`` reads them back.
        self.window_size = window_size
        self.alpha = alpha
        # ``in_column`` is handed to the base class initializer.
        super().__init__(in_column=in_column)

    def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
        """Run :py:func:`~etna.analysis.outliers.median_outliers.get_anomalies_median` with the stored settings.

        Parameters
        ----------
        ts:
            dataset to process

        Returns
        -------
        :
            dict of outliers in format {segment: [outliers_timestamps]}
        """
        # Gather the instance-level settings once and forward them as keyword arguments.
        detector_params = {
            "in_column": self.in_column,
            "window_size": self.window_size,
            "alpha": self.alpha,
        }
        return get_anomalies_median(ts=ts, **detector_params)
class DensityOutliersTransform(OutliersTransform):
    """Outliers transform backed by :py:func:`~etna.analysis.outliers.density_outliers.get_anomalies_density`.

    Warning
    -------
    This transform can suffer from look-ahead bias. For transforming data at some timestamp
    it uses information from the whole train part.
    """

    def __init__(
        self,
        in_column: str,
        window_size: int = 15,
        distance_coef: float = 3,
        n_neighbors: int = 3,
        distance_func: Callable[[float, float], float] = absolute_difference_distance,
    ):
        """Create instance of DensityOutliersTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        window_size:
            size of windows to build
        distance_coef:
            factor for standard deviation that forms distance threshold to determine points are close to each other
        n_neighbors:
            min number of close neighbors of point not to be outlier
        distance_func:
            distance function
        """
        # Keep the detector settings on the instance; ``detect_outliers`` reads them back.
        self.window_size = window_size
        self.distance_coef = distance_coef
        self.n_neighbors = n_neighbors
        self.distance_func = distance_func
        # ``in_column`` is handed to the base class initializer.
        super().__init__(in_column=in_column)

    def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
        """Run :py:func:`~etna.analysis.outliers.density_outliers.get_anomalies_density` with the stored settings.

        Parameters
        ----------
        ts:
            dataset to process

        Returns
        -------
        :
            dict of outliers in format {segment: [outliers_timestamps]}
        """
        # Gather the instance-level settings once and forward them as keyword arguments.
        detector_params = {
            "in_column": self.in_column,
            "window_size": self.window_size,
            "distance_coef": self.distance_coef,
            "n_neighbors": self.n_neighbors,
            "distance_func": self.distance_func,
        }
        return get_anomalies_density(ts=ts, **detector_params)
class PredictionIntervalOutliersTransform(OutliersTransform):
    """Outliers transform backed by :py:func:`~etna.analysis.outliers.prediction_interval_outliers.get_anomalies_prediction_interval`."""

    def __init__(
        self,
        in_column: str,
        model: Union[Type["ProphetModel"], Type["SARIMAXModel"]],
        interval_width: float = 0.95,
        **model_kwargs,
    ):
        """Create instance of PredictionIntervalOutliersTransform.

        Parameters
        ----------
        in_column:
            name of processed column
        model:
            model for prediction interval estimation
        interval_width:
            width of the prediction interval
        model_kwargs:
            extra keyword arguments, passed unchanged to
            :py:func:`~etna.analysis.outliers.prediction_interval_outliers.get_anomalies_prediction_interval`

        Notes
        -----
        When ``in_column`` is not "target", only that column's data is used for learning.
        """
        # Keep the detector settings on the instance; ``detect_outliers`` reads them back.
        self.model = model
        self.interval_width = interval_width
        self.model_kwargs = model_kwargs
        # ``in_column`` is handed to the base class initializer.
        super().__init__(in_column=in_column)

    def detect_outliers(self, ts: TSDataset) -> Dict[str, List[pd.Timestamp]]:
        """Run :py:func:`~etna.analysis.outliers.prediction_interval_outliers.get_anomalies_prediction_interval` with the stored settings.

        Parameters
        ----------
        ts:
            dataset to process

        Returns
        -------
        :
            dict of outliers in format {segment: [outliers_timestamps]}
        """
        # The named arguments stay explicit so that a clashing key in
        # ``model_kwargs`` still raises instead of silently overriding them.
        anomalies = get_anomalies_prediction_interval(
            ts=ts,
            model=self.model,
            interval_width=self.interval_width,
            in_column=self.in_column,
            **self.model_kwargs,
        )
        return anomalies
# Names exported by ``from <this module> import *``: the point-outlier transform classes.
__all__ = [
    "MedianOutliersTransform",
    "DensityOutliersTransform",
    "PredictionIntervalOutliersTransform",
]