# Source code for etna.transforms.timestamp.time_flags
from copy import deepcopy
from typing import Optional
import numpy as np
import pandas as pd
from etna.transforms.base import FutureMixin
from etna.transforms.base import Transform
class TimeFlagsTransform(Transform, FutureMixin):
    """Transform that derives categorical time-of-day features from the timestamps in the dataframe index."""

    def __init__(
        self,
        minute_in_hour_number: bool = True,
        fifteen_minutes_in_hour_number: bool = False,
        hour_number: bool = True,
        half_hour_number: bool = False,
        half_day_number: bool = False,
        one_third_day_number: bool = False,
        out_column: Optional[str] = None,
    ):
        """Store the feature switches and check that at least one of them is enabled.

        Parameters
        ----------
        minute_in_hour_number:
            if True, ``transform`` adds a column with the minute number within the hour
        fifteen_minutes_in_hour_number:
            if True, ``transform`` adds a column with the index (starting from 0) of the
            fifteen-minute interval within the hour
        hour_number:
            if True, ``transform`` adds a column with the hour number within the day
        half_hour_number:
            if True, ``transform`` adds a column holding 0 for the first half of the hour
            and 1 for the second half
        half_day_number:
            if True, ``transform`` adds a column holding 0 for the first half of the day
            and 1 for the second half
        one_third_day_number:
            if True, ``transform`` adds a column with the index (starting from 0) of the
            8-hour interval within the day
        out_column:
            prefix used to name the created columns;

            * if set, each column is named '{out_column}_{feature_name}';

            * if not set, each column is named with the ``repr`` of a transform
              configured to produce exactly that one column

        Raises
        ------
        ValueError:
            if every feature switch is False, i.e. the transform would create no columns
        """
        requested_flags = (
            minute_in_hour_number,
            fifteen_minutes_in_hour_number,
            hour_number,
            half_hour_number,
            half_day_number,
            one_third_day_number,
        )
        if not any(requested_flags):
            raise ValueError(
                f"{type(self).__name__} feature does nothing with given init args configuration, "
                f"at least one of minute_in_hour_number, fifteen_minutes_in_hour_number, hour_number, "
                f"half_hour_number, half_day_number, one_third_day_number should be True."
            )

        self.date_column_name = None
        self.minute_in_hour_number: bool = minute_in_hour_number
        self.fifteen_minutes_in_hour_number: bool = fifteen_minutes_in_hour_number
        self.hour_number: bool = hour_number
        self.half_hour_number: bool = half_hour_number
        self.half_day_number: bool = half_day_number
        self.one_third_day_number: bool = one_third_day_number
        self.out_column = out_column

        # Template with every switch off; _get_column_name copies it and turns on a single feature.
        self._empty_parameters = {
            "minute_in_hour_number": False,
            "fifteen_minutes_in_hour_number": False,
            "hour_number": False,
            "half_hour_number": False,
            "half_day_number": False,
            "one_third_day_number": False,
        }

    def _get_column_name(self, feature_name: str) -> str:
        """Build the output column name for the feature called ``feature_name``."""
        if self.out_column is not None:
            return f"{self.out_column}_{feature_name}"

        # No prefix given: name the column after a transform that enables only this feature.
        single_feature_parameters = deepcopy(self._empty_parameters)
        single_feature_parameters[feature_name] = self.__dict__[feature_name]
        single_feature_transform = TimeFlagsTransform(
            **single_feature_parameters, out_column=self.out_column  # type: ignore
        )
        return repr(single_feature_transform)

    def fit(self, *args, **kwargs) -> "TimeFlagsTransform":
        """Do nothing and return the transform itself; all arguments are ignored."""
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Append the enabled time features to every segment of the dataframe.

        Parameters
        ----------
        df:
            dataframe whose index holds the timestamps and whose columns have a "segment" level

        Returns
        -------
        result: pd.DataFrame
            input data joined with the generated feature columns for each segment;
            columns are sorted and the column levels are named "segment" and "feature"
        """
        features = pd.DataFrame(index=df.index)
        timestamp_series = pd.Series(df.index)

        # (feature switch / column name, extractor, extra keyword arguments) in creation order.
        feature_table = (
            ("minute_in_hour_number", self._get_minute_number, {}),
            ("fifteen_minutes_in_hour_number", self._get_period_in_hour, {"period_in_minutes": 15}),
            ("hour_number", self._get_hour_number, {}),
            ("half_hour_number", self._get_period_in_hour, {"period_in_minutes": 30}),
            ("half_day_number", self._get_period_in_day, {"period_in_hours": 12}),
            ("one_third_day_number", self._get_period_in_day, {"period_in_hours": 8}),
        )
        for feature_name, extractor, extra_kwargs in feature_table:
            if not getattr(self, feature_name):
                continue
            features[self._get_column_name(feature_name)] = extractor(
                timestamp_series=timestamp_series, **extra_kwargs
            )

        # Every generated column is stored with the categorical dtype.
        for column in features.columns:
            features[column] = features[column].astype("category")

        per_segment_frames = []
        for segment in df.columns.get_level_values("segment").unique():
            segment_frame = df[segment].join(features)
            # Put the segment name in front of the column labels to rebuild a two-level header.
            column_levels = segment_frame.columns.to_frame()
            column_levels.insert(0, "segment", segment)
            segment_frame.columns = pd.MultiIndex.from_frame(column_levels)
            per_segment_frames.append(segment_frame)

        result = pd.concat(per_segment_frames, axis=1).sort_index(axis=1)
        result.columns.names = ["segment", "feature"]
        return result

    @staticmethod
    def _get_minute_number(timestamp_series: pd.Series) -> np.ndarray:
        """Return an array with the minute-of-hour of each timestamp."""
        minutes = timestamp_series.apply(lambda stamp: stamp.minute)
        return minutes.values

    @staticmethod
    def _get_period_in_hour(timestamp_series: pd.Series, period_in_minutes: int = 15) -> np.ndarray:
        """Return an array with the index of the period within the hour for each timestamp.

        The hour is cut into periods of ``period_in_minutes`` minutes; each timestamp gets
        its minute floor-divided by that length.
        """
        periods = timestamp_series.apply(lambda stamp: stamp.minute // period_in_minutes)
        return periods.values

    @staticmethod
    def _get_hour_number(timestamp_series: pd.Series) -> np.ndarray:
        """Return an array with the hour-of-day of each timestamp."""
        hours = timestamp_series.apply(lambda stamp: stamp.hour)
        return hours.values

    @staticmethod
    def _get_period_in_day(timestamp_series: pd.Series, period_in_hours: int = 12) -> np.ndarray:
        """Return an array with the index of the period within the day for each timestamp.

        The day is cut into periods of ``period_in_hours`` hours; each timestamp gets
        its hour floor-divided by that length.
        """
        periods = timestamp_series.apply(lambda stamp: stamp.hour // period_in_hours)
        return periods.values
# Names exported by ``from <this module> import *``.
__all__ = ["TimeFlagsTransform"]