Source code for etna.transforms.encoders.segment_encoder

import pandas as pd
from sklearn import preprocessing

from etna.transforms.base import FutureMixin
from etna.transforms.base import Transform


class SegmentEncoderTransform(Transform, FutureMixin):
    """Encode segment label to categorical. Creates column 'segment_code'."""

    idx = pd.IndexSlice

    def __init__(self):
        # Label encoder that maps each segment name to an integer code.
        self._le = preprocessing.LabelEncoder()

    def fit(self, df: pd.DataFrame) -> "SegmentEncoderTransform":
        """
        Fit encoder on existing segment labels.

        Parameters
        ----------
        df:
            dataframe with data to fit label encoder; its columns must have a level named "segment".

        Returns
        -------
        :
            Fitted transform
        """
        segment_columns = df.columns.get_level_values("segment")
        self._le.fit(segment_columns)
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Get encoded (categorical) for each segment.

        For every segment present in ``df`` a ``segment_code`` feature column is added that
        holds the code assigned to this segment during ``fit``, repeated for each row of ``df``.

        Parameters
        ----------
        df:
            dataframe with data to transform; its columns must have a level named "segment".

        Returns
        -------
        :
            result dataframe

        Raises
        ------
        ValueError:
            raised by the underlying label encoder if ``df`` contains a segment that
            was not seen during ``fit``
        """
        # Encode the segments that are actually present in ``df`` rather than all the fitted
        # ones: otherwise the join below would create columns for segments missing from ``df``.
        segments = df.columns.get_level_values("segment").unique()
        codes = self._le.transform(segments)
        # One row of codes per timestamp: resulting shape is (len(df), number of segments).
        encoded_matrix = codes.reshape(1, -1).repeat(len(df), axis=0)
        encoded_df = pd.DataFrame(
            encoded_matrix,
            columns=pd.MultiIndex.from_product([segments, ["segment_code"]], names=("segment", "feature")),
            index=df.index,
        )
        encoded_df = encoded_df.astype("category")

        df = df.join(encoded_df)
        # Keep each segment's features grouped together after the new columns were appended.
        df = df.sort_index(axis=1)
        return df