# Source code for etna.analysis.feature_relevance.relevance_table

from typing import Union

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor

from etna.libs.tsfresh import calculate_relevance_table

# Type alias used to annotate the ``model`` argument of ``get_model_relevance_table``,
# which calls ``fit`` on the model and then reads its ``feature_importances_``.
TreeBasedRegressor = Union[
    DecisionTreeRegressor,
    ExtraTreeRegressor,
    RandomForestRegressor,
    ExtraTreesRegressor,
    GradientBoostingRegressor,
    CatBoostRegressor,
]


def get_statistics_relevance_table(df: pd.DataFrame, df_exog: pd.DataFrame) -> pd.DataFrame:
    """Calculate relevance table with p-values from tsfresh.

    For every segment the exogenous features are passed together with the segment's
    target to ``calculate_relevance_table`` and the returned p-values form one row
    of the result.

    Parameters
    ----------
    df:
        dataframe with timeseries; its columns must have a ``segment`` level and
        each segment must contain a ``target`` column
    df_exog:
        dataframe with exogenous data; its columns must have a ``feature`` level

    Returns
    -------
    pd.DataFrame
        dataframe with p-values: one row per segment, one column per exogenous feature.
    """
    regressors = sorted(df_exog.columns.get_level_values("feature").unique())
    segments = sorted(df.columns.get_level_values("segment").unique())
    result = np.empty((len(segments), len(regressors)))
    for k, seg in enumerate(segments):
        # Anchor on the target alone: ``DataFrame.first_valid_index`` considers every column of
        # the segment, so an extra column that starts earlier would leave leading NaNs in the target.
        target = df.loc[:, seg]["target"]
        first_valid_idx = target.first_valid_index()
        df_now = target.loc[first_valid_idx:]
        df_exog_now = df_exog.loc[first_valid_idx:, seg]
        # Positional cut to the target's length -- presumably because exogenous data can extend
        # past the end of the target; assumes both frames share the same timestamps from here on.
        relevance = calculate_relevance_table(df_exog_now[: len(df_now)], df_now)[["feature", "p_value"]].values
        # Re-sort the (feature, p_value) rows by feature name so they line up with ``regressors``.
        result[k] = np.array(sorted(relevance, key=lambda x: x[0]))[:, 1]
    relevance_table = pd.DataFrame(result)
    relevance_table.index = segments
    relevance_table.columns = regressors
    return relevance_table
def get_model_relevance_table(
    df: pd.DataFrame, df_exog: pd.DataFrame, model: "TreeBasedRegressor"
) -> pd.DataFrame:
    """Calculate relevance table with feature importance from model.

    The model is fitted separately for every segment on the timestamps where both
    the target and all exogenous features are present, and its
    ``feature_importances_`` form one row of the result.

    Parameters
    ----------
    df:
        dataframe with timeseries; its columns must have a ``segment`` level and
        each segment must contain a ``target`` column
    df_exog:
        dataframe with exogenous data; its columns must have a ``feature`` level
    model:
        model to obtain feature importance, should have ``feature_importances_`` property;
        it is fitted in place once per segment, so after the call it holds the fit
        for the last segment in sorted order

    Returns
    -------
    pd.DataFrame
        dataframe with feature importance values: one row per segment,
        one column per exogenous feature.
    """
    regressors = sorted(df_exog.columns.get_level_values("feature").unique())
    segments = sorted(df.columns.get_level_values("segment").unique())
    result = np.empty((len(segments), len(regressors)))
    for k, seg in enumerate(segments):
        # Any missing feature value makes the timestamp unusable for fitting.
        df_exog_seg = df_exog.loc[:, seg].dropna()[regressors]
        # Only gaps in the target itself should remove a timestamp: NaNs in other
        # columns of ``df`` are irrelevant because the model never sees them.
        df_seg = df.loc[:, seg]["target"].dropna()
        common_index = df_seg.index.intersection(df_exog_seg.index)
        model.fit(df_exog_seg.loc[common_index], df_seg.loc[common_index])
        result[k] = model.feature_importances_
    return pd.DataFrame(result, index=segments, columns=regressors)