mirror of
https://github.com/freqtrade/freqtrade.git
synced 2024-11-10 10:21:59 +00:00
Merge pull request #7431 from initrv/add-plot-feature-importance
Add plot feature importance
This commit is contained in:
commit
225f7cd5f8
|
@ -77,7 +77,8 @@
|
|||
"indicator_periods_candles": [
|
||||
10,
|
||||
20
|
||||
]
|
||||
],
|
||||
"plot_feature_importance": true
|
||||
},
|
||||
"data_split_parameters": {
|
||||
"test_size": 0.33,
|
||||
|
|
|
@ -20,6 +20,7 @@ from freqtrade.exceptions import OperationalException
|
|||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.utils import plot_feature_importance
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
|
||||
|
@ -562,6 +563,9 @@ class IFreqaiModel(ABC):
|
|||
self.dd.pair_to_end_of_training_queue(pair)
|
||||
self.dd.save_data(model, pair, dk)
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("plot_feature_importance", True):
|
||||
plot_feature_importance(model, pair, dk)
|
||||
|
||||
if self.freqai_info.get("purge_old_models", False):
|
||||
self.dd.purge_old_models()
|
||||
|
||||
|
|
|
@ -1,5 +1,9 @@
|
|||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
|
@ -8,6 +12,7 @@ from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
|
|||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.exchange.exchange import market_is_active
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
|
||||
|
||||
|
||||
|
@ -131,3 +136,58 @@ def get_required_data_timerange(config: Config) -> TimeRange:
|
|||
# trading_mode=config.get("trading_mode", "spot"),
|
||||
# prepend=config.get("prepend_data", False),
|
||||
# )
|
||||
|
||||
|
||||
def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
                            count_max: int = 25) -> None:
    """
    Plot best and worst features by importance for a single sub-train.
    One HTML file named `<dk.model_filename>-<label>.html` is stored in
    `dk.data_path` for every label that has an estimator.
    :param model: Any = A model which was `fit` using a common library
                        such as catboost or lightgbm
    :param pair: str = pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
    :param count_max: int = the amount of features to be loaded per column
    """
    from freqtrade.plot.plotting import go, make_subplots, store_plot_file

    # Map every label to the estimator that was trained for it. A multi-output
    # wrapper holds one estimator per label, anything else is a single model
    # which is attributed to the first label.
    if 'FreqaiMultiOutputRegressor' in str(model.__class__):
        models = dict(zip(dk.label_list, model.estimators_))
    else:
        models = {dk.label_list[0]: model}

    def add_feature_trace(fig, fi_df, col):
        # Horizontal bar chart of one importance table in the given subplot column
        return fig.add_trace(
            go.Bar(
                x=fi_df["feature_importance"],
                y=fi_df["feature_names"],
                orientation='h', showlegend=False
            ), row=1, col=col
        )

    for label, mdl in models.items():
        # Extract feature importance from model
        model_class = str(mdl.__class__)
        if "catboost.core" in model_class:
            feature_importance = mdl.get_feature_importance()
        # Each substring has to be tested on its own: `"a" or "b" in s` is
        # always truthy and would make the `else` branch unreachable.
        elif any(lib in model_class for lib in ("lightgbm.sklearn", "xgb")):
            feature_importance = mdl.feature_importances_
        else:
            logger.info('Model type not support for generating feature importances.')
            return

        # Data preparation
        fi_df = pd.DataFrame({
            "feature_names": np.array(dk.training_features_list),
            "feature_importance": np.array(feature_importance)
        })
        # Reverse both selections so the row order within each chart is flipped
        fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
        fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]

        # Plotting
        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
        fig = add_feature_trace(fig, fi_df_top, 1)
        fig = add_feature_trace(fig, fi_df_worst, 2)
        fig.update_layout(title_text=f"Best and worst features by importance {pair}")

        # escape two FreqAI specific characters
        file_label = label.replace('&', '').replace('%', '')
        store_plot_file(fig, f"{dk.model_filename}-{file_label}.html", dk.data_path)
|
||||
|
|
|
@ -318,6 +318,41 @@ def test_principal_component_analysis(mocker, freqai_conf):
|
|||
shutil.rmtree(Path(freqai.dk.full_path))
|
||||
|
||||
|
||||
def test_plot_feature_importance(mocker, freqai_conf):
    """
    Train a model for ADA/BTC, plot its feature importance and verify that the
    plot file for the first label was written to the data path.
    """

    from freqtrade.freqai.utils import plot_feature_importance

    freqai_conf.update({"timerange": "20180110-20180130"})
    # NOTE(review): the key below is spelled "princpial_component_analysis";
    # presumably PCA is therefore not enabled by this update - confirm intent.
    freqai_conf.get("freqai", {}).get("feature_parameters", {}).update(
        {"princpial_component_analysis": "true"})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})
    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    timerange = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(timerange, freqai.dk)

    freqai.dd.pair_dict = MagicMock()

    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

    freqai.extract_data_and_train_model(
        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    model = freqai.dd.load_data("ADA/BTC", freqai.dk)

    plot_feature_importance(model, "ADA/BTC", freqai.dk)

    # The plot is stored as "<model_filename>-<label>.html" with '&' and '%'
    # stripped from the label. Asserting on a bare Path object is always
    # truthy, so check that the file really exists on disk.
    label = freqai.dk.label_list[0].replace('&', '').replace('%', '')
    plot_file = Path(freqai.dk.data_path) / f"{freqai.dk.model_filename}-{label}.html"
    assert plot_file.is_file()

    shutil.rmtree(Path(freqai.dk.full_path))
|
||||
|
||||
|
||||
@pytest.mark.parametrize('timeframes,corr_pairs', [
|
||||
(['5m'], ['ADA/BTC', 'DASH/BTC']),
|
||||
(['5m'], ['ADA/BTC', 'DASH/BTC', 'ETH/USDT']),
|
||||
|
|
Loading…
Reference in New Issue
Block a user