freqtrade_origin/freqtrade/freqai/utils.py

206 lines
7.3 KiB
Python
Raw Normal View History

import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
2022-09-16 16:17:41 +00:00
import numpy as np
import pandas as pd
import rapidjson
from freqtrade.configuration import TimeRange
2022-09-18 11:20:36 +00:00
from freqtrade.constants import Config
2022-08-26 13:30:28 +00:00
from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
2022-09-17 15:53:43 +00:00
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
logger = logging.getLogger(__name__)
2022-08-26 13:30:28 +00:00
2022-09-18 11:20:36 +00:00
def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
    """
    Download the candle data FreqAI needs for populating indicators and
    training the model. Intended to be called once when the bot starts.

    The pairlist from the configuration is expanded against the markets
    reported by the exchange, the download window is taken from
    ``get_required_data_timerange`` and everything is handed to
    ``refresh_backtest_ohlcv_data``.

    :param dp: DataProvider instance attached to the strategy
    :param config: Configuration dictionary
    :raises OperationalException: if the DataProvider has no exchange object
    """
    exchange = dp._exchange
    if exchange is None:
        raise OperationalException("No exchange object found.")

    # Inactive markets are only kept when "include_inactive" is truthy in the config.
    only_active = not config.get("include_inactive")
    markets = list(exchange.get_markets(tradable_only=True, active_only=only_active).keys())

    all_pairs = dynamic_expand_pairlist(config, markets)

    timerange = get_required_data_timerange(config)
    span_seconds = timerange.stopts - timerange.startts
    # Length of the download window expressed in whole days (86400 s per day).
    new_pairs_days = int(span_seconds / 86400)

    include_timeframes = config["freqai"]["feature_parameters"].get("include_timeframes")

    refresh_backtest_ohlcv_data(
        exchange,
        pairs=all_pairs,
        timeframes=include_timeframes,
        datadir=config["datadir"],
        timerange=timerange,
        new_pairs_days=new_pairs_days,
        erase=False,
        data_format=config.get("dataformat_ohlcv", "feather"),
        trading_mode=config.get("trading_mode", "spot"),
        prepend=config.get("prepend_data", False),
    )
2022-08-26 13:30:28 +00:00
2022-09-18 11:20:36 +00:00
def get_required_data_timerange(config: Config) -> TimeRange:
    """
    Compute the time range that the FreqAI auto data-download has to cover.

    The range ends at the current UTC time and reaches back by
    ``train_period_days`` plus a startup buffer. The buffer is 1.5 times the
    larger of ``startup_candle_count`` and the biggest entry of
    ``indicator_periods_candles``, measured in candles of the longest
    timeframe listed in ``include_timeframes``.

    Note: the computed candle count is written back to
    ``config["startup_candle_count"]``.

    :param config: Configuration dictionary
    :return: TimeRange spanning the data that needs to be available
    """
    now_ts = datetime.now(tz=timezone.utc).timestamp()

    freqai_cfg = config["freqai"]
    feature_params = freqai_cfg["feature_parameters"]

    # Longest configured timeframe in seconds; the leading 0 is the floor
    # used when no timeframe yields a larger value.
    timeframes = feature_params.get("include_timeframes")
    max_tf_seconds = max([0, *(timeframe_to_seconds(tf) for tf in timeframes)])

    startup_candles = config.get("startup_candle_count", 0)
    longest_indicator = max(feature_params["indicator_periods_candles"])

    # factor the max_period as a factor of safety.
    max_period = int(max(startup_candles, longest_indicator) * 1.5)
    config["startup_candle_count"] = max_period
    logger.info(f"FreqAI auto-downloader using {max_period} startup candles.")

    additional_seconds = max_period * max_tf_seconds
    train_seconds = freqai_cfg.get("train_period_days", 0) * 86400

    startts = int(now_ts - train_seconds - additional_seconds)
    stopts = int(now_ts)
    return TimeRange("date", "date", startts, stopts)
2024-05-12 15:12:20 +00:00
def plot_feature_importance(
    model: Any, pair: str, dk: FreqaiDataKitchen, count_max: int = 25
) -> None:
    """
    Plot the best and worst features by importance for a single sub-train.

    One HTML plot is stored per label. If a model whose class is not
    recognised (catboost, lightgbm, xgboost) is encountered, a message is
    logged and the function returns without plotting the remaining labels.

    :param model: Any = A model which was `fit` using a common library
                  such as catboost or lightgbm
    :param pair: str = pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
    :param count_max: int = the amount of features to be loaded per column
    """
    from freqtrade.plot.plotting import go, make_subplots, store_plot_file

    def _add_importance_bars(figure, frame, column):
        # Horizontal bar chart of one importance table in the given subplot column.
        bars = go.Bar(
            x=frame["feature_importance"],
            y=frame["feature_names"],
            orientation="h",
            showlegend=False,
        )
        return figure.add_trace(bars, row=1, col=column)

    # Map every label to the estimator that predicts it.
    if "FreqaiMultiOutputRegressor" in str(model.__class__):
        models = {
            label: estimator for estimator, label in zip(model.estimators_, dk.label_list)
        }
    else:
        models = {dk.label_list[0]: model}

    for label, mdl in models.items():
        # Extract feature importance; the library is detected from the class path.
        class_name = str(mdl.__class__)
        if "catboost.core" in class_name:
            feature_importance = mdl.get_feature_importance()
        elif "lightgbm.sklearn" in class_name or "xgb" in class_name:
            feature_importance = mdl.feature_importances_
        else:
            logger.info("Model type does not support generating feature importances.")
            return

        # Data preparation
        importance_table = pd.DataFrame(
            {
                "feature_names": np.array(dk.data_dictionary["train_features"].columns),
                "feature_importance": np.array(feature_importance),
            }
        )
        # Row order is reversed after selection, as in the original plot layout.
        best_features = importance_table.nlargest(count_max, "feature_importance")[::-1]
        worst_features = importance_table.nsmallest(count_max, "feature_importance")[::-1]

        # Plotting: best features in the left column, worst in the right one.
        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
        fig = _add_importance_bars(fig, best_features, 1)
        fig = _add_importance_bars(fig, worst_features, 2)
        fig.update_layout(title_text=f"Best and worst features by importance {pair}")

        # Strip the two FreqAI specific characters before building the file name.
        file_label = label.replace("&", "").replace("%", "")
        store_plot_file(fig, f"{dk.model_filename}-{file_label}.html", dk.data_path)
def record_params(config: dict[str, Any], full_path: Path) -> None:
    """
    Write the run parameters to ``run_params.json`` inside ``full_path``
    so that a run can be reproduced later.

    :param config: Configuration dictionary the parameters are read from
    :param full_path: Directory in which ``run_params.json`` is written
    """
    params_record_path = full_path / "run_params.json"

    # Keys copied unchanged from the top level of the configuration.
    copied_keys = ("timeframe", "stake_amount", "stake_currency", "max_open_trades")

    run_params: dict[str, Any] = {"freqai": config.get("freqai", {})}
    run_params.update({key: config.get(key) for key in copied_keys})
    run_params["pairs"] = config.get("exchange", {}).get("pair_whitelist")

    with params_record_path.open("w") as out_file:
        rapidjson.dump(
            run_params,
            out_file,
            indent=4,
            # Values rapidjson cannot serialise natively are written via str().
            default=str,
            number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN,
        )
2022-11-07 18:35:28 +00:00
def get_timerange_backtest_live_models(config: Config) -> str:
    """
    Build the timerange string used for backtesting live/ready models.

    The range is obtained from the live historic predictions through a
    FreqaiDataDrawer that points at the models path for this configuration.

    :param config: Configuration dictionary
    :return: a string timerange (format example: '20220801-20220822')
    """
    kitchen = FreqaiDataKitchen(config)
    drawer = FreqaiDataDrawer(kitchen.get_full_models_path(config), config)
    return drawer.get_timerange_from_live_historic_predictions().timerange_str
2023-05-14 15:49:24 +00:00
def get_tb_logger(model_type: str, path: Path, activate: bool) -> Any:
    """
    Create the tensorboard logger object for a model.

    :param model_type: Model family; only "pytorch" selects ``TBLogger``
    :param path: Path forwarded to the logger constructor
    :param activate: Flag forwarded to the logger constructor; ``TBLogger``
                     is only used when it is truthy
    :return: ``TBLogger`` for an activated pytorch model, otherwise
             ``BaseTensorboardLogger``
    """
    wants_tb_logger = model_type == "pytorch" and activate
    if not wants_tb_logger:
        # Imported lazily so the module is only loaded when this branch runs.
        from freqtrade.freqai.tensorboard.base_tensorboard import BaseTensorboardLogger

        return BaseTensorboardLogger(path, activate)

    # Imported lazily so the module is only loaded when this branch runs.
    from freqtrade.freqai.tensorboard import TBLogger

    return TBLogger(path, activate)