Fix duplicated data loading and timerange for populate_features

This commit is contained in:
Thomas Joußen 2023-12-15 10:47:56 +00:00
parent 0333abcbdc
commit 2d6a49013f
4 changed files with 71 additions and 4 deletions

View File

@ -68,7 +68,7 @@ Backtesting mode requires [downloading the necessary data](#downloading-data-to-
This way, you can return to using any model you wish by simply specifying the `identifier`. This way, you can return to using any model you wish by simply specifying the `identifier`.
!!! Note !!! Note
Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire backtest timerange. This means that you should be sure that features do look-ahead into the future. Backtesting calls `set_freqai_targets()` one time for each backtest window (where the number of windows is the full backtest timerange divided by the `backtest_period_days` parameter). Doing this means that the targets simulate dry/live behavior without look ahead bias. However, the definition of the features in `feature_engineering_*()` is performed once on the entire training timerange. This means that you should be sure that features do not look-ahead into the future.
More details about look-ahead bias can be found in [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies). More details about look-ahead bias can be found in [Common Mistakes](strategy-customization.md#common-mistakes-when-developing-strategies).
--- ---

View File

@ -311,11 +311,14 @@ class DataProvider:
timerange = TimeRange.parse_timerange(None if self._config.get( timerange = TimeRange.parse_timerange(None if self._config.get(
'timerange') is None else str(self._config.get('timerange'))) 'timerange') is None else str(self._config.get('timerange')))
# It is not necessary to add the training candles, as they startup_candles = self.get_required_startup(str(timeframe))
# were already added at the beginning of the backtest.
startup_candles = self.get_required_startup(str(timeframe), False)
tf_seconds = timeframe_to_seconds(str(timeframe)) tf_seconds = timeframe_to_seconds(str(timeframe))
timerange.subtract_start(tf_seconds * startup_candles) timerange.subtract_start(tf_seconds * startup_candles)
logger.info(f"Loading data for {pair} {timeframe} "
f"from {timerange.start_fmt} "
f"to {timerange.stop_fmt}")
self.__cached_pairs_backtesting[saved_pair] = load_pair_history( self.__cached_pairs_backtesting[saved_pair] = load_pair_history(
pair=pair, pair=pair,
timeframe=timeframe, timeframe=timeframe,

View File

@ -709,6 +709,8 @@ class FreqaiDataKitchen:
pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs) pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs)
informative_copy = informative_df.copy() informative_copy = informative_df.copy()
logger.debug(f"Populating features for {pair} {tf}")
for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]: for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]:
df_features = strategy.feature_engineering_expand_all( df_features = strategy.feature_engineering_expand_all(
informative_copy.copy(), t, metadata=metadata) informative_copy.copy(), t, metadata=metadata)
@ -788,6 +790,7 @@ class FreqaiDataKitchen:
if not prediction_dataframe.empty: if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy() dataframe = prediction_dataframe.copy()
base_dataframes[self.config["timeframe"]] = dataframe.copy()
else: else:
dataframe = base_dataframes[self.config["timeframe"]].copy() dataframe = base_dataframes[self.config["timeframe"]].copy()

View File

@ -3,6 +3,7 @@ from datetime import datetime, timedelta, timezone
from pathlib import Path from pathlib import Path
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pandas as pd
import pytest import pytest
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
@ -135,3 +136,63 @@ def test_get_full_model_path(mocker, freqai_conf, model):
model_path = freqai.dk.get_full_models_path(freqai_conf) model_path = freqai.dk.get_full_models_path(freqai_conf)
assert model_path.is_dir() is True assert model_path.is_dir() is True
def test_get_pair_data_for_features_with_prealoaded_data(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
_, base_df = freqai.dd.get_base_and_corr_dataframes(timerange, "LTC/BTC", freqai.dk)
df = freqai.dk.get_pair_data_for_features("LTC/BTC", "5m", strategy, base_dataframes=base_df)
assert df is base_df["5m"]
assert not df.empty
def test_get_pair_data_for_features_without_preloaded_data(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180115-20180130"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
base_df = {'5m': pd.DataFrame()}
df = freqai.dk.get_pair_data_for_features("LTC/BTC", "5m", strategy, base_dataframes=base_df)
assert df is not base_df["5m"]
assert not df.empty
assert df.iloc[0]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-11 23:00:00"
assert df.iloc[-1]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-30 00:00:00"
def test_populate_features(mocker, freqai_conf):
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180115-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
corr_df, base_df = freqai.dd.get_base_and_corr_dataframes(timerange, "LTC/BTC", freqai.dk)
mocker.patch.object(strategy, 'feature_engineering_expand_all', return_value=base_df["5m"])
df = freqai.dk.populate_features(base_df["5m"], "LTC/BTC", strategy,
base_dataframes=base_df, corr_dataframes=corr_df)
strategy.feature_engineering_expand_all.assert_called_once()
pd.testing.assert_frame_equal(base_df["5m"],
strategy.feature_engineering_expand_all.call_args[0][0])
assert df.iloc[0]['date'].strftime("%Y-%m-%d %H:%M:%S") == "2018-01-15 00:00:00"