Merge pull request #10527 from freqtrade/feat/bt_generator

Backtesting - dynamic pairlist sorting
2024-11-10 02:12:01 +00:00 · 2024-08-13 09:56:19 +02:00 · 2024-08-13 09:56:19 +02:00 · 7e502beafc
commit 7e502beafc
parent 6fc2a604b4 948e67a2b7
4 changed files with 355 additions and 85 deletions
--- a/docs/backtesting.md
+++ b/docs/backtesting.md
@ -530,10 +530,10 @@ You can then load the trades to perform further analysis as shown in the [data a
 Since backtesting lacks some detailed information about what happens within a candle, it needs to take a few assumptions:

 - Exchange [trading limits](#trading-limits-in-backtesting) are respected
- Entries happen at open-price
+- Entries happen at open-price unless a custom price logic has been specified
 - All orders are filled at the requested price (no slippage) as long as the price is within the candle's high/low range
 - Exit-signal exits happen at open-price of the consecutive candle
- Exits don't free their trade slot for a new trade until the next candle
+- Exits free their trade slot immediately, so a new trade with a different pair can be opened in the same candle
 - Exit-signal is favored over Stoploss, because exit-signals are assumed to trigger on candle's open
 - ROI
  - Exits are compared to high - but the ROI value is used (e.g. ROI = 2%, high=5% - so the exit will be at 2%)
--- a/freqtrade/data/btanalysis.py
+++ b/freqtrade/data/btanalysis.py
@ -401,7 +401,15 @@ def analyze_trade_parallelism(results: pd.DataFrame, timeframe: str) -> pd.DataF

    timeframe_freq = timeframe_to_resample_freq(timeframe)
    dates = [
-        pd.Series(pd.date_range(row[1]["open_date"], row[1]["close_date"], freq=timeframe_freq))
+        pd.Series(
+            pd.date_range(
+                row[1]["open_date"],
+                row[1]["close_date"],
+                freq=timeframe_freq,
+                # Exclude right boundary - the date is the candle open date.
+                inclusive="left",
+            )
+        )
        for row in results[["open_date", "close_date"]].iterrows()
    ]
    deltas = [len(x) for x in dates]
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@ -1332,10 +1332,9 @@ class Backtesting:
        pair: str,
        current_time: datetime,
        end_date: datetime,
-        open_trade_count_start: int,
        trade_dir: Optional[LongShort],
        is_first: bool = True,
-    ) -> int:
+    ) -> None:
        """
        NOTE: This method is used by Hyperopt at each iteration. Please keep it optimized.

@ -1345,7 +1344,6 @@ class Backtesting:
            # 1. Manage currently open orders of active trades
            if self.manage_open_orders(t, current_time, row):
                # Close trade
-                open_trade_count_start -= 1
                LocalTrade.remove_bt_trade(t)
                self.wallets.update()

@ -1361,13 +1359,9 @@ class Backtesting:
            and trade_dir is not None
            and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir)
        ):
-            if self.trade_slot_available(open_trade_count_start):
+            if self.trade_slot_available(LocalTrade.bt_open_open_trade_count):
                trade = self._enter_trade(pair, row, trade_dir)
                if trade:
-                    # TODO: hacky workaround to avoid opening > max_open_trades
-                    # This emulates previous behavior - not sure if this is correct
-                    # Prevents entering if the trade-slot was freed in this candle
-                    open_trade_count_start += 1
                    self.wallets.update()
            else:
                self._collate_rejected(pair, row)
@ -1386,7 +1380,28 @@ class Backtesting:
            order = trade.select_order(trade.exit_side, is_open=True)
            if order:
                self._process_exit_order(order, trade, current_time, row, pair)
-        return open_trade_count_start
+
+    def time_pair_generator(
+        self, start_date: datetime, end_date: datetime, increment: timedelta, pairs: List[str]
+    ):
+        """
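+        Backtest time and pair generator.
+        Yields (current_time, pair, is_first) for every candle and pair.
+        Pairs with open trades are yielded first, is_first marks the first pair of a candle.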
+        """
+        current_time = start_date + increment
+        self.progress.init_step(
+            BacktestState.BACKTEST, int((end_date - start_date) / self.timeframe_td)
+        )
+        while current_time <= end_date:
+            is_first = True
+            # Pairs that have open trades should be processed first
+            new_pairlist = list(dict.fromkeys([t.pair for t in LocalTrade.bt_trades_open] + pairs))
+
+            for pair in new_pairlist:
+                yield current_time, pair, is_first
+                is_first = False
+
+            self.progress.increment()
+            current_time += increment

    def backtest(self, processed: Dict, start_date: datetime, end_date: datetime) -> Dict[str, Any]:
        """
@ -1411,82 +1426,70 @@ class Backtesting:

        # Indexes per pair, so some pairs are allowed to have a missing start.
        indexes: Dict = defaultdict(int)
-        current_time = start_date + self.timeframe_td

-        self.progress.init_step(
-            BacktestState.BACKTEST, int((end_date - start_date) / self.timeframe_td)
-        )
        # Loop timerange and get candle for each pair at that point in time
-        while current_time <= end_date:
-            open_trade_count_start = LocalTrade.bt_open_open_trade_count
-            self.check_abort()
-            strategy_safe_wrapper(self.strategy.bot_loop_start, supress_error=True)(
-                current_time=current_time
-            )
-            for i, pair in enumerate(data):
-                row_index = indexes[pair]
-                row = self.validate_row(data, pair, row_index, current_time)
-                if not row:
+        for current_time, pair, is_first in self.time_pair_generator(
+            start_date, end_date, self.timeframe_td, list(data.keys())
+        ):
+            if is_first:
+                self.check_abort()
+                strategy_safe_wrapper(self.strategy.bot_loop_start, supress_error=True)(
+                    current_time=current_time
+                )
+            row_index = indexes[pair]
+            row = self.validate_row(data, pair, row_index, current_time)
+            if not row:
+                continue
+
+            row_index += 1
+            indexes[pair] = row_index
+            self.dataprovider._set_dataframe_max_index(self.required_startup + row_index)
+            self.dataprovider._set_dataframe_max_date(current_time)
+            current_detail_time: datetime = row[DATE_IDX].to_pydatetime()
+            trade_dir: Optional[LongShort] = self.check_for_trade_entry(row)
+
+            if (
+                (trade_dir is not None or len(LocalTrade.bt_trades_open_pp[pair]) > 0)
+                and self.timeframe_detail
+                and pair in self.detail_data
+            ):
+                # Spread out into detail timeframe.
+                # Should only happen when we are either in a trade for this pair
+                # or when we got the signal for a new trade.
+                exit_candle_end = current_detail_time + self.timeframe_td
+
+                detail_data = self.detail_data[pair]
+                detail_data = detail_data.loc[
+                    (detail_data["date"] >= current_detail_time)
+                    & (detail_data["date"] < exit_candle_end)
+                ].copy()
+                if len(detail_data) == 0:
+                    # Fall back to "regular" data if no detail data was found for this candle
+                    self.backtest_loop(row, pair, current_time, end_date, trade_dir)
                    continue
-
-                row_index += 1
-                indexes[pair] = row_index
-                self.dataprovider._set_dataframe_max_index(self.required_startup + row_index)
-                self.dataprovider._set_dataframe_max_date(current_time)
-                current_detail_time: datetime = row[DATE_IDX].to_pydatetime()
-                trade_dir: Optional[LongShort] = self.check_for_trade_entry(row)
-
-                if (
-                    (trade_dir is not None or len(LocalTrade.bt_trades_open_pp[pair]) > 0)
-                    and self.timeframe_detail
-                    and pair in self.detail_data
-                ):
-                    # Spread out into detail timeframe.
-                    # Should only happen when we are either in a trade for this pair
-                    # or when we got the signal for a new trade.
-                    exit_candle_end = current_detail_time + self.timeframe_td
-
-                    detail_data = self.detail_data[pair]
-                    detail_data = detail_data.loc[
-                        (detail_data["date"] >= current_detail_time)
-                        & (detail_data["date"] < exit_candle_end)
-                    ].copy()
-                    if len(detail_data) == 0:
-                        # Fall back to "regular" data if no detail data was found for this candle
-                        open_trade_count_start = self.backtest_loop(
-                            row, pair, current_time, end_date, open_trade_count_start, trade_dir
-                        )
-                        continue
-                    detail_data.loc[:, "enter_long"] = row[LONG_IDX]
-                    detail_data.loc[:, "exit_long"] = row[ELONG_IDX]
-                    detail_data.loc[:, "enter_short"] = row[SHORT_IDX]
-                    detail_data.loc[:, "exit_short"] = row[ESHORT_IDX]
-                    detail_data.loc[:, "enter_tag"] = row[ENTER_TAG_IDX]
-                    detail_data.loc[:, "exit_tag"] = row[EXIT_TAG_IDX]
-                    is_first = True
-                    current_time_det = current_time
-                    for det_row in detail_data[HEADERS].values.tolist():
-                        self.dataprovider._set_dataframe_max_date(current_time_det)
-                        open_trade_count_start = self.backtest_loop(
-                            det_row,
-                            pair,
-                            current_time_det,
-                            end_date,
-                            open_trade_count_start,
-                            trade_dir,
-                            is_first,
-                        )
-                        current_time_det += self.timeframe_detail_td
-                        is_first = False
-                else:
-                    self.dataprovider._set_dataframe_max_date(current_time)
-                    open_trade_count_start = self.backtest_loop(
-                        row, pair, current_time, end_date, open_trade_count_start, trade_dir
+                detail_data.loc[:, "enter_long"] = row[LONG_IDX]
+                detail_data.loc[:, "exit_long"] = row[ELONG_IDX]
+                detail_data.loc[:, "enter_short"] = row[SHORT_IDX]
+                detail_data.loc[:, "exit_short"] = row[ESHORT_IDX]
+                detail_data.loc[:, "enter_tag"] = row[ENTER_TAG_IDX]
+                detail_data.loc[:, "exit_tag"] = row[EXIT_TAG_IDX]
+                is_first = True
+                current_time_det = current_time
+                for det_row in detail_data[HEADERS].values.tolist():
+                    self.dataprovider._set_dataframe_max_date(current_time_det)
+                    self.backtest_loop(
+                        det_row,
+                        pair,
+                        current_time_det,
+                        end_date,
+                        trade_dir,
+                        is_first,
                    )
-
-            # Move time one configured time_interval ahead.
-            self.progress.increment()
-            current_time += self.timeframe_td
+                    current_time_det += self.timeframe_detail_td
+                    is_first = False
+            else:
+                self.dataprovider._set_dataframe_max_date(current_time)
+                self.backtest_loop(row, pair, current_time, end_date, trade_dir)

        self.handle_left_open(LocalTrade.bt_trades_open_pp, data=data)
        self.wallets.update()
--- a/tests/optimize/test_backtesting.py
+++ b/tests/optimize/test_backtesting.py
@ -1,6 +1,7 @@
 # pragma pylint: disable=missing-docstring, W0212, line-too-long, C0103, unused-argument

 import random
+from collections import defaultdict
 from copy import deepcopy
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
@ -15,7 +16,7 @@ from freqtrade.commands.optimize_commands import setup_optimize_configuration, s
 from freqtrade.configuration import TimeRange
 from freqtrade.data import history
 from freqtrade.data.btanalysis import BT_DATA_COLUMNS, evaluate_result_multi
-from freqtrade.data.converter import clean_ohlcv_dataframe
+from freqtrade.data.converter import clean_ohlcv_dataframe, ohlcv_fill_up_missing_data
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.data.history import get_timerange
 from freqtrade.enums import CandleType, ExitType, RunMode
@ -29,6 +30,7 @@ from freqtrade.util.datetime_helpers import dt_utc
 from tests.conftest import (
    CURRENT_TEST_STRATEGY,
    EXMS,
+    generate_test_data,
    get_args,
    log_has,
    log_has_re,
@ -1485,6 +1487,7 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
    default_conf["max_open_trades"] = 3

    backtesting = Backtesting(default_conf)
+    vr_spy = mocker.spy(backtesting, "validate_row")
    backtesting._set_strategy(backtesting.strategylist[0])
    backtesting.strategy.bot_loop_start = MagicMock()
    backtesting.strategy.advise_entry = _trend_alternate_hold  # Override
@ -1503,6 +1506,36 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)

    # bot_loop_start is called once per candle.
    assert backtesting.strategy.bot_loop_start.call_count == 499
+    # Validated row once per candle and pair
+    assert vr_spy.call_count == 2495
+    # Group the pair argument of each validate_row call by candle time (5 pairs per candle)
+    calls_per_candle = defaultdict(list)
+    for call in vr_spy.call_args_list:
+        calls_per_candle[call[0][3]].append(call[0][1])
+
+    all_orients = [x for _, x in calls_per_candle.items()]
+
+    distinct_calls = [list(x) for x in set(tuple(x) for x in all_orients)]
+
+    # All calls must be made for the full pairlist
+    assert all(len(x) == 5 for x in distinct_calls)
+
+    # order varied - and is not always identical
+    assert not all(
+        x == ["ADA/BTC", "DASH/BTC", "ETH/BTC", "LTC/BTC", "NXT/BTC"] for x in distinct_calls
+    )
+    # But some calls should've kept the original ordering
+    assert any(
+        x == ["ADA/BTC", "DASH/BTC", "ETH/BTC", "LTC/BTC", "NXT/BTC"] for x in distinct_calls
+    )
+    assert (
+        # Ordering can be different, but should be one of the following
+        any(x == ["ETH/BTC", "ADA/BTC", "DASH/BTC", "LTC/BTC", "NXT/BTC"] for x in distinct_calls)
+        or any(
+            x == ["ETH/BTC", "LTC/BTC", "ADA/BTC", "DASH/BTC", "NXT/BTC"] for x in distinct_calls
+        )
+    )
+
    # Make sure we have parallel trades
    assert len(evaluate_result_multi(results["results"], "5m", 2)) > 0
    # make sure we don't have trades with more than configured max_open_trades
@ -1528,6 +1561,232 @@ def test_backtest_multi_pair(default_conf, fee, mocker, tres, pair, testdatadir)
    assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0


+@pytest.mark.parametrize("use_detail", [True, False])
+@pytest.mark.parametrize("pair", ["ADA/USDT", "LTC/USDT"])
+@pytest.mark.parametrize("tres", [0, 20, 30])
+def test_backtest_multi_pair_detail(
+    default_conf_usdt,
+    fee,
+    mocker,
+    tres,
+    pair,
+    use_detail,
+):
+    """
+    Same scenario as test_backtest_multi_pair - but with artificial data
+    and an optional detail timeframe (see use_detail).
+    """
+
+    def _trend_alternate_hold(dataframe=None, metadata=None):
+        """
+        Buy every xth candle - sell every other xth -2 (hold on to pairs a bit)
+        """
+        if metadata["pair"] in ("ETH/USDT", "LTC/USDT"):
+            multi = 20
+        else:
+            multi = 18
+        dataframe["enter_long"] = np.where(dataframe.index % multi == 0, 1, 0)
+        dataframe["exit_long"] = np.where((dataframe.index + multi - 2) % multi == 0, 1, 0)
+        dataframe["enter_short"] = 0
+        dataframe["exit_short"] = 0
+        return dataframe
+
+    default_conf_usdt.update(
+        {
+            "runmode": "backtest",
+            "stoploss": -1.0,
+            "minimal_roi": {"0": 100},
+        }
+    )
+
+    if use_detail:
+        default_conf_usdt["timeframe_detail"] = "1m"
+
+    mocker.patch(f"{EXMS}.get_min_pair_stake_amount", return_value=0.00001)
+    mocker.patch(f"{EXMS}.get_max_pair_stake_amount", return_value=float("inf"))
+    mocker.patch(f"{EXMS}.get_fee", fee)
+    patch_exchange(mocker)
+
+    raw_candles_1m = generate_test_data("1m", 2500, "2022-01-03 12:00:00+00:00")
+    raw_candles = ohlcv_fill_up_missing_data(raw_candles_1m, "5m", "dummy")
+
+    pairs = ["ADA/USDT", "DASH/USDT", "ETH/USDT", "LTC/USDT", "NXT/USDT"]
+    data = {pair: raw_candles for pair in pairs}
+    detail_data = {pair: raw_candles_1m for pair in pairs}
+
+    # Only use 500 lines to increase performance
+    data = trim_dictlist(data, -500)
+
+    # Remove data for one pair from the beginning of the data
+    if tres > 0:
+        data[pair] = data[pair][tres:].reset_index()
+    default_conf_usdt["timeframe"] = "5m"
+    default_conf_usdt["max_open_trades"] = 3
+
+    backtesting = Backtesting(default_conf_usdt)
+    vr_spy = mocker.spy(backtesting, "validate_row")
+    bl_spy = mocker.spy(backtesting, "backtest_loop")
+    backtesting.detail_data = detail_data
+    backtesting._set_strategy(backtesting.strategylist[0])
+    backtesting.strategy.bot_loop_start = MagicMock()
+    backtesting.strategy.advise_entry = _trend_alternate_hold  # Override
+    backtesting.strategy.advise_exit = _trend_alternate_hold  # Override
+
+    processed = backtesting.strategy.advise_all_indicators(data)
+    min_date, max_date = get_timerange(processed)
+
+    backtest_conf = {
+        "processed": deepcopy(processed),
+        "start_date": min_date,
+        "end_date": max_date,
+    }
+
+    results = backtesting.backtest(**backtest_conf)
+
+    # bot_loop_start is called once per candle.
+    assert backtesting.strategy.bot_loop_start.call_count == 499
+    # Validated row once per candle and pair
+    assert vr_spy.call_count == 2495
+
+    if use_detail:
+        # Backtest loop is called per detail candle for pairs with a signal or an open trade
+        # Exact numbers depend on trade state - but should be around 3_800
+        assert bl_spy.call_count > 3_800
+        assert bl_spy.call_count < 3_900
+    else:
+        assert bl_spy.call_count < 2495
+
+    # Make sure we have parallel trades
+    assert len(evaluate_result_multi(results["results"], "5m", 2)) > 0
+    # make sure we don't have trades with more than configured max_open_trades
+    assert len(evaluate_result_multi(results["results"], "5m", 3)) == 0
+
+    # Cached data correctly removed amounts
+    offset = 1 if tres == 0 else 0
+    removed_candles = len(data[pair]) - offset
+    assert len(backtesting.dataprovider.get_analyzed_dataframe(pair, "5m")[0]) == removed_candles
+    assert (
+        len(backtesting.dataprovider.get_analyzed_dataframe("NXT/USDT", "5m")[0])
+        == len(data["NXT/USDT"]) - 1
+    )
+
+    backtesting.strategy.max_open_trades = 1
+    backtesting.config.update({"max_open_trades": 1})
+    backtest_conf = {
+        "processed": deepcopy(processed),
+        "start_date": min_date,
+        "end_date": max_date,
+    }
+    results = backtesting.backtest(**backtest_conf)
+    assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0
+
+
+@pytest.mark.parametrize("use_detail", [True, False])
+def test_backtest_multi_pair_long_short_switch(
+    default_conf_usdt,
+    fee,
+    mocker,
+    use_detail,
+):
+    """
+    Single futures pair with alternating long and short signals (the exit signal of one
+    side is the entry signal of the other) - tested with and without detail timeframe.
+    """
+
+    def _trend_alternate_hold(dataframe=None, metadata=None):
+        """
+        Buy every xth candle - sell every other xth -2 (hold on to pairs a bit)
+        """
+        if metadata["pair"] in ("ETH/USDT", "LTC/USDT"):
+            multi = 20
+        else:
+            multi = 18
+        dataframe["enter_long"] = np.where(dataframe.index % multi == 0, 1, 0)
+        dataframe["exit_long"] = np.where((dataframe.index + multi - 2) % multi == 0, 1, 0)
+        dataframe["enter_short"] = dataframe["exit_long"]
+        dataframe["exit_short"] = dataframe["enter_long"]
+        return dataframe
+
+    default_conf_usdt.update(
+        {
+            "runmode": "backtest",
+            "timeframe": "5m",
+            "max_open_trades": 1,
+            "stoploss": -1.0,
+            "minimal_roi": {"0": 100},
+            "margin_mode": "isolated",
+            "trading_mode": "futures",
+        }
+    )
+
+    if use_detail:
+        default_conf_usdt["timeframe_detail"] = "1m"
+
+    mocker.patch(f"{EXMS}.get_min_pair_stake_amount", return_value=0.00001)
+    mocker.patch(f"{EXMS}.get_max_pair_stake_amount", return_value=float("inf"))
+    mocker.patch(f"{EXMS}.get_fee", fee)
+    patch_exchange(mocker)
+
+    raw_candles_1m = generate_test_data("1m", 2500, "2022-01-03 12:00:00+00:00")
+    raw_candles = ohlcv_fill_up_missing_data(raw_candles_1m, "5m", "dummy")
+
+    pairs = [
+        "ETH/USDT:USDT",
+    ]
+    default_conf_usdt["exchange"]["pair_whitelist"] = pairs
+    # Fake whitelist to avoid some mock data issues
+    mocker.patch(f"{EXMS}.get_maintenance_ratio_and_amt", return_value=(0.01, 0.01))
+
+    data = {pair: raw_candles for pair in pairs}
+    detail_data = {pair: raw_candles_1m for pair in pairs}
+
+    # Only use 500 lines to increase performance
+    data = trim_dictlist(data, -500)
+
+    backtesting = Backtesting(default_conf_usdt)
+    vr_spy = mocker.spy(backtesting, "validate_row")
+    bl_spy = mocker.spy(backtesting, "backtest_loop")
+    backtesting.detail_data = detail_data
+    backtesting.funding_fee_timeframe_secs = 3600 * 8  # 8h
+    backtesting.futures_data = {pair: pd.DataFrame() for pair in pairs}
+
+    backtesting.strategylist[0].can_short = True
+    backtesting._set_strategy(backtesting.strategylist[0])
+    backtesting.strategy.bot_loop_start = MagicMock()
+    backtesting.strategy.advise_entry = _trend_alternate_hold  # Override
+    backtesting.strategy.advise_exit = _trend_alternate_hold  # Override
+
+    processed = backtesting.strategy.advise_all_indicators(data)
+    min_date, max_date = get_timerange(processed)
+
+    backtest_conf = {
+        "processed": deepcopy(processed),
+        "start_date": min_date,
+        "end_date": max_date,
+    }
+
+    results = backtesting.backtest(**backtest_conf)
+
+    # bot_loop_start is called once per candle.
+    assert backtesting.strategy.bot_loop_start.call_count == 499
+    # Validated row once per candle and pair
+    assert vr_spy.call_count == 499
+
+    if use_detail:
+        # Backtest loop is called per detail candle when there is a signal or an open trade
+        assert bl_spy.call_count == 1071
+    else:
+        assert bl_spy.call_count == 479
+
+    # Make sure there are candles with an open trade (threshold 0 = more than zero open trades)
+    assert len(evaluate_result_multi(results["results"], "5m", 0)) > 0
+    # make sure we don't have trades with more than configured max_open_trades
+    assert len(evaluate_result_multi(results["results"], "5m", 1)) == 0
+
+    # Expect 30 results
+    assert len(results["results"]) == 30
+
+
 def test_backtest_start_timerange(default_conf, mocker, caplog, testdatadir):
    patch_exchange(mocker)
    mocker.patch("freqtrade.optimize.backtesting.Backtesting.backtest")