freqtrade_origin/freqtrade/optimize/analysis/recursive.py
2024-10-07 11:17:15 +09:00

208 lines
8.1 KiB
Python

import logging
import numbers
import shutil
from copy import deepcopy
from datetime import timedelta
from pathlib import Path
from typing import Any
from pandas import DataFrame
from freqtrade.exchange import timeframe_to_minutes
from freqtrade.loggers.set_log_levels import (
reduce_verbosity_for_bias_tester,
restore_verbosity_for_bias_tester,
)
from freqtrade.optimize.backtesting import Backtesting
from freqtrade.optimize.base_analysis import BaseAnalysis, VarHolder
from freqtrade.resolvers import StrategyResolver
logger = logging.getLogger(__name__)
def is_number(variable):
    """
    Tell whether ``variable`` is a numeric value.

    Booleans are rejected explicitly: ``bool`` is a subclass of ``int`` and
    would otherwise be accepted by the ``numbers.Number`` check.
    """
    if isinstance(variable, bool):
        return False
    return isinstance(variable, numbers.Number)
class RecursiveAnalysis(BaseAnalysis):
    """
    Check a strategy's indicators for recursive-formula bias and lookahead bias.

    Indicators are recomputed on several partial data windows (one per startup
    candle count, plus one shortened window for the lookahead check) and compared
    with the indicators held in ``self.full_varHolder``.

    NOTE(review): ``local_config``, ``full_varHolder``, ``exchange`` and
    ``dt_to_timestamp`` are not defined in this class - presumably they are
    provided by ``BaseAnalysis``.
    """

    def __init__(self, config: dict[str, Any], strategy_obj: dict):
        """
        :param config: Configuration dict. The optional ``startup_candle`` entry lists
            the startup candle counts to test (default: 199, 399, 499, 999, 1999).
        :param strategy_obj: Passed through unchanged to ``BaseAnalysis.__init__``.
        """
        # Kept ahead of the parent initializer, matching the original statement order.
        self._startup_candle = [
            int(count) for count in config.get("startup_candle", [199, 399, 499, 999, 1999])
        ]

        super().__init__(config, strategy_obj)

        strat = StrategyResolver.load_strategy(config)
        self._strat_scc = strat.startup_candle_count

        # The strategy's own startup candle count is always part of the test set.
        if self._strat_scc not in self._startup_candle:
            self._startup_candle.append(self._strat_scc)
        self._startup_candle.sort()

        self.partial_varHolder_array: list[VarHolder] = []
        self.partial_varHolder_lookahead_array: list[VarHolder] = []

        # indicator name -> {startup candle count -> formatted difference}
        self.dict_recursive: dict[str, Any] = {}

    @staticmethod
    def _percent_diff(base_value: Any, other_value: Any) -> str:
        """
        Format the difference of ``other_value`` relative to ``base_value`` in percent.

        :return: The difference with three decimals and a trailing ``%``, or ``"NaN"``
            when either value is not a number or is zero.
        """
        # Type check first, so truthiness is only evaluated on real numbers
        # (some objects raise when converted to bool).
        if (
            is_number(base_value)
            and is_number(other_value)
            and base_value
            and other_value
        ):
            diff = (other_value - base_value) / base_value * 100
            return f"{diff:.3f}%"
        return "NaN"

    def analyze_indicators(self):
        """
        Check for recursive bias.

        Compares the last indicator row of the full run with the last row of every
        partial run and stores the percentage difference per indicator and startup
        candle count in ``self.dict_recursive``. Stops at the first partial run
        that shows no difference at all.
        """
        pair_to_check = self.local_config["pairs"][0]
        logger.info("Start checking for recursive bias")

        # check and report signals
        base_last_row = self.full_varHolder.indicators[pair_to_check].iloc[-1]

        for part in self.partial_varHolder_array:
            part_last_row = part.indicators[pair_to_check].iloc[-1]

            # compare() keeps only the labels that differ, with the two values
            # in the "self" (full run) and "other" (partial run) columns.
            compare_df = base_last_row.compare(part_last_row)
            if compare_df.shape[0] == 0:
                logger.info("No variance on indicator(s) found due to recursive formula.")
                break

            for indicator in compare_df.index:
                values_diff = compare_df.loc[indicator]
                str_diff = self._percent_diff(values_diff.loc["self"], values_diff.loc["other"])
                self.dict_recursive.setdefault(indicator, {})[part.startup_candle] = str_diff

    def analyze_indicators_lookahead(self):
        """
        Check for lookahead bias on indicators only.

        Takes the last row of the shortened partial run and compares it with the row
        of the full run that carries the same date. Every indicator that differs
        is logged.
        """
        pair_to_check = self.local_config["pairs"][0]
        logger.info("Start checking for lookahead bias on indicators only")

        part = self.partial_varHolder_lookahead_array[0]
        part_last_row = part.indicators[pair_to_check].iloc[-1]
        date_to_check = part_last_row["date"]

        full_indicators = self.full_varHolder.indicators[pair_to_check]
        index_to_get = full_indicators["date"] == date_to_check
        base_row_check = full_indicators.loc[index_to_get].iloc[-1]

        check_time = part.to_dt.strftime("%Y-%m-%dT%H:%M:%S")
        logger.info(f"Check indicators at {check_time}")

        compare_df = base_row_check.compare(part_last_row)
        if compare_df.shape[0] > 0:
            for indicator in compare_df.index:
                logger.info(f"=> found lookahead in indicator {indicator}")
        else:
            logger.info("No lookahead bias on indicators found.")

    def prepare_data(self, varholder: VarHolder, pairs_to_load: list[str]):
        """
        Load backtest data for the holder's date range and compute its indicators.

        :param varholder: Holder supplying ``from_dt`` / ``to_dt``; its ``data``,
            ``timerange``, ``timeframe`` and ``indicators`` attributes are filled in.
        :param pairs_to_load: Pairs written to the exchange ``pair_whitelist`` of the
            config used for this run.
        """
        if "freqai" in self.local_config and "identifier" in self.local_config["freqai"]:
            # purge previous data if the freqai model is defined
            # (to be sure nothing is carried over from older backtests)
            path_to_current_identifier = Path(
                f"{self.local_config['user_data_dir']}/models/"
                f"{self.local_config['freqai']['identifier']}"
            ).resolve()
            # remove folder and its contents
            if path_to_current_identifier.exists():
                shutil.rmtree(path_to_current_identifier)

        # Work on a copy so the per-run timerange and whitelist do not leak
        # into self.local_config.
        prepare_data_config = deepcopy(self.local_config)
        timerange_start = self.dt_to_timestamp(varholder.from_dt)
        timerange_end = self.dt_to_timestamp(varholder.to_dt)
        prepare_data_config["timerange"] = f"{timerange_start}-{timerange_end}"
        prepare_data_config["exchange"]["pair_whitelist"] = pairs_to_load

        backtesting = Backtesting(prepare_data_config, self.exchange)
        # Keep the exchange object so it is passed to the next Backtesting instance.
        self.exchange = backtesting.exchange
        backtesting._set_strategy(backtesting.strategylist[0])

        varholder.data, varholder.timerange = backtesting.load_bt_data()
        backtesting.load_bt_data_detail()
        varholder.timeframe = backtesting.timeframe

        varholder.indicators = backtesting.strategy.advise_all_indicators(varholder.data)

    def fill_partial_varholder(self, start_date, startup_candle):
        """
        Compute indicators from ``start_date`` to the end of the full run with the
        given startup candle count and append the result to
        ``self.partial_varHolder_array``.

        :param start_date: Start of the partial window.
        :param startup_candle: Startup candle count, written to
            ``self.local_config["startup_candle_count"]`` for this run.
        """
        logger.info(f"Calculating indicators using startup candle of {startup_candle}.")
        partial_varHolder = VarHolder()

        partial_varHolder.from_dt = start_date
        partial_varHolder.to_dt = self.full_varHolder.to_dt
        partial_varHolder.startup_candle = startup_candle

        self.local_config["startup_candle_count"] = startup_candle

        self.prepare_data(partial_varHolder, self.local_config["pairs"])

        self.partial_varHolder_array.append(partial_varHolder)

    def fill_partial_varholder_lookahead(self, end_date):
        """
        Compute indicators from the start of the full run up to ``end_date`` and
        append the result to ``self.partial_varHolder_lookahead_array``.

        :param end_date: End of the shortened window.
        """
        logger.info("Calculating indicators to test lookahead on indicators.")
        partial_varHolder = VarHolder()

        partial_varHolder.from_dt = self.full_varHolder.from_dt
        partial_varHolder.to_dt = end_date

        self.prepare_data(partial_varHolder, self.local_config["pairs"])

        self.partial_varHolder_lookahead_array.append(partial_varHolder)

    def start(self) -> None:
        """
        Run the analysis: build all partial runs, then compare their indicators.

        Log verbosity is reduced while the partial runs are calculated and is
        restored afterwards, even if one of them raises.
        """
        super().start()

        reduce_verbosity_for_bias_tester()
        try:
            start_date_full = self.full_varHolder.from_dt
            end_date_full = self.full_varHolder.to_dt

            timeframe_minutes = timeframe_to_minutes(self.full_varHolder.timeframe)

            # Lookahead check: window ending 10 candles after the start of the full run.
            end_date_partial = start_date_full + timedelta(minutes=int(timeframe_minutes * 10))
            self.fill_partial_varholder_lookahead(end_date_partial)

            # Recursive check: window starting one candle before the end of the full run.
            start_date_partial = end_date_full - timedelta(minutes=int(timeframe_minutes))
            for startup_candle in self._startup_candle:
                self.fill_partial_varholder(start_date_partial, startup_candle)
        finally:
            # Restore verbosity, so it's not too quiet for the next strategy
            restore_verbosity_for_bias_tester()

        self.analyze_indicators()
        self.analyze_indicators_lookahead()