add recursive analysis

This commit is contained in:
Stefano Ariestasia 2023-09-04 10:53:04 +09:00
parent d2c0e9e438
commit b77f926cdd
5 changed files with 446 additions and 2 deletions

View File

@ -20,7 +20,8 @@ from freqtrade.commands.list_commands import (start_list_exchanges, start_list_f
start_list_timeframes, start_show_trades)
from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show,
start_edge, start_hyperopt,
start_lookahead_analysis)
start_lookahead_analysis,
start_recursive_analysis)
from freqtrade.commands.pairlist_commands import start_test_pairlist
from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit
from freqtrade.commands.strategy_utils_commands import start_strategy_update

View File

@ -122,6 +122,8 @@ ARGS_LOOKAHEAD_ANALYSIS = [
a for a in ARGS_BACKTEST if a not in ("position_stacking", "use_max_market_positions", 'cache')
] + ["minimum_trade_amount", "targeted_trade_amount", "lookahead_analysis_exportfilename"]
# Option names handed to ``_build_args`` for the ``recursive-analysis`` subcommand.
ARGS_RECURSIVE_ANALYSIS = [
    "timeframe",
    "timerange",
    "dataformat_ohlcv",
    "pairs",
    "startup_candle",
]
class Arguments:
"""
@ -206,7 +208,7 @@ class Arguments:
start_list_strategies, start_list_timeframes,
start_lookahead_analysis, start_new_config,
start_new_strategy, start_plot_dataframe, start_plot_profit,
start_show_trades, start_strategy_update,
start_recursive_analysis, start_show_trades, start_strategy_update,
start_test_pairlist, start_trading, start_webserver)
subparsers = self.parser.add_subparsers(dest='command',
@ -467,3 +469,14 @@ class Arguments:
self._build_args(optionlist=ARGS_LOOKAHEAD_ANALYSIS,
parser=lookahead_analayis_cmd)
# Add recursive_analysis subcommand
recursive_analayis_cmd = subparsers.add_parser(
'recursive-analysis',
help="Check for potential look ahead bias.",
parents=[_common_parser, _strategy_parser])
recursive_analayis_cmd.set_defaults(func=start_recursive_analysis)
self._build_args(optionlist=ARGS_RECURSIVE_ANALYSIS,
parser=recursive_analayis_cmd)

View File

@ -144,3 +144,15 @@ def start_lookahead_analysis(args: Dict[str, Any]) -> None:
config = setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE)
LookaheadAnalysisSubFunctions.start(config)
def start_recursive_analysis(args: Dict[str, Any]) -> None:
    """
    Entry point of the ``recursive-analysis`` subcommand.

    Builds the utility configuration (run mode ``UTIL_NO_EXCHANGE``) from the
    parsed command line and passes it to ``RecursiveAnalysisSubFunctions.start``.

    :param args: Cli args from Arguments()
    :return: None
    """
    # Imported inside the function, mirroring the original code, so the
    # optimize module is only loaded when this subcommand actually runs.
    from freqtrade.optimize.recursive_analysis_helpers import RecursiveAnalysisSubFunctions

    RecursiveAnalysisSubFunctions.start(
        setup_utils_configuration(args, RunMode.UTIL_NO_EXCHANGE)
    )

View File

@ -0,0 +1,236 @@
import logging
import shutil
from copy import deepcopy
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from pandas import DataFrame
from freqtrade.configuration import TimeRange
from freqtrade.data.history import get_timerange
from freqtrade.exchange import timeframe_to_minutes
from freqtrade.loggers.set_log_levels import (reduce_verbosity_for_bias_tester,
restore_verbosity_for_bias_tester)
from freqtrade.optimize.backtesting import Backtesting
logger = logging.getLogger(__name__)
class VarHolder:
    """
    Plain attribute container describing one data-loading run.

    Instances are created empty and filled in step by step by
    ``RecursiveAnalysis``.
    """

    # Timerange returned by ``Backtesting.load_bt_data()``.
    timerange: TimeRange
    # Data returned by ``Backtesting.load_bt_data()``.
    data: DataFrame
    # Result of ``strategy.advise_all_indicators(data)``, indexed by pair.
    indicators: Dict[str, DataFrame]
    # Start of the range requested for this run.
    from_dt: datetime
    # End of the range requested for this run.
    to_dt: datetime
    # Timeframe reported by the ``Backtesting`` instance used for loading.
    timeframe: str
    # ``startup_candle_count`` that was active for this run (partial runs only).
    startup_candle: int
class RecursiveAnalysis:
    """
    Check the indicators of one strategy for recursive and lookahead bias.

    Indicators are calculated once over the full configured timerange and
    then again over shortened ranges:

    * ranges starting one candle before the end of the full range, loaded
      with different ``startup_candle_count`` values (recursive check), and
    * one range ending ten candles after the start of the full range
      (lookahead check).

    Rows that should be identical are compared, and every indicator whose
    value differs is reported.
    """

    _TIME_FORMAT = '%Y-%m-%dT%H:%M:%S'

    def __init__(self, config: Dict[str, Any], strategy_obj: Dict):
        """
        :param config: Configuration; a deep copy is kept so that the
            caller's dictionary is never modified by this instance.
        :param strategy_obj: Strategy description; its ``'name'`` entry
            selects the strategy to analyze.
        """
        self.failed_bias_check = True
        self.full_varHolder = VarHolder()
        self.partial_varHolder_array: List[VarHolder] = []
        self.partial_varHolder_lookahead_array: List[VarHolder] = []

        self.entry_varHolders: List[VarHolder] = []
        self.exit_varHolders: List[VarHolder] = []
        self.exchange: Optional[Any] = None

        # pull variables the scope of the recursive_analysis-instance
        self.local_config = deepcopy(config)
        self.local_config['strategy'] = strategy_obj['name']
        self._startup_candle = config.get('startup_candle', [199, 399, 499, 999, 1999])
        self.strategy_obj = strategy_obj
        # indicator name -> {startup candle count -> formatted difference}
        self.dict_recursive: Dict[str, Dict[int, str]] = {}

    @staticmethod
    def dt_to_timestamp(dt: datetime) -> int:
        """
        Convert ``dt`` to integer epoch seconds.

        Naive datetimes are interpreted as UTC. Aware datetimes keep their
        own offset, so a non-UTC value still maps to the correct instant.
        """
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return int(dt.timestamp())

    @staticmethod
    def _relative_difference_pct(base_value, other_value) -> float:
        """
        Return the change from ``base_value`` to ``other_value`` in percent.

        A zero baseline cannot be used as divisor; the result is then
        reported as signed infinity (or NaN when the sign is undefined).
        """
        if base_value == 0:
            if other_value > 0:
                return float('inf')
            if other_value < 0:
                return float('-inf')
            return float('nan')
        return (other_value - base_value) / base_value * 100

    def analyze_indicators(self):
        """
        Recursive bias check.

        Compares the last indicator row of the full run with the last row
        of every partial run and stores the percentage difference per
        indicator and startup candle count in ``self.dict_recursive``.
        The loop stops at the first partial run without any difference.
        """
        pair_to_check = self.local_config['pairs'][0]
        logger.info("Start checking for recursive bias")

        full = self.full_varHolder
        base_last_row = full.indicators[pair_to_check].iloc[-1]
        base_timerange = (full.from_dt.strftime(self._TIME_FORMAT) + "-"
                          + full.to_dt.strftime(self._TIME_FORMAT))

        for part in self.partial_varHolder_array:
            part_last_row = part.indicators[pair_to_check].iloc[-1]
            part_timerange = (part.from_dt.strftime(self._TIME_FORMAT) + "-"
                              + part.to_dt.strftime(self._TIME_FORMAT))

            logger.info(f"Comparing last row of {base_timerange} backtest")
            logger.info(f"vs {part_timerange} with {part.startup_candle} startup candle")

            # One row per differing indicator, columns 'self' (full run)
            # and 'other' (partial run).
            compare_df = base_last_row.compare(part_last_row)
            if compare_df.empty:
                logger.info("No difference found. Stop the process.")
                break

            for indicator in compare_df.index:
                difference = self._relative_difference_pct(
                    compare_df.at[indicator, 'self'],
                    compare_df.at[indicator, 'other'])
                per_startup = self.dict_recursive.setdefault(indicator, {})
                per_startup[part.startup_candle] = f"{difference:.3f}%"

    def analyze_indicators_lookahead(self):
        """
        Lookahead bias check.

        Takes the last row of the shortened run and compares it with the
        row of the full run carrying the same ``date``. Every indicator
        that differs is logged.
        """
        pair_to_check = self.local_config['pairs'][0]
        logger.info("Start checking for lookahead bias")

        part = self.partial_varHolder_lookahead_array[0]
        part_last_row = part.indicators[pair_to_check].iloc[-1]
        date_to_check = part_last_row['date']

        full_indicators = self.full_varHolder.indicators[pair_to_check]
        base_row_to_check = full_indicators.loc[
            full_indicators['date'] == date_to_check].iloc[-1]

        check_time = part.to_dt.strftime(self._TIME_FORMAT)
        logger.info(f"Check indicators at {check_time}")

        compare_df = base_row_to_check.compare(part_last_row)
        if compare_df.empty:
            logger.info("No lookahead bias found. Stop the process.")
            return

        for indicator in compare_df.index:
            logger.info(f"=> found lookahead in indicator {indicator}")

    def prepare_data(self, varholder: "VarHolder", pairs_to_load: List[str]):
        """
        Load data and calculate indicators for the range stored in ``varholder``.

        :param varholder: Must provide ``from_dt`` and ``to_dt``; receives
            ``data``, ``timerange``, ``timeframe`` and ``indicators``.
        :param pairs_to_load: Value written to the exchange ``pair_whitelist``.
        """
        if 'freqai' in self.local_config and 'identifier' in self.local_config['freqai']:
            # purge previous data if the freqai model is defined
            # (to be sure nothing is carried over from older backtests)
            path_to_current_identifier = (
                Path(f"{self.local_config['user_data_dir']}/models/"
                     f"{self.local_config['freqai']['identifier']}").resolve())
            # remove folder and its contents
            if path_to_current_identifier.exists():
                shutil.rmtree(path_to_current_identifier)

        prepare_data_config = deepcopy(self.local_config)
        prepare_data_config['timerange'] = (str(self.dt_to_timestamp(varholder.from_dt)) + "-" +
                                            str(self.dt_to_timestamp(varholder.to_dt)))
        prepare_data_config['exchange']['pair_whitelist'] = pairs_to_load

        # The exchange of the first Backtesting instance is kept and passed
        # to every later one.
        backtesting = Backtesting(prepare_data_config, self.exchange)
        self.exchange = backtesting.exchange
        backtesting._set_strategy(backtesting.strategylist[0])

        varholder.data, varholder.timerange = backtesting.load_bt_data()
        backtesting.load_bt_data_detail()
        varholder.timeframe = backtesting.timeframe

        varholder.indicators = backtesting.strategy.advise_all_indicators(varholder.data)

    def fill_full_varholder(self):
        """
        Prepare the baseline run over the complete configured timerange.

        An open start falls back to the epoch, an open end to the current
        time (timezone-aware UTC).
        """
        self.full_varHolder = VarHolder()

        # define datetime in human-readable format
        parsed_timerange = TimeRange.parse_timerange(self.local_config['timerange'])

        if parsed_timerange.startdt is None:
            self.full_varHolder.from_dt = datetime.fromtimestamp(0, tz=timezone.utc)
        else:
            self.full_varHolder.from_dt = parsed_timerange.startdt

        if parsed_timerange.stopdt is None:
            # Aware "now" instead of the deprecated, naive datetime.utcnow().
            self.full_varHolder.to_dt = datetime.now(timezone.utc)
        else:
            self.full_varHolder.to_dt = parsed_timerange.stopdt

        self.prepare_data(self.full_varHolder, self.local_config['pairs'])

    def fill_partial_varholder(self, start_date, startup_candle):
        """
        Prepare one run for the recursive check and append it to
        ``self.partial_varHolder_array``.

        :param start_date: Start of the run; the end is the end of the full run.
        :param startup_candle: Written to ``startup_candle_count`` of the
            instance configuration before the data is loaded.
        """
        partial_varHolder = VarHolder()

        partial_varHolder.from_dt = start_date
        partial_varHolder.to_dt = self.full_varHolder.to_dt
        partial_varHolder.startup_candle = startup_candle

        # Stays set on the instance configuration for all later loads.
        self.local_config['startup_candle_count'] = startup_candle

        self.prepare_data(partial_varHolder, self.local_config['pairs'])

        self.partial_varHolder_array.append(partial_varHolder)

    def fill_partial_varholder_lookahead(self, end_date):
        """
        Prepare the run for the lookahead check and append it to
        ``self.partial_varHolder_lookahead_array``.

        :param end_date: End of the run; the start is the start of the full run.
        """
        partial_varHolder = VarHolder()

        partial_varHolder.from_dt = self.full_varHolder.from_dt
        partial_varHolder.to_dt = end_date

        self.prepare_data(partial_varHolder, self.local_config['pairs'])

        self.partial_varHolder_lookahead_array.append(partial_varHolder)

    def start(self) -> None:
        """
        Run the complete analysis: load the baseline and all partial runs,
        then perform the recursive and the lookahead comparison.
        """
        # first make a single backtest
        self.fill_full_varholder()

        reduce_verbosity_for_bias_tester()
        try:
            start_date_full = self.full_varHolder.from_dt
            end_date_full = self.full_varHolder.to_dt

            timeframe_minutes = timeframe_to_minutes(self.full_varHolder.timeframe)

            # Lookahead run: the first ten candles of the full range.
            end_date_partial = start_date_full + timedelta(minutes=int(timeframe_minutes * 10))
            self.fill_partial_varholder_lookahead(end_date_partial)

            # Recursive runs: the last candle of the full range, once per
            # configured startup candle count.
            start_date_partial = end_date_full - timedelta(minutes=int(timeframe_minutes))
            for startup_candle in self._startup_candle:
                self.fill_partial_varholder(start_date_partial, int(startup_candle))
        finally:
            # Restore verbosity even when loading fails, so it's not too
            # quiet for the next strategy.
            restore_verbosity_for_bias_tester()

        self.analyze_indicators()
        self.analyze_indicators_lookahead()

View File

@ -0,0 +1,182 @@
import logging
import time
from pathlib import Path
from typing import Any, Dict, List
import pandas as pd
from freqtrade.constants import Config
from freqtrade.exceptions import OperationalException
from freqtrade.optimize.recursive_analysis import RecursiveAnalysis
from freqtrade.resolvers import StrategyResolver
logger = logging.getLogger(__name__)
class RecursiveAnalysisSubFunctions:
    """Static helpers that configure, run and report recursive analyses."""

    @staticmethod
    def text_table_recursive_analysis_instances(
            config: Dict[str, Any],
            recursive_instances: List[RecursiveAnalysis]):
        """
        Print one table row per strategy and indicator that showed a difference.

        The columns after ``strategy`` and ``indicators`` are the startup
        candle counts of the first instance; a cell is ``'-'`` when no
        difference was recorded for that count.

        :param config: Unused here; kept for a stable call signature.
        :param recursive_instances: Finished analyses; must not be empty.
        :return: Tuple of (rendered table, headers, row data).
        """
        startups = recursive_instances[0]._startup_candle
        headers = ['strategy', 'indicators', *startups]

        data = []
        for inst in recursive_instances:
            for indicator, values in inst.dict_recursive.items():
                row = [inst.strategy_obj['name'], indicator]
                # Results are keyed by int, the configured counts may be strings.
                row.extend(values.get(int(candle), '-') for candle in startups)
                data.append(row)

        from tabulate import tabulate
        table = tabulate(data, headers=headers, tablefmt="orgtbl")
        print(table)
        return table, headers, data

    @staticmethod
    def export_to_csv(config: Dict[str, Any], lookahead_analysis: List[RecursiveAnalysis]):
        """
        Merge analysis results into the CSV file named by
        ``config['lookahead_analysis_exportfilename']``.

        Only instances that expose a ``current_analysis`` result, passed the
        bias check and exceed ``minimum_trade_amount`` signals are written.
        The ``RecursiveAnalysis`` class in this change never sets
        ``current_analysis``, so such instances are skipped with a warning
        instead of raising ``AttributeError``.

        :param config: Must contain ``lookahead_analysis_exportfilename``.
        :param lookahead_analysis: Finished analysis instances.
        """
        def add_or_update_row(df, row_data):
            matches = (
                (df['filename'] == row_data['filename']) &
                (df['strategy'] == row_data['strategy'])
            )
            if matches.any():
                # Update existing row
                # NOTE(review): assigning a DataFrame aligns on its index;
                # confirm this updates rows whose index label is not 0.
                pd_series = pd.DataFrame([row_data])
                df.loc[matches] = pd_series
            else:
                # Add new row
                df = pd.concat([df, pd.DataFrame([row_data], columns=df.columns)])

            return df

        export_filename = config['lookahead_analysis_exportfilename']

        if Path(export_filename).exists():
            # Read CSV file into a pandas dataframe
            csv_df = pd.read_csv(export_filename)
        else:
            # Create a new empty DataFrame with the desired column names and set the index
            csv_df = pd.DataFrame(columns=[
                'filename', 'strategy', 'has_bias', 'total_signals',
                'biased_entry_signals', 'biased_exit_signals', 'biased_indicators'
            ],
                index=None)

        for inst in lookahead_analysis:
            analysis = getattr(inst, 'current_analysis', None)
            if analysis is None:
                logger.warning(f"No signal analysis available for "
                               f"{inst.strategy_obj['name']}, skipping csv export.")
                continue

            # only update if the signal threshold is met and the check passed
            if (analysis.total_signals > config['minimum_trade_amount']
                    and inst.failed_bias_check is not True):
                new_row_data = {'filename': inst.strategy_obj['location'].parts[-1],
                                'strategy': inst.strategy_obj['name'],
                                'has_bias': analysis.has_bias,
                                'total_signals': int(analysis.total_signals),
                                'biased_entry_signals': int(analysis.false_entry_signals),
                                'biased_exit_signals': int(analysis.false_exit_signals),
                                'biased_indicators': ",".join(analysis.false_indicators)}
                csv_df = add_or_update_row(csv_df, new_row_data)

        # Fill NaN values with a default value (0) and convert to integers
        for column in ('total_signals', 'biased_entry_signals', 'biased_exit_signals'):
            csv_df[column] = csv_df[column].fillna(0).astype(int)

        logger.info(f"saving {export_filename}")
        csv_df.to_csv(export_filename, index=False)

    @staticmethod
    def calculate_config_overrides(config: Config):
        """
        Adjust ``config`` in place so that the analysis is not distorted.

        * ``max_open_trades`` is raised to the number of pairs,
        * ``dry_run_wallet`` is raised to one billion,
        * ``backtest_cache`` is forced to ``'none'``.

        ``targeted_trade_amount`` and ``minimum_trade_amount`` are not part
        of ``ARGS_RECURSIVE_ANALYSIS`` and may be absent; they are only
        validated when both are present.

        :param config: Configuration to adjust; requires ``pairs``.
        :return: The same, modified configuration object.
        :raises OperationalException: If the targeted trade amount is
            smaller than the minimum trade amount.
        """
        targeted_trade_amount = config.get('targeted_trade_amount')
        minimum_trade_amount = config.get('minimum_trade_amount')
        if (targeted_trade_amount is not None and minimum_trade_amount is not None
                and targeted_trade_amount < minimum_trade_amount):
            # this combo doesn't make any sense.
            raise OperationalException(
                "Targeted trade amount can't be smaller than minimum trade amount."
            )

        if len(config['pairs']) > config.get('max_open_trades', 0):
            logger.info('Max_open_trades were less than amount of pairs. '
                        'Set max_open_trades to amount of pairs just to avoid false positives.')
            config['max_open_trades'] = len(config['pairs'])

        min_dry_run_wallet = 1000000000
        if config.get('dry_run_wallet', 0) < min_dry_run_wallet:
            logger.info('Dry run wallet was not set to 1 billion, pushing it up there '
                        'just to avoid false positives')
            config['dry_run_wallet'] = min_dry_run_wallet

        # enforce cache to be 'none', shift it to 'none' if not already
        # (since the default value is 'day')
        if config.get('backtest_cache') is None:
            config['backtest_cache'] = 'none'
        elif config['backtest_cache'] != 'none':
            logger.info(f"backtest_cache = "
                        f"{config['backtest_cache']} detected. "
                        f"Inside recursive-analysis it is enforced to be 'none'. "
                        f"Changed it to 'none'")
            config['backtest_cache'] = 'none'
        return config

    @staticmethod
    def initialize_single_recursive_analysis(config: Config, strategy_obj: Dict[str, Any]):
        """
        Run the analysis for one strategy and log how long it took.

        :param config: Configuration passed on to ``RecursiveAnalysis``.
        :param strategy_obj: Strategy description with ``name`` and ``location``.
        :return: The finished ``RecursiveAnalysis`` instance.
        """
        strategy_file = Path(strategy_obj['location']).name

        logger.info(f"Recursive test of {strategy_file} started.")
        start = time.perf_counter()
        current_instance = RecursiveAnalysis(config, strategy_obj)
        current_instance.start()
        elapsed = time.perf_counter() - start
        logger.info(f"Checking recursive and lookahead bias of indicators "
                    f"of {strategy_file} "
                    f"took {elapsed:.0f} seconds.")
        return current_instance

    @staticmethod
    def start(config: Config):
        """
        Analyze every requested strategy and report the results.

        Strategies come from ``strategy_list`` or, when that is empty, from
        ``strategy``. Names that cannot be resolved are silently ignored.

        :param config: Configuration of the ``recursive-analysis`` command.
        :raises OperationalException: If no strategy was specified at all.
        """
        config = RecursiveAnalysisSubFunctions.calculate_config_overrides(config)

        strategy_objs = StrategyResolver.search_all_objects(
            config, enum_failed=False, recursive=config.get('recursive_strategy_search', False))

        # unify --strategy and --strategy_list to one list
        if not (strategy_list := config.get('strategy_list', [])):
            if config.get('strategy') is None:
                raise OperationalException(
                    "No Strategy specified. Please specify a strategy via --strategy or "
                    "--strategy_list"
                )
            strategy_list = [config['strategy']]

        # First loadable strategy object per name, as in a first-match search.
        loadable_by_name: Dict[str, Any] = {}
        for strategy_obj in strategy_objs:
            loadable_by_name.setdefault(strategy_obj['name'], strategy_obj)

        # check if strategies can be properly loaded, only check them if they can be.
        RecursiveAnalysis_instances = []
        for strat in strategy_list:
            strategy_obj = loadable_by_name.get(strat)
            if strategy_obj is not None:
                RecursiveAnalysis_instances.append(
                    RecursiveAnalysisSubFunctions.initialize_single_recursive_analysis(
                        config, strategy_obj))

        # report the results
        if RecursiveAnalysis_instances:
            RecursiveAnalysisSubFunctions.text_table_recursive_analysis_instances(
                config, RecursiveAnalysis_instances)
            if config.get('lookahead_analysis_exportfilename') is not None:
                RecursiveAnalysisSubFunctions.export_to_csv(config, RecursiveAnalysis_instances)
        else:
            logger.error("There were no strategies specified neither through "
                         "--strategy nor through "
                         "--strategy_list "
                         "or timeframe was not specified.")