ruff format: freqtrade.data

This commit is contained in:
Matthias 2024-05-12 17:41:55 +02:00
parent 801ab4acc9
commit fea1653e31
16 changed files with 1195 additions and 917 deletions
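For orientation: a change set like this is what Ruff's formatter produces when run over the freqtrade.data package. Below is a minimal sketch of how such a pass could be reproduced locally, assuming ruff is installed and the repository root is the working directory (the exact command used for this commit is not recorded on this page):

# Hypothetical reproduction of a formatting pass over freqtrade.data
# (assumes the `ruff` executable is available on PATH).
import subprocess

subprocess.run(["ruff", "format", "freqtrade/data"], check=True)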

View File

@@ -3,6 +3,4 @@ Module to handle data operations for freqtrade
 """
 # limit what's imported when using `from freqtrade.data import *`
-__all__ = [
-    'converter'
-]
+__all__ = ["converter"]
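As the comment in this hunk notes, `__all__` controls what a wildcard import exposes. A tiny illustrative sketch (nothing here is part of the commit itself):

# With __all__ = ["converter"], a star import binds only the converter submodule.
from freqtrade.data import *  # noqa: F403

print(converter)  # the freqtrade.data.converter module  # noqa: F405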

View File

@@ -1,6 +1,7 @@
 """
 Helpers when analyzing backtest data
 """
+
 import logging
 from copy import copy
 from datetime import datetime, timezone

@@ -21,14 +22,35 @@ from freqtrade.types import BacktestHistoryEntryType, BacktestResultType
 logger = logging.getLogger(__name__)

 # Newest format
-BT_DATA_COLUMNS = ['pair', 'stake_amount', 'max_stake_amount', 'amount',
-                   'open_date', 'close_date', 'open_rate', 'close_rate',
-                   'fee_open', 'fee_close', 'trade_duration',
-                   'profit_ratio', 'profit_abs', 'exit_reason',
-                   'initial_stop_loss_abs', 'initial_stop_loss_ratio', 'stop_loss_abs',
-                   'stop_loss_ratio', 'min_rate', 'max_rate', 'is_open', 'enter_tag',
-                   'leverage', 'is_short', 'open_timestamp', 'close_timestamp', 'orders'
-                   ]
+BT_DATA_COLUMNS = [
+    "pair",
+    "stake_amount",
+    "max_stake_amount",
+    "amount",
+    "open_date",
+    "close_date",
+    "open_rate",
+    "close_rate",
+    "fee_open",
+    "fee_close",
+    "trade_duration",
+    "profit_ratio",
+    "profit_abs",
+    "exit_reason",
+    "initial_stop_loss_abs",
+    "initial_stop_loss_ratio",
+    "stop_loss_abs",
+    "stop_loss_ratio",
+    "min_rate",
+    "max_rate",
+    "is_open",
+    "enter_tag",
+    "leverage",
+    "is_short",
+    "open_timestamp",
+    "close_timestamp",
+    "orders",
+]

 def get_latest_optimize_filename(directory: Union[Path, str], variant: str) -> str:

@@ -50,15 +72,16 @@ def get_latest_optimize_filename(directory: Union[Path, str], variant: str) -> s
     if not filename.is_file():
         raise ValueError(
-            f"Directory '{directory}' does not seem to contain backtest statistics yet.")
+            f"Directory '{directory}' does not seem to contain backtest statistics yet."
+        )
     with filename.open() as file:
         data = json_load(file)
-    if f'latest_{variant}' not in data:
+    if f"latest_{variant}" not in data:
         raise ValueError(f"Invalid '{LAST_BT_RESULT_FN}' format.")
-    return data[f'latest_{variant}']
+    return data[f"latest_{variant}"]

 def get_latest_backtest_filename(directory: Union[Path, str]) -> str:

@@ -71,7 +94,7 @@ def get_latest_backtest_filename(directory: Union[Path, str]) -> str:
         * `directory/.last_result.json` does not exist
         * `directory/.last_result.json` has the wrong content
     """
-    return get_latest_optimize_filename(directory, 'backtest')
+    return get_latest_optimize_filename(directory, "backtest")

 def get_latest_hyperopt_filename(directory: Union[Path, str]) -> str:

@@ -85,14 +108,15 @@ def get_latest_hyperopt_filename(directory: Union[Path, str]) -> str:
         * `directory/.last_result.json` has the wrong content
     """
     try:
-        return get_latest_optimize_filename(directory, 'hyperopt')
+        return get_latest_optimize_filename(directory, "hyperopt")
     except ValueError:
         # Return default (legacy) pickle filename
-        return 'hyperopt_results.pickle'
+        return "hyperopt_results.pickle"

 def get_latest_hyperopt_file(
-        directory: Union[Path, str], predef_filename: Optional[str] = None) -> Path:
+    directory: Union[Path, str], predef_filename: Optional[str] = None
+) -> Path:
     """
     Get latest hyperopt export based on '.last_result.json'.
     :param directory: Directory to search for last result

@@ -107,7 +131,8 @@ def get_latest_hyperopt_file(
     if predef_filename:
         if Path(predef_filename).is_absolute():
             raise ConfigurationError(
-                "--hyperopt-filename expects only the filename, not an absolute path.")
+                "--hyperopt-filename expects only the filename, not an absolute path."
+            )
         return directory / predef_filename
     return directory / get_latest_hyperopt_filename(directory)

@@ -126,7 +151,7 @@ def load_backtest_metadata(filename: Union[Path, str]) -> Dict[str, Any]:
     except FileNotFoundError:
         return {}
     except Exception as e:
-        raise OperationalException('Unexpected error while loading backtest metadata.') from e
+        raise OperationalException("Unexpected error while loading backtest metadata.") from e

 def load_backtest_stats(filename: Union[Path, str]) -> BacktestResultType:

@@ -147,7 +172,7 @@ def load_backtest_stats(filename: Union[Path, str]) -> BacktestResultType:
     # Legacy list format does not contain metadata.
     if isinstance(data, dict):
-        data['metadata'] = load_backtest_metadata(filename)
+        data["metadata"] = load_backtest_metadata(filename)
     return data

@@ -159,38 +184,39 @@ def load_and_merge_backtest_result(strategy_name: str, filename: Path, results:
     :param results: dict to merge the result to.
     """
     bt_data = load_backtest_stats(filename)
-    k: Literal['metadata', 'strategy']
-    for k in ('metadata', 'strategy'):  # type: ignore
+    k: Literal["metadata", "strategy"]
+    for k in ("metadata", "strategy"):  # type: ignore
         results[k][strategy_name] = bt_data[k][strategy_name]
-    results['metadata'][strategy_name]['filename'] = filename.stem
-    comparison = bt_data['strategy_comparison']
+    results["metadata"][strategy_name]["filename"] = filename.stem
+    comparison = bt_data["strategy_comparison"]
     for i in range(len(comparison)):
-        if comparison[i]['key'] == strategy_name:
-            results['strategy_comparison'].append(comparison[i])
+        if comparison[i]["key"] == strategy_name:
+            results["strategy_comparison"].append(comparison[i])
             break

 def _get_backtest_files(dirname: Path) -> List[Path]:
     # Weird glob expression here avoids including .meta.json files.
-    return list(reversed(sorted(dirname.glob('backtest-result-*-[0-9][0-9].json'))))
+    return list(reversed(sorted(dirname.glob("backtest-result-*-[0-9][0-9].json"))))

 def _extract_backtest_result(filename: Path) -> List[BacktestHistoryEntryType]:
     metadata = load_backtest_metadata(filename)
     return [
         {
-            'filename': filename.stem,
-            'strategy': s,
-            'run_id': v['run_id'],
-            'notes': v.get('notes', ''),
+            "filename": filename.stem,
+            "strategy": s,
+            "run_id": v["run_id"],
+            "notes": v.get("notes", ""),
             # Backtest "run" time
-            'backtest_start_time': v['backtest_start_time'],
+            "backtest_start_time": v["backtest_start_time"],
             # Backtest timerange
-            'backtest_start_ts': v.get('backtest_start_ts', None),
-            'backtest_end_ts': v.get('backtest_end_ts', None),
-            'timeframe': v.get('timeframe', None),
-            'timeframe_detail': v.get('timeframe_detail', None),
-        } for s, v in metadata.items()
+            "backtest_start_ts": v.get("backtest_start_ts", None),
+            "backtest_end_ts": v.get("backtest_end_ts", None),
+            "timeframe": v.get("timeframe", None),
+            "timeframe_detail": v.get("timeframe_detail", None),
+        }
+        for s, v in metadata.items()
     ]

@@ -218,7 +244,7 @@ def delete_backtest_result(file_abs: Path):
     """
     # *.meta.json
     logger.info(f"Deleting backtest result file: {file_abs.name}")
-    file_abs_meta = file_abs.with_suffix('.meta.json')
+    file_abs_meta = file_abs.with_suffix(".meta.json")
     file_abs.unlink()
     file_abs_meta.unlink()

@@ -244,12 +270,13 @@ def get_backtest_market_change(filename: Path, include_ts: bool = True) -> pd.Da
     """
     df = pd.read_feather(filename)
     if include_ts:
-        df.loc[:, '__date_ts'] = df.loc[:, 'date'].astype(np.int64) // 1000 // 1000
+        df.loc[:, "__date_ts"] = df.loc[:, "date"].astype(np.int64) // 1000 // 1000
     return df

-def find_existing_backtest_stats(dirname: Union[Path, str], run_ids: Dict[str, str],
-                                 min_backtest_date: Optional[datetime] = None) -> Dict[str, Any]:
+def find_existing_backtest_stats(
+    dirname: Union[Path, str], run_ids: Dict[str, str], min_backtest_date: Optional[datetime] = None
+) -> Dict[str, Any]:
     """
     Find existing backtest stats that match specified run IDs and load them.
     :param dirname: pathlib.Path object, or string pointing to the file.

@@ -261,9 +288,9 @@ def find_existing_backtest_stats(dirname: Union[Path, str], run_ids: Dict[str, s
     run_ids = copy(run_ids)
     dirname = Path(dirname)
     results: Dict[str, Any] = {
-        'metadata': {},
-        'strategy': {},
-        'strategy_comparison': [],
+        "metadata": {},
+        "strategy": {},
+        "strategy_comparison": [],
     }

     for filename in _get_backtest_files(dirname):

@@ -280,14 +307,14 @@ def find_existing_backtest_stats(dirname: Union[Path, str], run_ids: Dict[str, s
                 continue

             if min_backtest_date is not None:
-                backtest_date = strategy_metadata['backtest_start_time']
+                backtest_date = strategy_metadata["backtest_start_time"]
                 backtest_date = datetime.fromtimestamp(backtest_date, tz=timezone.utc)
                 if backtest_date < min_backtest_date:
                     # Do not use a cached result for this strategy as first result is too old.
                     del run_ids[strategy_name]
                     continue

-            if strategy_metadata['run_id'] == run_id:
+            if strategy_metadata["run_id"] == run_id:
                 del run_ids[strategy_name]
                 load_and_merge_backtest_result(strategy_name, filename, results)

@@ -300,20 +327,20 @@ def _load_backtest_data_df_compatibility(df: pd.DataFrame) -> pd.DataFrame:
     """
     Compatibility support for older backtest data.
     """
-    df['open_date'] = pd.to_datetime(df['open_date'], utc=True)
-    df['close_date'] = pd.to_datetime(df['close_date'], utc=True)
+    df["open_date"] = pd.to_datetime(df["open_date"], utc=True)
+    df["close_date"] = pd.to_datetime(df["close_date"], utc=True)
     # Compatibility support for pre short Columns
-    if 'is_short' not in df.columns:
-        df['is_short'] = False
-    if 'leverage' not in df.columns:
-        df['leverage'] = 1.0
-    if 'enter_tag' not in df.columns:
-        df['enter_tag'] = df['buy_tag']
-        df = df.drop(['buy_tag'], axis=1)
-    if 'max_stake_amount' not in df.columns:
-        df['max_stake_amount'] = df['stake_amount']
-    if 'orders' not in df.columns:
-        df['orders'] = None
+    if "is_short" not in df.columns:
+        df["is_short"] = False
+    if "leverage" not in df.columns:
+        df["leverage"] = 1.0
+    if "enter_tag" not in df.columns:
+        df["enter_tag"] = df["buy_tag"]
+        df = df.drop(["buy_tag"], axis=1)
+    if "max_stake_amount" not in df.columns:
+        df["max_stake_amount"] = df["stake_amount"]
+    if "orders" not in df.columns:
+        df["orders"] = None
     return df

@@ -329,23 +356,25 @@ def load_backtest_data(filename: Union[Path, str], strategy: Optional[str] = Non
     data = load_backtest_stats(filename)
     if not isinstance(data, list):
         # new, nested format
-        if 'strategy' not in data:
+        if "strategy" not in data:
             raise ValueError("Unknown dataformat.")

         if not strategy:
-            if len(data['strategy']) == 1:
-                strategy = list(data['strategy'].keys())[0]
+            if len(data["strategy"]) == 1:
+                strategy = list(data["strategy"].keys())[0]
             else:
-                raise ValueError("Detected backtest result with more than one strategy. "
-                                 "Please specify a strategy.")
+                raise ValueError(
+                    "Detected backtest result with more than one strategy. "
+                    "Please specify a strategy."
+                )

-        if strategy not in data['strategy']:
+        if strategy not in data["strategy"]:
             raise ValueError(
                 f"Strategy {strategy} not available in the backtest result. "
                 f"Available strategies are '{','.join(data['strategy'].keys())}'"
             )

-        data = data['strategy'][strategy]['trades']
+        data = data["strategy"][strategy]["trades"]
         df = pd.DataFrame(data)
         if not df.empty:
             df = _load_backtest_data_df_compatibility(df)

@@ -353,7 +382,8 @@ def load_backtest_data(filename: Union[Path, str], strategy: Optional[str] = Non
     else:
         # old format - only with lists.
         raise OperationalException(
-            "Backtest-results with only trades data are no longer supported.")
+            "Backtest-results with only trades data are no longer supported."
+        )
     if not df.empty:
         df = df.sort_values("open_date").reset_index(drop=True)
     return df

@@ -368,23 +398,26 @@ def analyze_trade_parallelism(results: pd.DataFrame, timeframe: str) -> pd.DataF
     :return: dataframe with open-counts per time-period in timeframe
     """
     from freqtrade.exchange import timeframe_to_resample_freq
+
     timeframe_freq = timeframe_to_resample_freq(timeframe)
-    dates = [pd.Series(pd.date_range(row[1]['open_date'], row[1]['close_date'],
-                                     freq=timeframe_freq))
-             for row in results[['open_date', 'close_date']].iterrows()]
+    dates = [
+        pd.Series(pd.date_range(row[1]["open_date"], row[1]["close_date"], freq=timeframe_freq))
+        for row in results[["open_date", "close_date"]].iterrows()
+    ]
     deltas = [len(x) for x in dates]
-    dates = pd.Series(pd.concat(dates).values, name='date')
+    dates = pd.Series(pd.concat(dates).values, name="date")
     df2 = pd.DataFrame(np.repeat(results.values, deltas, axis=0), columns=results.columns)
     df2 = pd.concat([dates, df2], axis=1)
-    df2 = df2.set_index('date')
-    df_final = df2.resample(timeframe_freq)[['pair']].count()
-    df_final = df_final.rename({'pair': 'open_trades'}, axis=1)
+    df2 = df2.set_index("date")
+    df_final = df2.resample(timeframe_freq)[["pair"]].count()
+    df_final = df_final.rename({"pair": "open_trades"}, axis=1)
     return df_final

-def evaluate_result_multi(results: pd.DataFrame, timeframe: str,
-                          max_open_trades: IntOrInf) -> pd.DataFrame:
+def evaluate_result_multi(
+    results: pd.DataFrame, timeframe: str, max_open_trades: IntOrInf
+) -> pd.DataFrame:
     """
     Find overlapping trades by expanding each trade once per period it was open
     and then counting overlaps

@@ -394,7 +427,7 @@ def evaluate_result_multi(results: pd.DataFrame, timeframe: str,
     :return: dataframe with open-counts per time-period in freq
     """
     df_final = analyze_trade_parallelism(results, timeframe)
-    return df_final[df_final['open_trades'] > max_open_trades]
+    return df_final[df_final["open_trades"] > max_open_trades]

 def trade_list_to_dataframe(trades: Union[List[Trade], List[LocalTrade]]) -> pd.DataFrame:

@@ -405,9 +438,9 @@ def trade_list_to_dataframe(trades: Union[List[Trade], List[LocalTrade]]) -> pd.
     """
     df = pd.DataFrame.from_records([t.to_json(True) for t in trades], columns=BT_DATA_COLUMNS)
     if len(df) > 0:
-        df['close_date'] = pd.to_datetime(df['close_date'], utc=True)
-        df['open_date'] = pd.to_datetime(df['open_date'], utc=True)
-        df['close_rate'] = df['close_rate'].astype('float64')
+        df["close_date"] = pd.to_datetime(df["close_date"], utc=True)
+        df["open_date"] = pd.to_datetime(df["open_date"], utc=True)
+        df["close_rate"] = df["close_rate"].astype("float64")
     return df

@@ -429,8 +462,13 @@ def load_trades_from_db(db_url: str, strategy: Optional[str] = None) -> pd.DataF
     return trades

-def load_trades(source: str, db_url: str, exportfilename: Path,
-                no_trades: bool = False, strategy: Optional[str] = None) -> pd.DataFrame:
+def load_trades(
+    source: str,
+    db_url: str,
+    exportfilename: Path,
+    no_trades: bool = False,
+    strategy: Optional[str] = None,
+) -> pd.DataFrame:
     """
     Based on configuration option 'trade_source':
     * loads data from DB (using `db_url`)

@@ -451,8 +489,9 @@ def load_trades(source: str, db_url: str, exportfilename: Path,
     return load_backtest_data(exportfilename, strategy)

-def extract_trades_of_period(dataframe: pd.DataFrame, trades: pd.DataFrame,
-                             date_index=False) -> pd.DataFrame:
+def extract_trades_of_period(
+    dataframe: pd.DataFrame, trades: pd.DataFrame, date_index=False
+) -> pd.DataFrame:
     """
     Compare trades and backtested pair DataFrames to get trades performed on backtested period
     :return: the DataFrame of a trades of period

@@ -461,8 +500,9 @@ def extract_trades_of_period(dataframe: pd.DataFrame, trades: pd.DataFrame,
         trades_start = dataframe.index[0]
         trades_stop = dataframe.index[-1]
     else:
-        trades_start = dataframe.iloc[0]['date']
-        trades_stop = dataframe.iloc[-1]['date']
-    trades = trades.loc[(trades['open_date'] >= trades_start) &
-                        (trades['close_date'] <= trades_stop)]
+        trades_start = dataframe.iloc[0]["date"]
+        trades_stop = dataframe.iloc[-1]["date"]
+    trades = trades.loc[
+        (trades["open_date"] >= trades_start) & (trades["close_date"] <= trades_stop)
+    ]
     return trades
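As a usage sketch for the btanalysis helpers reformatted above: the results directory and the strategy name below are assumptions for illustration only, not part of this commit.

# Load the most recent backtest trades for one strategy and inspect them.
from pathlib import Path

from freqtrade.data.btanalysis import get_latest_backtest_filename, load_backtest_data

results_dir = Path("user_data/backtest_results")  # assumed default location
latest = get_latest_backtest_filename(results_dir)
trades = load_backtest_data(results_dir / latest, strategy="SampleStrategy")  # hypothetical name
print(trades[["pair", "open_date", "close_date", "profit_ratio"]].head())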

View File

@@ -20,19 +20,19 @@ from freqtrade.data.converter.trade_converter import (
 __all__ = [
-    'clean_ohlcv_dataframe',
-    'convert_ohlcv_format',
-    'ohlcv_fill_up_missing_data',
-    'ohlcv_to_dataframe',
-    'order_book_to_dataframe',
-    'reduce_dataframe_footprint',
-    'trim_dataframe',
-    'trim_dataframes',
-    'convert_trades_format',
-    'convert_trades_to_ohlcv',
-    'trades_convert_types',
-    'trades_df_remove_duplicates',
-    'trades_dict_to_list',
-    'trades_list_to_df',
-    'trades_to_ohlcv',
+    "clean_ohlcv_dataframe",
+    "convert_ohlcv_format",
+    "ohlcv_fill_up_missing_data",
+    "ohlcv_to_dataframe",
+    "order_book_to_dataframe",
+    "reduce_dataframe_footprint",
+    "trim_dataframe",
+    "trim_dataframes",
+    "convert_trades_format",
+    "convert_trades_to_ohlcv",
+    "trades_convert_types",
+    "trades_df_remove_duplicates",
+    "trades_dict_to_list",
+    "trades_list_to_df",
+    "trades_to_ohlcv",
 ]

View File

@@ -1,6 +1,7 @@
 """
 Functions to convert data from one format to another
 """
+
 import logging
 from typing import Dict

@@ -15,8 +16,14 @@ from freqtrade.enums import CandleType, TradingMode
 logger = logging.getLogger(__name__)

-def ohlcv_to_dataframe(ohlcv: list, timeframe: str, pair: str, *,
-                       fill_missing: bool = True, drop_incomplete: bool = True) -> DataFrame:
+def ohlcv_to_dataframe(
+    ohlcv: list,
+    timeframe: str,
+    pair: str,
+    *,
+    fill_missing: bool = True,
+    drop_incomplete: bool = True,
+) -> DataFrame:
     """
     Converts a list with candle (OHLCV) data (in format returned by ccxt.fetch_ohlcv)
     to a Dataframe

@@ -32,20 +39,28 @@ def ohlcv_to_dataframe(ohlcv: list, timeframe: str, pair: str, *,
     cols = DEFAULT_DATAFRAME_COLUMNS
     df = DataFrame(ohlcv, columns=cols)
-    df['date'] = to_datetime(df['date'], unit='ms', utc=True)
+    df["date"] = to_datetime(df["date"], unit="ms", utc=True)
     # Some exchanges return int values for Volume and even for OHLC.
     # Convert them since TA-LIB indicators used in the strategy assume floats
     # and fail with exception...
-    df = df.astype(dtype={'open': 'float', 'high': 'float', 'low': 'float', 'close': 'float',
-                          'volume': 'float'})
-    return clean_ohlcv_dataframe(df, timeframe, pair,
-                                 fill_missing=fill_missing,
-                                 drop_incomplete=drop_incomplete)
+    df = df.astype(
+        dtype={
+            "open": "float",
+            "high": "float",
+            "low": "float",
+            "close": "float",
+            "volume": "float",
+        }
+    )
+    return clean_ohlcv_dataframe(
+        df, timeframe, pair, fill_missing=fill_missing, drop_incomplete=drop_incomplete
+    )

-def clean_ohlcv_dataframe(data: DataFrame, timeframe: str, pair: str, *,
-                          fill_missing: bool, drop_incomplete: bool) -> DataFrame:
+def clean_ohlcv_dataframe(
+    data: DataFrame, timeframe: str, pair: str, *, fill_missing: bool, drop_incomplete: bool
+) -> DataFrame:
     """
     Cleanse a OHLCV dataframe by
     * Grouping it by date (removes duplicate tics)

@@ -60,17 +75,19 @@ def clean_ohlcv_dataframe(data: DataFrame, timeframe: str, pair: str, *,
     :return: DataFrame
     """
     # group by index and aggregate results to eliminate duplicate ticks
-    data = data.groupby(by='date', as_index=False, sort=True).agg({
-        'open': 'first',
-        'high': 'max',
-        'low': 'min',
-        'close': 'last',
-        'volume': 'max',
-    })
+    data = data.groupby(by="date", as_index=False, sort=True).agg(
+        {
+            "open": "first",
+            "high": "max",
+            "low": "min",
+            "close": "last",
+            "volume": "max",
+        }
+    )
     # eliminate partial candle
     if drop_incomplete:
         data.drop(data.tail(1).index, inplace=True)
-        logger.debug('Dropping last candle')
+        logger.debug("Dropping last candle")

     if fill_missing:
         return ohlcv_fill_up_missing_data(data, timeframe, pair)

@@ -86,32 +103,30 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str)
     """
     from freqtrade.exchange import timeframe_to_resample_freq

-    ohlcv_dict = {
-        'open': 'first',
-        'high': 'max',
-        'low': 'min',
-        'close': 'last',
-        'volume': 'sum'
-    }
+    ohlcv_dict = {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
     resample_interval = timeframe_to_resample_freq(timeframe)
     # Resample to create "NAN" values
-    df = dataframe.resample(resample_interval, on='date').agg(ohlcv_dict)
+    df = dataframe.resample(resample_interval, on="date").agg(ohlcv_dict)
     # Forwardfill close for missing columns
-    df['close'] = df['close'].ffill()
+    df["close"] = df["close"].ffill()
     # Use close for "open, high, low"
-    df.loc[:, ['open', 'high', 'low']] = df[['open', 'high', 'low']].fillna(
-        value={'open': df['close'],
-               'high': df['close'],
-               'low': df['close'],
-               })
+    df.loc[:, ["open", "high", "low"]] = df[["open", "high", "low"]].fillna(
+        value={
+            "open": df["close"],
+            "high": df["close"],
+            "low": df["close"],
+        }
+    )
     df.reset_index(inplace=True)
     len_before = len(dataframe)
     len_after = len(df)
     pct_missing = (len_after - len_before) / len_before if len_before > 0 else 0
     if len_before != len_after:
-        message = (f"Missing data fillup for {pair}, {timeframe}: "
-                   f"before: {len_before} - after: {len_after} - {pct_missing:.2%}")
+        message = (
+            f"Missing data fillup for {pair}, {timeframe}: "
+            f"before: {len_before} - after: {len_after} - {pct_missing:.2%}"
+        )
         if pct_missing > 0.01:
             logger.info(message)
         else:

@@ -120,8 +135,9 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str)
     return df

-def trim_dataframe(df: DataFrame, timerange, *, df_date_col: str = 'date',
-                   startup_candles: int = 0) -> DataFrame:
+def trim_dataframe(
+    df: DataFrame, timerange, *, df_date_col: str = "date", startup_candles: int = 0
+) -> DataFrame:
     """
     Trim dataframe based on given timerange
     :param df: Dataframe to trim

@@ -134,15 +150,16 @@ def trim_dataframe(df: DataFrame, timerange, *, df_date_col: str = 'date',
         # Trim candles instead of timeframe in case of given startup_candle count
         df = df.iloc[startup_candles:, :]
     else:
-        if timerange.starttype == 'date':
+        if timerange.starttype == "date":
             df = df.loc[df[df_date_col] >= timerange.startdt, :]
-        if timerange.stoptype == 'date':
+        if timerange.stoptype == "date":
             df = df.loc[df[df_date_col] <= timerange.stopdt, :]
     return df

-def trim_dataframes(preprocessed: Dict[str, DataFrame], timerange,
-                    startup_candles: int) -> Dict[str, DataFrame]:
+def trim_dataframes(
+    preprocessed: Dict[str, DataFrame], timerange, startup_candles: int
+) -> Dict[str, DataFrame]:
     """
     Trim startup period from analyzed dataframes
     :param preprocessed: Dict of pair: dataframe

@@ -157,8 +174,9 @@ def trim_dataframes(preprocessed: Dict[str, DataFrame], timerange,
         if not trimed_df.empty:
             processed[pair] = trimed_df
         else:
-            logger.warning(f'{pair} has no data left after adjusting for startup candles, '
-                           f'skipping.')
+            logger.warning(
+                f"{pair} has no data left after adjusting for startup candles, " f"skipping."
+            )
     return processed
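A small sketch of how trim_dataframe, shown above, is typically driven by a TimeRange; the frame contents and the timerange string are made up for illustration.

import pandas as pd

from freqtrade.configuration import TimeRange
from freqtrade.data.converter import trim_dataframe

df = pd.DataFrame(
    {
        "date": pd.date_range("2024-01-01", periods=48, freq="1h", tz="UTC"),
        "close": range(48),
    }
)
# Keep only candles between Jan 1st and Jan 2nd (both ends parsed as dates).
timerange = TimeRange.parse_timerange("20240101-20240102")
trimmed = trim_dataframe(df, timerange)
print(len(trimmed))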
@@ -170,19 +188,28 @@ def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
     b_sum b_size bids asks a_size a_sum
     -------------------------------------------------------------------
     """
-    cols = ['bids', 'b_size']
+    cols = ["bids", "b_size"]
     bids_frame = DataFrame(bids, columns=cols)
     # add cumulative sum column
-    bids_frame['b_sum'] = bids_frame['b_size'].cumsum()
-    cols2 = ['asks', 'a_size']
+    bids_frame["b_sum"] = bids_frame["b_size"].cumsum()
+    cols2 = ["asks", "a_size"]
     asks_frame = DataFrame(asks, columns=cols2)
     # add cumulative sum column
-    asks_frame['a_sum'] = asks_frame['a_size'].cumsum()
-    frame = pd.concat([bids_frame['b_sum'], bids_frame['b_size'], bids_frame['bids'],
-                       asks_frame['asks'], asks_frame['a_size'], asks_frame['a_sum']], axis=1,
-                      keys=['b_sum', 'b_size', 'bids', 'asks', 'a_size', 'a_sum'])
+    asks_frame["a_sum"] = asks_frame["a_size"].cumsum()
+    frame = pd.concat(
+        [
+            bids_frame["b_sum"],
+            bids_frame["b_size"],
+            bids_frame["bids"],
+            asks_frame["asks"],
+            asks_frame["a_size"],
+            asks_frame["a_sum"],
+        ],
+        axis=1,
+        keys=["b_sum", "b_size", "bids", "asks", "a_size", "a_sum"],
+    )
     # logger.info('order book %s', frame )
     return frame

@@ -201,47 +228,51 @@ def convert_ohlcv_format(
     :param erase: Erase source data (does not apply if source and target format are identical)
     """
     from freqtrade.data.history import get_datahandler
-    src = get_datahandler(config['datadir'], convert_from)
-    trg = get_datahandler(config['datadir'], convert_to)
-    timeframes = config.get('timeframes', [config.get('timeframe')])
+
+    src = get_datahandler(config["datadir"], convert_from)
+    trg = get_datahandler(config["datadir"], convert_to)
+    timeframes = config.get("timeframes", [config.get("timeframe")])
     logger.info(f"Converting candle (OHLCV) for timeframe {timeframes}")
-    candle_types = [CandleType.from_string(ct) for ct in config.get('candle_types', [
-        c.value for c in CandleType])]
+    candle_types = [
+        CandleType.from_string(ct)
+        for ct in config.get("candle_types", [c.value for c in CandleType])
+    ]
     logger.info(candle_types)
-    paircombs = src.ohlcv_get_available_data(config['datadir'], TradingMode.SPOT)
-    paircombs.extend(src.ohlcv_get_available_data(config['datadir'], TradingMode.FUTURES))
+    paircombs = src.ohlcv_get_available_data(config["datadir"], TradingMode.SPOT)
+    paircombs.extend(src.ohlcv_get_available_data(config["datadir"], TradingMode.FUTURES))
-    if 'pairs' in config:
+    if "pairs" in config:
         # Filter pairs
-        paircombs = [comb for comb in paircombs if comb[0] in config['pairs']]
+        paircombs = [comb for comb in paircombs if comb[0] in config["pairs"]]
-    if 'timeframes' in config:
-        paircombs = [comb for comb in paircombs if comb[1] in config['timeframes']]
+    if "timeframes" in config:
+        paircombs = [comb for comb in paircombs if comb[1] in config["timeframes"]]
     paircombs = [comb for comb in paircombs if comb[2] in candle_types]
     paircombs = sorted(paircombs, key=lambda x: (x[0], x[1], x[2].value))
-    formatted_paircombs = '\n'.join([f"{pair}, {timeframe}, {candle_type}"
-                                     for pair, timeframe, candle_type in paircombs])
+    formatted_paircombs = "\n".join(
+        [f"{pair}, {timeframe}, {candle_type}" for pair, timeframe, candle_type in paircombs]
+    )
-    logger.info(f"Converting candle (OHLCV) data for the following pair combinations:\n"
-                f"{formatted_paircombs}")
+    logger.info(
+        f"Converting candle (OHLCV) data for the following pair combinations:\n"
+        f"{formatted_paircombs}"
+    )
     for pair, timeframe, candle_type in paircombs:
-        data = src.ohlcv_load(pair=pair, timeframe=timeframe,
-                              timerange=None,
-                              fill_missing=False,
-                              drop_incomplete=False,
-                              startup_candles=0,
-                              candle_type=candle_type)
+        data = src.ohlcv_load(
+            pair=pair,
+            timeframe=timeframe,
+            timerange=None,
+            fill_missing=False,
+            drop_incomplete=False,
+            startup_candles=0,
+            candle_type=candle_type,
+        )
         logger.info(f"Converting {len(data)} {timeframe} {candle_type} candles for {pair}")
         if len(data) > 0:
-            trg.ohlcv_store(
-                pair=pair,
-                timeframe=timeframe,
-                data=data,
-                candle_type=candle_type
-            )
+            trg.ohlcv_store(pair=pair, timeframe=timeframe, data=data, candle_type=candle_type)
             if erase and convert_from != convert_to:
                 logger.info(f"Deleting source data for {pair} / {timeframe}")
                 src.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type)

@@ -254,12 +285,11 @@ def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
     :return: Dataframe converted to float/int 32s
     """
-    logger.debug(f"Memory usage of dataframe is "
-                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")
+    logger.debug(f"Memory usage of dataframe is " f"{df.memory_usage().sum() / 1024**2:.2f} MB")
     df_dtypes = df.dtypes
     for column, dtype in df_dtypes.items():
-        if column in ['open', 'high', 'low', 'close', 'volume']:
+        if column in ["open", "high", "low", "close", "volume"]:
             continue
         if dtype == np.float64:
             df_dtypes[column] = np.float32

@@ -267,7 +297,8 @@ def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
             df_dtypes[column] = np.int32
     df = df.astype(df_dtypes)
-    logger.debug(f"Memory usage after optimization is: "
-                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")
+    logger.debug(
+        f"Memory usage after optimization is: " f"{df.memory_usage().sum() / 1024**2:.2f} MB"
+    )
     return df
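To illustrate the converter entry point reformatted above, a minimal sketch with hand-made candles in ccxt's fetch_ohlcv list layout (the values are illustrative only):

from freqtrade.data.converter import ohlcv_to_dataframe

# [date(ms), open, high, low, close, volume] per candle, as returned by ccxt.fetch_ohlcv.
raw = [
    [1714521600000, 60000.0, 60500.0, 59800.0, 60200.0, 12.5],
    [1714525200000, 60200.0, 60400.0, 59900.0, 60100.0, 8.3],
]
df = ohlcv_to_dataframe(raw, "1h", "BTC/USDT", fill_missing=True, drop_incomplete=False)
print(df)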

View File

@@ -1,6 +1,7 @@
 """
 Functions to convert data from one format to another
 """
+
 import logging
 from pathlib import Path
 from typing import Dict, List

@@ -30,7 +31,7 @@ def trades_df_remove_duplicates(trades: pd.DataFrame) -> pd.DataFrame:
     :param trades: DataFrame with the columns constants.DEFAULT_TRADES_COLUMNS
     :return: DataFrame with duplicates removed based on the 'timestamp' column
     """
-    return trades.drop_duplicates(subset=['timestamp', 'id'])
+    return trades.drop_duplicates(subset=["timestamp", "id"])

 def trades_dict_to_list(trades: List[Dict]) -> TradeList:

@@ -47,7 +48,7 @@ def trades_convert_types(trades: DataFrame) -> DataFrame:
     Convert Trades dtypes and add 'date' column
     """
     trades = trades.astype(TRADES_DTYPES)
-    trades['date'] = to_datetime(trades['timestamp'], unit='ms', utc=True)
+    trades["date"] = to_datetime(trades["timestamp"], unit="ms", utc=True)
     return trades

@@ -76,13 +77,14 @@ def trades_to_ohlcv(trades: DataFrame, timeframe: str) -> DataFrame:
     :raises: ValueError if no trades are provided
     """
     from freqtrade.exchange import timeframe_to_resample_freq
+
     if trades.empty:
-        raise ValueError('Trade-list empty.')
-    df = trades.set_index('date', drop=True)
+        raise ValueError("Trade-list empty.")
+    df = trades.set_index("date", drop=True)
     resample_interval = timeframe_to_resample_freq(timeframe)
-    df_new = df['price'].resample(resample_interval).ohlc()
-    df_new['volume'] = df['amount'].resample(resample_interval).sum()
-    df_new['date'] = df_new.index
+    df_new = df["price"].resample(resample_interval).ohlc()
+    df_new["volume"] = df["amount"].resample(resample_interval).sum()
+    df_new["date"] = df_new.index
     # Drop 0 volume rows
     df_new = df_new.dropna()
     return df_new.loc[:, DEFAULT_DATAFRAME_COLUMNS]

@@ -102,24 +104,27 @@ def convert_trades_to_ohlcv(
     Convert stored trades data to ohlcv data
     """
     from freqtrade.data.history import get_datahandler
+
     data_handler_trades = get_datahandler(datadir, data_format=data_format_trades)
     data_handler_ohlcv = get_datahandler(datadir, data_format=data_format_ohlcv)
-    logger.info(f"About to convert pairs: '{', '.join(pairs)}', "
-                f"intervals: '{', '.join(timeframes)}' to {datadir}")
+    logger.info(
+        f"About to convert pairs: '{', '.join(pairs)}', "
+        f"intervals: '{', '.join(timeframes)}' to {datadir}"
+    )
     trading_mode = TradingMode.FUTURES if candle_type != CandleType.SPOT else TradingMode.SPOT
     for pair in pairs:
         trades = data_handler_trades.trades_load(pair, trading_mode)
         for timeframe in timeframes:
             if erase:
                 if data_handler_ohlcv.ohlcv_purge(pair, timeframe, candle_type=candle_type):
-                    logger.info(f'Deleting existing data for pair {pair}, interval {timeframe}.')
+                    logger.info(f"Deleting existing data for pair {pair}, interval {timeframe}.")
             try:
                 ohlcv = trades_to_ohlcv(trades, timeframe)
                 # Store ohlcv
                 data_handler_ohlcv.ohlcv_store(pair, timeframe, data=ohlcv, candle_type=candle_type)
             except ValueError:
-                logger.warning(f'Could not convert {pair} to OHLCV.')
+                logger.warning(f"Could not convert {pair} to OHLCV.")

@@ -130,25 +135,27 @@ def convert_trades_format(config: Config, convert_from: str, convert_to: str, er
     :param convert_to: Target format
     :param erase: Erase source data (does not apply if source and target format are identical)
     """
-    if convert_from == 'kraken_csv':
-        if config['exchange']['name'] != 'kraken':
+    if convert_from == "kraken_csv":
+        if config["exchange"]["name"] != "kraken":
             raise OperationalException(
-                'Converting from csv is only supported for kraken.'
-                'Please refer to the documentation for details about this special mode.'
+                "Converting from csv is only supported for kraken."
+                "Please refer to the documentation for details about this special mode."
             )
         from freqtrade.data.converter.trade_converter_kraken import import_kraken_trades_from_csv
         import_kraken_trades_from_csv(config, convert_to)
         return

     from freqtrade.data.history import get_datahandler
-    src = get_datahandler(config['datadir'], convert_from)
-    trg = get_datahandler(config['datadir'], convert_to)
-    if 'pairs' not in config:
-        config['pairs'] = src.trades_get_pairs(config['datadir'])
+
+    src = get_datahandler(config["datadir"], convert_from)
+    trg = get_datahandler(config["datadir"], convert_to)
+
+    if "pairs" not in config:
+        config["pairs"] = src.trades_get_pairs(config["datadir"])
     logger.info(f"Converting trades for {config['pairs']}")
-    trading_mode: TradingMode = config.get('trading_mode', TradingMode.SPOT)
-    for pair in config['pairs']:
+    trading_mode: TradingMode = config.get("trading_mode", TradingMode.SPOT)
+    for pair in config["pairs"]:
         data = src.trades_load(pair, trading_mode)
         logger.info(f"Converting {len(data)} trades for {pair}")
         trg.trades_store(pair, data, trading_mode)
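A rough sketch of trades_to_ohlcv on a tiny hand-built trades frame (only the columns the function touches are filled in; the values are illustrative):

import pandas as pd

from freqtrade.data.converter import trades_to_ohlcv

trades = pd.DataFrame(
    {
        "date": pd.to_datetime([1714521600000, 1714521630000, 1714521660000], unit="ms", utc=True),
        "price": [60000.0, 60010.0, 60005.0],
        "amount": [0.5, 0.1, 0.2],
    }
)
# Resamples trade prices into 1m candles and sums traded amounts into "volume".
ohlcv = trades_to_ohlcv(trades, "1m")
print(ohlcv)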

View File

@@ -17,32 +17,33 @@ from freqtrade.resolvers import ExchangeResolver
 logger = logging.getLogger(__name__)

-KRAKEN_CSV_TRADE_COLUMNS = ['timestamp', 'price', 'amount']
+KRAKEN_CSV_TRADE_COLUMNS = ["timestamp", "price", "amount"]

 def import_kraken_trades_from_csv(config: Config, convert_to: str):
     """
     Import kraken trades from csv
     """
-    if config['exchange']['name'] != 'kraken':
-        raise OperationalException('This function is only for the kraken exchange.')
+    if config["exchange"]["name"] != "kraken":
+        raise OperationalException("This function is only for the kraken exchange.")
-    datadir: Path = config['datadir']
+    datadir: Path = config["datadir"]
     data_handler = get_datahandler(datadir, data_format=convert_to)
-    tradesdir: Path = config['datadir'] / 'trades_csv'
+    tradesdir: Path = config["datadir"] / "trades_csv"
     exchange = ExchangeResolver.load_exchange(config, validate=False)
     # iterate through directories in this directory
-    data_symbols = {p.stem for p in tradesdir.rglob('*.csv')}
+    data_symbols = {p.stem for p in tradesdir.rglob("*.csv")}
     # create pair/filename mapping
     markets = {
-        (m['symbol'], m['altname']) for m in exchange.markets.values()
-        if m.get('altname') in data_symbols
+        (m["symbol"], m["altname"])
+        for m in exchange.markets.values()
+        if m.get("altname") in data_symbols
     }
     logger.info(f"Found csv files for {', '.join(data_symbols)}.")
-    if pairs_raw := config.get('pairs'):
+    if pairs_raw := config.get("pairs"):
         pairs = expand_pairlist(pairs_raw, [m[0] for m in markets])
         markets = {m for m in markets if m[0] in pairs}
     if not markets:

@@ -68,18 +69,20 @@ def import_kraken_trades_from_csv(config: Config, convert_to: str):
         trades = pd.concat(dfs, ignore_index=True)
         del dfs
-        trades.loc[:, 'timestamp'] = trades['timestamp'] * 1e3
-        trades.loc[:, 'cost'] = trades['price'] * trades['amount']
+        trades.loc[:, "timestamp"] = trades["timestamp"] * 1e3
+        trades.loc[:, "cost"] = trades["price"] * trades["amount"]
         for col in DEFAULT_TRADES_COLUMNS:
             if col not in trades.columns:
-                trades.loc[:, col] = ''
+                trades.loc[:, col] = ""
         trades = trades[DEFAULT_TRADES_COLUMNS]
         trades = trades_convert_types(trades)
         trades_df = trades_df_remove_duplicates(trades)
         del trades
-        logger.info(f"{pair}: {len(trades_df)} trades, from "
-                    f"{trades_df['date'].min():{DATETIME_PRINT_FORMAT}} to "
-                    f"{trades_df['date'].max():{DATETIME_PRINT_FORMAT}}")
+        logger.info(
+            f"{pair}: {len(trades_df)} trades, from "
+            f"{trades_df['date'].min():{DATETIME_PRINT_FORMAT}} to "
+            f"{trades_df['date'].max():{DATETIME_PRINT_FORMAT}}"
+        )
         data_handler.trades_store(pair, trades_df, TradingMode.SPOT)

View File

@ -4,6 +4,7 @@ Responsible to provide data to the bot
including ticker and orderbook data, live and historical candle (OHLCV) data including ticker and orderbook data, live and historical candle (OHLCV) data
Common Interface for bot and strategy to access data. Common Interface for bot and strategy to access data.
""" """
import logging import logging
from collections import deque from collections import deque
from datetime import datetime, timezone from datetime import datetime, timezone
@ -31,18 +32,17 @@ from freqtrade.util import PeriodicCache
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
NO_EXCHANGE_EXCEPTION = 'Exchange is not available to DataProvider.' NO_EXCHANGE_EXCEPTION = "Exchange is not available to DataProvider."
MAX_DATAFRAME_CANDLES = 1000 MAX_DATAFRAME_CANDLES = 1000
class DataProvider: class DataProvider:
def __init__( def __init__(
self, self,
config: Config, config: Config,
exchange: Optional[Exchange], exchange: Optional[Exchange],
pairlists=None, pairlists=None,
rpc: Optional[RPCManager] = None rpc: Optional[RPCManager] = None,
) -> None: ) -> None:
self._config = config self._config = config
self._exchange = exchange self._exchange = exchange
@ -53,18 +53,20 @@ class DataProvider:
self.__slice_date: Optional[datetime] = None self.__slice_date: Optional[datetime] = None
self.__cached_pairs_backtesting: Dict[PairWithTimeframe, DataFrame] = {} self.__cached_pairs_backtesting: Dict[PairWithTimeframe, DataFrame] = {}
self.__producer_pairs_df: Dict[str, self.__producer_pairs_df: Dict[
Dict[PairWithTimeframe, Tuple[DataFrame, datetime]]] = {} str, Dict[PairWithTimeframe, Tuple[DataFrame, datetime]]
] = {}
self.__producer_pairs: Dict[str, List[str]] = {} self.__producer_pairs: Dict[str, List[str]] = {}
self._msg_queue: deque = deque() self._msg_queue: deque = deque()
self._default_candle_type = self._config.get('candle_type_def', CandleType.SPOT) self._default_candle_type = self._config.get("candle_type_def", CandleType.SPOT)
self._default_timeframe = self._config.get('timeframe', '1h') self._default_timeframe = self._config.get("timeframe", "1h")
self.__msg_cache = PeriodicCache( self.__msg_cache = PeriodicCache(
maxsize=1000, ttl=timeframe_to_seconds(self._default_timeframe)) maxsize=1000, ttl=timeframe_to_seconds(self._default_timeframe)
)
self.producers = self._config.get('external_message_consumer', {}).get('producers', []) self.producers = self._config.get("external_message_consumer", {}).get("producers", [])
self.external_data_enabled = len(self.producers) > 0 self.external_data_enabled = len(self.producers) > 0
def _set_dataframe_max_index(self, limit_index: int): def _set_dataframe_max_index(self, limit_index: int):
@ -84,11 +86,7 @@ class DataProvider:
self.__slice_date = limit_date self.__slice_date = limit_date
def _set_cached_df( def _set_cached_df(
self, self, pair: str, timeframe: str, dataframe: DataFrame, candle_type: CandleType
pair: str,
timeframe: str,
dataframe: DataFrame,
candle_type: CandleType
) -> None: ) -> None:
""" """
Store cached Dataframe. Store cached Dataframe.
@ -100,8 +98,7 @@ class DataProvider:
:param candle_type: Any of the enum CandleType (must match trading mode!) :param candle_type: Any of the enum CandleType (must match trading mode!)
""" """
pair_key = (pair, timeframe, candle_type) pair_key = (pair, timeframe, candle_type)
self.__cached_pairs[pair_key] = ( self.__cached_pairs[pair_key] = (dataframe, datetime.now(timezone.utc))
dataframe, datetime.now(timezone.utc))
# For multiple producers we will want to merge the pairlists instead of overwriting # For multiple producers we will want to merge the pairlists instead of overwriting
def _set_producer_pairs(self, pairlist: List[str], producer_name: str = "default"): def _set_producer_pairs(self, pairlist: List[str], producer_name: str = "default"):
@ -120,12 +117,7 @@ class DataProvider:
""" """
return self.__producer_pairs.get(producer_name, []).copy() return self.__producer_pairs.get(producer_name, []).copy()
def _emit_df( def _emit_df(self, pair_key: PairWithTimeframe, dataframe: DataFrame, new_candle: bool) -> None:
self,
pair_key: PairWithTimeframe,
dataframe: DataFrame,
new_candle: bool
) -> None:
""" """
Send this dataframe as an ANALYZED_DF message to RPC Send this dataframe as an ANALYZED_DF message to RPC
@ -135,19 +127,21 @@ class DataProvider:
""" """
if self.__rpc: if self.__rpc:
msg: RPCAnalyzedDFMsg = { msg: RPCAnalyzedDFMsg = {
'type': RPCMessageType.ANALYZED_DF, "type": RPCMessageType.ANALYZED_DF,
'data': { "data": {
'key': pair_key, "key": pair_key,
'df': dataframe.tail(1), "df": dataframe.tail(1),
'la': datetime.now(timezone.utc) "la": datetime.now(timezone.utc),
} },
} }
self.__rpc.send_msg(msg) self.__rpc.send_msg(msg)
if new_candle: if new_candle:
self.__rpc.send_msg({ self.__rpc.send_msg(
'type': RPCMessageType.NEW_CANDLE, {
'data': pair_key, "type": RPCMessageType.NEW_CANDLE,
}) "data": pair_key,
}
)
def _replace_external_df( def _replace_external_df(
self, self,
@ -156,7 +150,7 @@ class DataProvider:
last_analyzed: datetime, last_analyzed: datetime,
timeframe: str, timeframe: str,
candle_type: CandleType, candle_type: CandleType,
producer_name: str = "default" producer_name: str = "default",
) -> None: ) -> None:
""" """
Add the pair data to this class from an external source. Add the pair data to this class from an external source.
@ -182,7 +176,7 @@ class DataProvider:
last_analyzed: datetime, last_analyzed: datetime,
timeframe: str, timeframe: str,
candle_type: CandleType, candle_type: CandleType,
producer_name: str = "default" producer_name: str = "default",
) -> Tuple[bool, int]: ) -> Tuple[bool, int]:
""" """
Append a candle to the existing external dataframe. The incoming dataframe Append a candle to the existing external dataframe. The incoming dataframe
@ -208,12 +202,14 @@ class DataProvider:
last_analyzed=last_analyzed, last_analyzed=last_analyzed,
timeframe=timeframe, timeframe=timeframe,
candle_type=candle_type, candle_type=candle_type,
producer_name=producer_name producer_name=producer_name,
) )
return (True, 0) return (True, 0)
if (producer_name not in self.__producer_pairs_df if (
or pair_key not in self.__producer_pairs_df[producer_name]): producer_name not in self.__producer_pairs_df
or pair_key not in self.__producer_pairs_df[producer_name]
):
# We don't have data from this producer yet, # We don't have data from this producer yet,
# or we don't have data for this pair_key # or we don't have data for this pair_key
# return False and 1000 for the full df # return False and 1000 for the full df
@ -224,12 +220,12 @@ class DataProvider:
# CHECK FOR MISSING CANDLES # CHECK FOR MISSING CANDLES
# Convert the timeframe to a timedelta for pandas # Convert the timeframe to a timedelta for pandas
timeframe_delta: Timedelta = to_timedelta(timeframe) timeframe_delta: Timedelta = to_timedelta(timeframe)
local_last: Timestamp = existing_df.iloc[-1]['date'] # We want the last date from our copy local_last: Timestamp = existing_df.iloc[-1]["date"] # We want the last date from our copy
# We want the first date from the incoming # We want the first date from the incoming
incoming_first: Timestamp = dataframe.iloc[0]['date'] incoming_first: Timestamp = dataframe.iloc[0]["date"]
# Remove existing candles that are newer than the incoming first candle # Remove existing candles that are newer than the incoming first candle
existing_df1 = existing_df[existing_df['date'] < incoming_first] existing_df1 = existing_df[existing_df["date"] < incoming_first]
candle_difference = (incoming_first - local_last) / timeframe_delta candle_difference = (incoming_first - local_last) / timeframe_delta
@ -247,13 +243,13 @@ class DataProvider:
# Everything is good, we appended # Everything is good, we appended
self._replace_external_df( self._replace_external_df(
pair, pair,
appended_df, appended_df,
last_analyzed=last_analyzed, last_analyzed=last_analyzed,
timeframe=timeframe, timeframe=timeframe,
candle_type=candle_type, candle_type=candle_type,
producer_name=producer_name producer_name=producer_name,
) )
return (True, 0) return (True, 0)
def get_producer_df( def get_producer_df(
@ -261,7 +257,7 @@ class DataProvider:
pair: str, pair: str,
timeframe: Optional[str] = None, timeframe: Optional[str] = None,
candle_type: Optional[CandleType] = None, candle_type: Optional[CandleType] = None,
producer_name: str = "default" producer_name: str = "default",
) -> Tuple[DataFrame, datetime]: ) -> Tuple[DataFrame, datetime]:
""" """
Get the pair data from producers. Get the pair data from producers.
@ -296,64 +292,64 @@ class DataProvider:
""" """
self._pairlists = pairlists self._pairlists = pairlists
def historic_ohlcv( def historic_ohlcv(self, pair: str, timeframe: str, candle_type: str = "") -> DataFrame:
self,
pair: str,
timeframe: str,
candle_type: str = ''
) -> DataFrame:
""" """
Get stored historical candle (OHLCV) data Get stored historical candle (OHLCV) data
:param pair: pair to get the data for :param pair: pair to get the data for
:param timeframe: timeframe to get data for :param timeframe: timeframe to get data for
:param candle_type: '', mark, index, premiumIndex, or funding_rate :param candle_type: '', mark, index, premiumIndex, or funding_rate
""" """
_candle_type = CandleType.from_string( _candle_type = (
candle_type) if candle_type != '' else self._config['candle_type_def'] CandleType.from_string(candle_type)
if candle_type != ""
else self._config["candle_type_def"]
)
saved_pair: PairWithTimeframe = (pair, str(timeframe), _candle_type) saved_pair: PairWithTimeframe = (pair, str(timeframe), _candle_type)
if saved_pair not in self.__cached_pairs_backtesting: if saved_pair not in self.__cached_pairs_backtesting:
timerange = TimeRange.parse_timerange(None if self._config.get( timerange = TimeRange.parse_timerange(
'timerange') is None else str(self._config.get('timerange'))) None
if self._config.get("timerange") is None
else str(self._config.get("timerange"))
)
startup_candles = self.get_required_startup(str(timeframe)) startup_candles = self.get_required_startup(str(timeframe))
tf_seconds = timeframe_to_seconds(str(timeframe)) tf_seconds = timeframe_to_seconds(str(timeframe))
timerange.subtract_start(tf_seconds * startup_candles) timerange.subtract_start(tf_seconds * startup_candles)
logger.info(f"Loading data for {pair} {timeframe} " logger.info(
f"from {timerange.start_fmt} to {timerange.stop_fmt}") f"Loading data for {pair} {timeframe} "
f"from {timerange.start_fmt} to {timerange.stop_fmt}"
)
self.__cached_pairs_backtesting[saved_pair] = load_pair_history( self.__cached_pairs_backtesting[saved_pair] = load_pair_history(
pair=pair, pair=pair,
timeframe=timeframe, timeframe=timeframe,
datadir=self._config['datadir'], datadir=self._config["datadir"],
timerange=timerange, timerange=timerange,
data_format=self._config['dataformat_ohlcv'], data_format=self._config["dataformat_ohlcv"],
candle_type=_candle_type, candle_type=_candle_type,
) )
return self.__cached_pairs_backtesting[saved_pair].copy() return self.__cached_pairs_backtesting[saved_pair].copy()
def get_required_startup(self, timeframe: str) -> int: def get_required_startup(self, timeframe: str) -> int:
freqai_config = self._config.get('freqai', {}) freqai_config = self._config.get("freqai", {})
if not freqai_config.get('enabled', False): if not freqai_config.get("enabled", False):
return self._config.get('startup_candle_count', 0) return self._config.get("startup_candle_count", 0)
else: else:
startup_candles = self._config.get('startup_candle_count', 0) startup_candles = self._config.get("startup_candle_count", 0)
indicator_periods = freqai_config['feature_parameters']['indicator_periods_candles'] indicator_periods = freqai_config["feature_parameters"]["indicator_periods_candles"]
# make sure the startupcandles is at least the set maximum indicator periods # make sure the startupcandles is at least the set maximum indicator periods
self._config['startup_candle_count'] = max(startup_candles, max(indicator_periods)) self._config["startup_candle_count"] = max(startup_candles, max(indicator_periods))
tf_seconds = timeframe_to_seconds(timeframe) tf_seconds = timeframe_to_seconds(timeframe)
train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds train_candles = freqai_config["train_period_days"] * 86400 / tf_seconds
total_candles = int(self._config['startup_candle_count'] + train_candles) total_candles = int(self._config["startup_candle_count"] + train_candles)
logger.info( logger.info(
f'Increasing startup_candle_count for freqai on {timeframe} to {total_candles}') f"Increasing startup_candle_count for freqai on {timeframe} to {total_candles}"
)
return total_candles return total_candles
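For a rough feel of the arithmetic above, a minimal sketch with made-up numbers (none of these values come from the diff): with freqai enabled, the requirement is the configured startup_candle_count, bumped up to the largest indicator period, plus the training window converted to candles.

# Hypothetical values, only to illustrate the get_required_startup() calculation with freqai enabled.
timeframe_seconds = 5 * 60                    # a "5m" timeframe
startup_candle_count = 30
indicator_periods_candles = [10, 20, 50]
train_period_days = 15

startup = max(startup_candle_count, max(indicator_periods_candles))  # -> 50
train_candles = train_period_days * 86400 / timeframe_seconds        # -> 4320.0
total_candles = int(startup + train_candles)                          # -> 4370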
def get_pair_dataframe( def get_pair_dataframe(
self, self, pair: str, timeframe: Optional[str] = None, candle_type: str = ""
pair: str,
timeframe: Optional[str] = None,
candle_type: str = ''
) -> DataFrame: ) -> DataFrame:
""" """
Return pair candle (OHLCV) data, either live or cached historical -- depending Return pair candle (OHLCV) data, either live or cached historical -- depending
@ -370,13 +366,13 @@ class DataProvider:
data = self.ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type) data = self.ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type)
else: else:
# Get historical OHLCV data (cached on disk). # Get historical OHLCV data (cached on disk).
timeframe = timeframe or self._config['timeframe'] timeframe = timeframe or self._config["timeframe"]
data = self.historic_ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type) data = self.historic_ohlcv(pair=pair, timeframe=timeframe, candle_type=candle_type)
# Cut date to timeframe-specific date. # Cut date to timeframe-specific date.
# This is necessary to prevent lookahead bias in callbacks through informative pairs. # This is necessary to prevent lookahead bias in callbacks through informative pairs.
if self.__slice_date: if self.__slice_date:
cutoff_date = timeframe_to_prev_date(timeframe, self.__slice_date) cutoff_date = timeframe_to_prev_date(timeframe, self.__slice_date)
data = data.loc[data['date'] < cutoff_date] data = data.loc[data["date"] < cutoff_date]
if len(data) == 0: if len(data) == 0:
logger.warning(f"No data found for ({pair}, {timeframe}, {candle_type}).") logger.warning(f"No data found for ({pair}, {timeframe}, {candle_type}).")
return data return data
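A minimal sketch of how a strategy typically reaches this method through its DataProvider; dp, the pair and the timeframe are placeholder assumptions, not part of the diff.

from pandas import DataFrame

def informative_1h(dp, pair: str = "BTC/USDT") -> DataFrame:
    # dp is assumed to be a freqtrade DataProvider (e.g. self.dp inside a strategy).
    df = dp.get_pair_dataframe(pair=pair, timeframe="1h")
    if df.empty:
        # get_pair_dataframe logs a warning and returns an empty DataFrame when no data is found
        return DataFrame()
    return df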
@ -391,7 +387,7 @@ class DataProvider:
combination. combination.
Returns empty dataframe and Epoch 0 (1970-01-01) if no dataframe was cached. Returns empty dataframe and Epoch 0 (1970-01-01) if no dataframe was cached.
""" """
pair_key = (pair, timeframe, self._config.get('candle_type_def', CandleType.SPOT)) pair_key = (pair, timeframe, self._config.get("candle_type_def", CandleType.SPOT))
if pair_key in self.__cached_pairs: if pair_key in self.__cached_pairs:
if self.runmode in (RunMode.DRY_RUN, RunMode.LIVE): if self.runmode in (RunMode.DRY_RUN, RunMode.LIVE):
df, date = self.__cached_pairs[pair_key] df, date = self.__cached_pairs[pair_key]
@ -399,7 +395,7 @@ class DataProvider:
df, date = self.__cached_pairs[pair_key] df, date = self.__cached_pairs[pair_key]
if self.__slice_index is not None: if self.__slice_index is not None:
max_index = self.__slice_index max_index = self.__slice_index
df = df.iloc[max(0, max_index - MAX_DATAFRAME_CANDLES):max_index] df = df.iloc[max(0, max_index - MAX_DATAFRAME_CANDLES) : max_index]
return df, date return df, date
else: else:
return (DataFrame(), datetime.fromtimestamp(0, tz=timezone.utc)) return (DataFrame(), datetime.fromtimestamp(0, tz=timezone.utc))
@ -410,7 +406,7 @@ class DataProvider:
Get runmode of the bot Get runmode of the bot
can be "live", "dry-run", "backtest", "edgecli", "hyperopt" or "other". can be "live", "dry-run", "backtest", "edgecli", "hyperopt" or "other".
""" """
return RunMode(self._config.get('runmode', RunMode.OTHER)) return RunMode(self._config.get("runmode", RunMode.OTHER))
def current_whitelist(self) -> List[str]: def current_whitelist(self) -> List[str]:
""" """
@ -438,9 +434,11 @@ class DataProvider:
# Exchange functions # Exchange functions
def refresh(self, def refresh(
pairlist: ListPairsWithTimeframes, self,
helping_pairs: Optional[ListPairsWithTimeframes] = None) -> None: pairlist: ListPairsWithTimeframes,
helping_pairs: Optional[ListPairsWithTimeframes] = None,
) -> None:
""" """
Refresh data, called with each cycle Refresh data, called with each cycle
""" """
@ -460,11 +458,7 @@ class DataProvider:
return list(self._exchange._klines.keys()) return list(self._exchange._klines.keys())
def ohlcv( def ohlcv(
self, self, pair: str, timeframe: Optional[str] = None, copy: bool = True, candle_type: str = ""
pair: str,
timeframe: Optional[str] = None,
copy: bool = True,
candle_type: str = ''
) -> DataFrame: ) -> DataFrame:
""" """
Get candle (OHLCV) data for the given pair as DataFrame Get candle (OHLCV) data for the given pair as DataFrame
@ -478,11 +472,13 @@ class DataProvider:
if self._exchange is None: if self._exchange is None:
raise OperationalException(NO_EXCHANGE_EXCEPTION) raise OperationalException(NO_EXCHANGE_EXCEPTION)
if self.runmode in (RunMode.DRY_RUN, RunMode.LIVE): if self.runmode in (RunMode.DRY_RUN, RunMode.LIVE):
_candle_type = CandleType.from_string( _candle_type = (
candle_type) if candle_type != '' else self._config['candle_type_def'] CandleType.from_string(candle_type)
if candle_type != ""
else self._config["candle_type_def"]
)
return self._exchange.klines( return self._exchange.klines(
(pair, timeframe or self._config['timeframe'], _candle_type), (pair, timeframe or self._config["timeframe"], _candle_type), copy=copy
copy=copy
) )
else: else:
return DataFrame() return DataFrame()
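As a hedged usage sketch of the method above: ohlcv() only returns exchange-cached klines in dry-run or live mode, so callers should expect an empty DataFrame otherwise. dp and the pair are placeholders.

def latest_close(dp, pair: str = "ETH/USDT"):
    # Returns the most recent cached close price, or None when no klines are available
    # (e.g. outside dry-run/live mode, where ohlcv() returns an empty DataFrame).
    candles = dp.ohlcv(pair=pair, timeframe="5m", copy=False)
    return None if candles.empty else candles.iloc[-1]["close"]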
View File
@ -21,9 +21,10 @@ logger = logging.getLogger(__name__)
def _load_backtest_analysis_data(backtest_dir: Path, name: str): def _load_backtest_analysis_data(backtest_dir: Path, name: str):
if backtest_dir.is_dir(): if backtest_dir.is_dir():
scpf = Path(backtest_dir, scpf = Path(
Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl" backtest_dir,
) Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl",
)
else: else:
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl") scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl")
@ -56,7 +57,8 @@ def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_cand
for pair in pairlist: for pair in pairlist:
if pair in signal_candles[strategy_name]: if pair in signal_candles[strategy_name]:
analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators( analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators(
pair, trades, signal_candles[strategy_name][pair]) pair, trades, signal_candles[strategy_name][pair]
)
except Exception as e: except Exception as e:
print(f"Cannot process entry/exit reasons for {strategy_name}: ", e) print(f"Cannot process entry/exit reasons for {strategy_name}: ", e)
@ -67,28 +69,28 @@ def _analyze_candles_and_indicators(pair, trades: pd.DataFrame, signal_candles:
buyf = signal_candles buyf = signal_candles
if len(buyf) > 0: if len(buyf) > 0:
buyf = buyf.set_index('date', drop=False) buyf = buyf.set_index("date", drop=False)
trades_red = trades.loc[trades['pair'] == pair].copy() trades_red = trades.loc[trades["pair"] == pair].copy()
trades_inds = pd.DataFrame() trades_inds = pd.DataFrame()
if trades_red.shape[0] > 0 and buyf.shape[0] > 0: if trades_red.shape[0] > 0 and buyf.shape[0] > 0:
for t, v in trades_red.open_date.items(): for t, v in trades_red.open_date.items():
allinds = buyf.loc[(buyf['date'] < v)] allinds = buyf.loc[(buyf["date"] < v)]
if allinds.shape[0] > 0: if allinds.shape[0] > 0:
tmp_inds = allinds.iloc[[-1]] tmp_inds = allinds.iloc[[-1]]
trades_red.loc[t, 'signal_date'] = tmp_inds['date'].values[0] trades_red.loc[t, "signal_date"] = tmp_inds["date"].values[0]
trades_red.loc[t, 'enter_reason'] = trades_red.loc[t, 'enter_tag'] trades_red.loc[t, "enter_reason"] = trades_red.loc[t, "enter_tag"]
tmp_inds.index.rename('signal_date', inplace=True) tmp_inds.index.rename("signal_date", inplace=True)
trades_inds = pd.concat([trades_inds, tmp_inds]) trades_inds = pd.concat([trades_inds, tmp_inds])
if 'signal_date' in trades_red: if "signal_date" in trades_red:
trades_red['signal_date'] = pd.to_datetime(trades_red['signal_date'], utc=True) trades_red["signal_date"] = pd.to_datetime(trades_red["signal_date"], utc=True)
trades_red.set_index('signal_date', inplace=True) trades_red.set_index("signal_date", inplace=True)
try: try:
trades_red = pd.merge(trades_red, trades_inds, on='signal_date', how='outer') trades_red = pd.merge(trades_red, trades_inds, on="signal_date", how="outer")
except Exception as e: except Exception as e:
raise e raise e
return trades_red return trades_red
@ -96,138 +98,166 @@ def _analyze_candles_and_indicators(pair, trades: pd.DataFrame, signal_candles:
return pd.DataFrame() return pd.DataFrame()
def _do_group_table_output(bigdf, glist, csv_path: Path, to_csv=False, ): def _do_group_table_output(
bigdf,
glist,
csv_path: Path,
to_csv=False,
):
for g in glist: for g in glist:
# 0: summary wins/losses grouped by enter tag # 0: summary wins/losses grouped by enter tag
if g == "0": if g == "0":
group_mask = ['enter_reason'] group_mask = ["enter_reason"]
wins = bigdf.loc[bigdf['profit_abs'] >= 0] \ wins = (
.groupby(group_mask) \ bigdf.loc[bigdf["profit_abs"] >= 0].groupby(group_mask).agg({"profit_abs": ["sum"]})
.agg({'profit_abs': ['sum']}) )
wins.columns = ['profit_abs_wins'] wins.columns = ["profit_abs_wins"]
loss = bigdf.loc[bigdf['profit_abs'] < 0] \ loss = (
.groupby(group_mask) \ bigdf.loc[bigdf["profit_abs"] < 0].groupby(group_mask).agg({"profit_abs": ["sum"]})
.agg({'profit_abs': ['sum']}) )
loss.columns = ['profit_abs_loss'] loss.columns = ["profit_abs_loss"]
new = bigdf.groupby(group_mask).agg({'profit_abs': [ new = bigdf.groupby(group_mask).agg(
'count', {"profit_abs": ["count", lambda x: sum(x > 0), lambda x: sum(x <= 0)]}
lambda x: sum(x > 0), )
lambda x: sum(x <= 0)]})
new = pd.concat([new, wins, loss], axis=1).fillna(0) new = pd.concat([new, wins, loss], axis=1).fillna(0)
new['profit_tot'] = new['profit_abs_wins'] - abs(new['profit_abs_loss']) new["profit_tot"] = new["profit_abs_wins"] - abs(new["profit_abs_loss"])
new['wl_ratio_pct'] = (new.iloc[:, 1] / new.iloc[:, 0] * 100).fillna(0) new["wl_ratio_pct"] = (new.iloc[:, 1] / new.iloc[:, 0] * 100).fillna(0)
new['avg_win'] = (new['profit_abs_wins'] / new.iloc[:, 1]).fillna(0) new["avg_win"] = (new["profit_abs_wins"] / new.iloc[:, 1]).fillna(0)
new['avg_loss'] = (new['profit_abs_loss'] / new.iloc[:, 2]).fillna(0) new["avg_loss"] = (new["profit_abs_loss"] / new.iloc[:, 2]).fillna(0)
new['exp_ratio'] = ( new["exp_ratio"] = (
( ((1 + (new["avg_win"] / abs(new["avg_loss"]))) * (new["wl_ratio_pct"] / 100)) - 1
(1 + (new['avg_win'] / abs(new['avg_loss']))) * (new['wl_ratio_pct'] / 100) ).fillna(0)
) - 1).fillna(0)
new.columns = ['total_num_buys', 'wins', 'losses', new.columns = [
'profit_abs_wins', 'profit_abs_loss', "total_num_buys",
'profit_tot', 'wl_ratio_pct', "wins",
'avg_win', 'avg_loss', 'exp_ratio'] "losses",
"profit_abs_wins",
"profit_abs_loss",
"profit_tot",
"wl_ratio_pct",
"avg_win",
"avg_loss",
"exp_ratio",
]
sortcols = ['total_num_buys'] sortcols = ["total_num_buys"]
_print_table(new, sortcols, show_index=True, name="Group 0:", _print_table(
to_csv=to_csv, csv_path=csv_path) new, sortcols, show_index=True, name="Group 0:", to_csv=to_csv, csv_path=csv_path
)
else: else:
agg_mask = {'profit_abs': ['count', 'sum', 'median', 'mean'], agg_mask = {
'profit_ratio': ['median', 'mean', 'sum']} "profit_abs": ["count", "sum", "median", "mean"],
agg_cols = ['num_buys', 'profit_abs_sum', 'profit_abs_median', "profit_ratio": ["median", "mean", "sum"],
'profit_abs_mean', 'median_profit_pct', 'mean_profit_pct', }
'total_profit_pct'] agg_cols = [
sortcols = ['profit_abs_sum', 'enter_reason'] "num_buys",
"profit_abs_sum",
"profit_abs_median",
"profit_abs_mean",
"median_profit_pct",
"mean_profit_pct",
"total_profit_pct",
]
sortcols = ["profit_abs_sum", "enter_reason"]
# 1: profit summaries grouped by enter_tag # 1: profit summaries grouped by enter_tag
if g == "1": if g == "1":
group_mask = ['enter_reason'] group_mask = ["enter_reason"]
# 2: profit summaries grouped by enter_tag and exit_tag # 2: profit summaries grouped by enter_tag and exit_tag
if g == "2": if g == "2":
group_mask = ['enter_reason', 'exit_reason'] group_mask = ["enter_reason", "exit_reason"]
# 3: profit summaries grouped by pair and enter_tag # 3: profit summaries grouped by pair and enter_tag
if g == "3": if g == "3":
group_mask = ['pair', 'enter_reason'] group_mask = ["pair", "enter_reason"]
# 4: profit summaries grouped by pair, enter_ and exit_tag (this can get quite large) # 4: profit summaries grouped by pair, enter_ and exit_tag (this can get quite large)
if g == "4": if g == "4":
group_mask = ['pair', 'enter_reason', 'exit_reason'] group_mask = ["pair", "enter_reason", "exit_reason"]
# 5: profit summaries grouped by exit_tag # 5: profit summaries grouped by exit_tag
if g == "5": if g == "5":
group_mask = ['exit_reason'] group_mask = ["exit_reason"]
sortcols = ['exit_reason'] sortcols = ["exit_reason"]
if group_mask: if group_mask:
new = bigdf.groupby(group_mask).agg(agg_mask).reset_index() new = bigdf.groupby(group_mask).agg(agg_mask).reset_index()
new.columns = group_mask + agg_cols new.columns = group_mask + agg_cols
new['median_profit_pct'] = new['median_profit_pct'] * 100 new["median_profit_pct"] = new["median_profit_pct"] * 100
new['mean_profit_pct'] = new['mean_profit_pct'] * 100 new["mean_profit_pct"] = new["mean_profit_pct"] * 100
new['total_profit_pct'] = new['total_profit_pct'] * 100 new["total_profit_pct"] = new["total_profit_pct"] * 100
_print_table(new, sortcols, name=f"Group {g}:", _print_table(new, sortcols, name=f"Group {g}:", to_csv=to_csv, csv_path=csv_path)
to_csv=to_csv, csv_path=csv_path)
else: else:
logger.warning("Invalid group mask specified.") logger.warning("Invalid group mask specified.")
def _do_rejected_signals_output(rejected_signals_df: pd.DataFrame, def _do_rejected_signals_output(
to_csv: bool = False, csv_path=None) -> None: rejected_signals_df: pd.DataFrame, to_csv: bool = False, csv_path=None
cols = ['pair', 'date', 'enter_tag'] ) -> None:
sortcols = ['date', 'pair', 'enter_tag'] cols = ["pair", "date", "enter_tag"]
_print_table(rejected_signals_df[cols], sortcols = ["date", "pair", "enter_tag"]
sortcols, _print_table(
show_index=False, rejected_signals_df[cols],
name="Rejected Signals:", sortcols,
to_csv=to_csv, show_index=False,
csv_path=csv_path) name="Rejected Signals:",
to_csv=to_csv,
csv_path=csv_path,
)
def _select_rows_within_dates(df, timerange=None, df_date_col: str = 'date'): def _select_rows_within_dates(df, timerange=None, df_date_col: str = "date"):
if timerange: if timerange:
if timerange.starttype == 'date': if timerange.starttype == "date":
df = df.loc[(df[df_date_col] >= timerange.startdt)] df = df.loc[(df[df_date_col] >= timerange.startdt)]
if timerange.stoptype == 'date': if timerange.stoptype == "date":
df = df.loc[(df[df_date_col] < timerange.stopdt)] df = df.loc[(df[df_date_col] < timerange.stopdt)]
return df return df
def _select_rows_by_tags(df, enter_reason_list, exit_reason_list): def _select_rows_by_tags(df, enter_reason_list, exit_reason_list):
if enter_reason_list and "all" not in enter_reason_list: if enter_reason_list and "all" not in enter_reason_list:
df = df.loc[(df['enter_reason'].isin(enter_reason_list))] df = df.loc[(df["enter_reason"].isin(enter_reason_list))]
if exit_reason_list and "all" not in exit_reason_list: if exit_reason_list and "all" not in exit_reason_list:
df = df.loc[(df['exit_reason'].isin(exit_reason_list))] df = df.loc[(df["exit_reason"].isin(exit_reason_list))]
return df return df
def prepare_results(analysed_trades, stratname, def prepare_results(
enter_reason_list, exit_reason_list, analysed_trades, stratname, enter_reason_list, exit_reason_list, timerange=None
timerange=None): ):
res_df = pd.DataFrame() res_df = pd.DataFrame()
for pair, trades in analysed_trades[stratname].items(): for pair, trades in analysed_trades[stratname].items():
if (trades.shape[0] > 0): if trades.shape[0] > 0:
trades.dropna(subset=['close_date'], inplace=True) trades.dropna(subset=["close_date"], inplace=True)
res_df = pd.concat([res_df, trades], ignore_index=True) res_df = pd.concat([res_df, trades], ignore_index=True)
res_df = _select_rows_within_dates(res_df, timerange) res_df = _select_rows_within_dates(res_df, timerange)
if res_df is not None and res_df.shape[0] > 0 and ('enter_reason' in res_df.columns): if res_df is not None and res_df.shape[0] > 0 and ("enter_reason" in res_df.columns):
res_df = _select_rows_by_tags(res_df, enter_reason_list, exit_reason_list) res_df = _select_rows_by_tags(res_df, enter_reason_list, exit_reason_list)
return res_df return res_df
def print_results(res_df: pd.DataFrame, analysis_groups: List[str], indicator_list: List[str], def print_results(
csv_path: Path, rejected_signals=None, to_csv=False): res_df: pd.DataFrame,
analysis_groups: List[str],
indicator_list: List[str],
csv_path: Path,
rejected_signals=None,
to_csv=False,
):
if res_df.shape[0] > 0: if res_df.shape[0] > 0:
if analysis_groups: if analysis_groups:
_do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path) _do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path)
@ -240,30 +270,31 @@ def print_results(res_df: pd.DataFrame, analysis_groups: List[str], indicator_li
# NB this can be large for big dataframes! # NB this can be large for big dataframes!
if "all" in indicator_list: if "all" in indicator_list:
_print_table(res_df, _print_table(
show_index=False, res_df, show_index=False, name="Indicators:", to_csv=to_csv, csv_path=csv_path
name="Indicators:", )
to_csv=to_csv,
csv_path=csv_path)
elif indicator_list is not None and indicator_list: elif indicator_list is not None and indicator_list:
available_inds = [] available_inds = []
for ind in indicator_list: for ind in indicator_list:
if ind in res_df: if ind in res_df:
available_inds.append(ind) available_inds.append(ind)
ilist = ["pair", "enter_reason", "exit_reason"] + available_inds ilist = ["pair", "enter_reason", "exit_reason"] + available_inds
_print_table(res_df[ilist], _print_table(
sortcols=['exit_reason'], res_df[ilist],
show_index=False, sortcols=["exit_reason"],
name="Indicators:", show_index=False,
to_csv=to_csv, name="Indicators:",
csv_path=csv_path) to_csv=to_csv,
csv_path=csv_path,
)
else: else:
print("\\No trades to show") print("\\No trades to show")
def _print_table(df: pd.DataFrame, sortcols=None, *, show_index=False, name=None, def _print_table(
to_csv=False, csv_path: Path): df: pd.DataFrame, sortcols=None, *, show_index=False, name=None, to_csv=False, csv_path: Path
if (sortcols is not None): ):
if sortcols is not None:
data = df.sort_values(sortcols) data = df.sort_values(sortcols)
else: else:
data = df data = df
@ -276,60 +307,64 @@ def _print_table(df: pd.DataFrame, sortcols=None, *, show_index=False, name=None
if name is not None: if name is not None:
print(name) print(name)
print( print(tabulate(data, headers="keys", tablefmt="psql", showindex=show_index))
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
)
)
def process_entry_exit_reasons(config: Config): def process_entry_exit_reasons(config: Config):
try: try:
analysis_groups = config.get('analysis_groups', []) analysis_groups = config.get("analysis_groups", [])
enter_reason_list = config.get('enter_reason_list', ["all"]) enter_reason_list = config.get("enter_reason_list", ["all"])
exit_reason_list = config.get('exit_reason_list', ["all"]) exit_reason_list = config.get("exit_reason_list", ["all"])
indicator_list = config.get('indicator_list', []) indicator_list = config.get("indicator_list", [])
do_rejected = config.get('analysis_rejected', False) do_rejected = config.get("analysis_rejected", False)
to_csv = config.get('analysis_to_csv', False) to_csv = config.get("analysis_to_csv", False)
csv_path = Path(config.get('analysis_csv_path', config['exportfilename'])) csv_path = Path(config.get("analysis_csv_path", config["exportfilename"]))
if to_csv and not csv_path.is_dir(): if to_csv and not csv_path.is_dir():
raise OperationalException(f"Specified directory {csv_path} does not exist.") raise OperationalException(f"Specified directory {csv_path} does not exist.")
timerange = TimeRange.parse_timerange(None if config.get( timerange = TimeRange.parse_timerange(
'timerange') is None else str(config.get('timerange'))) None if config.get("timerange") is None else str(config.get("timerange"))
)
backtest_stats = load_backtest_stats(config['exportfilename']) backtest_stats = load_backtest_stats(config["exportfilename"])
for strategy_name, results in backtest_stats['strategy'].items(): for strategy_name, results in backtest_stats["strategy"].items():
trades = load_backtest_data(config['exportfilename'], strategy_name) trades = load_backtest_data(config["exportfilename"], strategy_name)
if trades is not None and not trades.empty: if trades is not None and not trades.empty:
signal_candles = _load_signal_candles(config['exportfilename']) signal_candles = _load_signal_candles(config["exportfilename"])
rej_df = None rej_df = None
if do_rejected: if do_rejected:
rejected_signals_dict = _load_rejected_signals(config['exportfilename']) rejected_signals_dict = _load_rejected_signals(config["exportfilename"])
rej_df = prepare_results(rejected_signals_dict, strategy_name, rej_df = prepare_results(
enter_reason_list, exit_reason_list, rejected_signals_dict,
timerange=timerange) strategy_name,
enter_reason_list,
exit_reason_list,
timerange=timerange,
)
analysed_trades_dict = _process_candles_and_indicators( analysed_trades_dict = _process_candles_and_indicators(
config['exchange']['pair_whitelist'], strategy_name, config["exchange"]["pair_whitelist"], strategy_name, trades, signal_candles
trades, signal_candles) )
res_df = prepare_results(analysed_trades_dict, strategy_name, res_df = prepare_results(
enter_reason_list, exit_reason_list, analysed_trades_dict,
timerange=timerange) strategy_name,
enter_reason_list,
exit_reason_list,
timerange=timerange,
)
print_results(res_df, print_results(
analysis_groups, res_df,
indicator_list, analysis_groups,
rejected_signals=rej_df, indicator_list,
to_csv=to_csv, rejected_signals=rej_df,
csv_path=csv_path) to_csv=to_csv,
csv_path=csv_path,
)
except ValueError as e: except ValueError as e:
raise OperationalException(e) from e raise OperationalException(e) from e
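A sketch of the configuration keys process_entry_exit_reasons() reads above; the values are examples only, and exportfilename must point at existing backtest results.

analysis_config = {
    "exportfilename": "user_data/backtest_results",   # backtest export to analyze
    "analysis_groups": ["0", "2"],                     # group codes handled in _do_group_table_output
    "enter_reason_list": ["all"],
    "exit_reason_list": ["roi", "stop_loss"],
    "indicator_list": ["rsi"],
    "analysis_rejected": False,
    "analysis_to_csv": False,
    "timerange": "20230101-20230601",
}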
View File
@ -5,6 +5,7 @@ Includes:
* load data for a pair (or a list of pairs) from disk * load data for a pair (or a list of pairs) from disk
* download data from exchange and store to disk * download data from exchange and store to disk
""" """
# flake8: noqa: F401 # flake8: noqa: F401
from .datahandlers import get_datahandler from .datahandlers import get_datahandler
from .history_utils import ( from .history_utils import (
View File
@ -14,11 +14,11 @@ logger = logging.getLogger(__name__)
class FeatherDataHandler(IDataHandler): class FeatherDataHandler(IDataHandler):
_columns = DEFAULT_DATAFRAME_COLUMNS _columns = DEFAULT_DATAFRAME_COLUMNS
def ohlcv_store( def ohlcv_store(
self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None: self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
) -> None:
""" """
Store data in feather format. Store data in feather format.
format looks as follows: format looks as follows:
@ -33,11 +33,12 @@ class FeatherDataHandler(IDataHandler):
self.create_dir_if_needed(filename) self.create_dir_if_needed(filename)
data.reset_index(drop=True).loc[:, self._columns].to_feather( data.reset_index(drop=True).loc[:, self._columns].to_feather(
filename, compression_level=9, compression='lz4') filename, compression_level=9, compression="lz4"
)
def _ohlcv_load(self, pair: str, timeframe: str, def _ohlcv_load(
timerange: Optional[TimeRange], candle_type: CandleType self, pair: str, timeframe: str, timerange: Optional[TimeRange], candle_type: CandleType
) -> DataFrame: ) -> DataFrame:
""" """
Internal method used to load data for one pair from disk. Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe. Implements the loading and conversion to a Pandas dataframe.
@ -50,28 +51,31 @@ class FeatherDataHandler(IDataHandler):
:param candle_type: Any of the enum CandleType (must match trading mode!) :param candle_type: Any of the enum CandleType (must match trading mode!)
:return: DataFrame with ohlcv data, or empty DataFrame :return: DataFrame with ohlcv data, or empty DataFrame
""" """
filename = self._pair_data_filename( filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type=candle_type)
self._datadir, pair, timeframe, candle_type=candle_type)
if not filename.exists(): if not filename.exists():
# Fallback mode for 1M files # Fallback mode for 1M files
filename = self._pair_data_filename( filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True) self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True
)
if not filename.exists(): if not filename.exists():
return DataFrame(columns=self._columns) return DataFrame(columns=self._columns)
pairdata = read_feather(filename) pairdata = read_feather(filename)
pairdata.columns = self._columns pairdata.columns = self._columns
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', pairdata = pairdata.astype(
'low': 'float', 'close': 'float', 'volume': 'float'}) dtype={
pairdata['date'] = to_datetime(pairdata['date'], unit='ms', utc=True) "open": "float",
"high": "float",
"low": "float",
"close": "float",
"volume": "float",
}
)
pairdata["date"] = to_datetime(pairdata["date"], unit="ms", utc=True)
return pairdata return pairdata
def ohlcv_append( def ohlcv_append(
self, self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
pair: str,
timeframe: str,
data: DataFrame,
candle_type: CandleType
) -> None: ) -> None:
""" """
Append data to existing data structures Append data to existing data structures
@ -92,7 +96,7 @@ class FeatherDataHandler(IDataHandler):
""" """
filename = self._pair_trades_filename(self._datadir, pair, trading_mode) filename = self._pair_trades_filename(self._datadir, pair, trading_mode)
self.create_dir_if_needed(filename) self.create_dir_if_needed(filename)
data.reset_index(drop=True).to_feather(filename, compression_level=9, compression='lz4') data.reset_index(drop=True).to_feather(filename, compression_level=9, compression="lz4")
def trades_append(self, pair: str, data: DataFrame): def trades_append(self, pair: str, data: DataFrame):
""" """
@ -104,7 +108,7 @@ class FeatherDataHandler(IDataHandler):
raise NotImplementedError() raise NotImplementedError()
def _trades_load( def _trades_load(
self, pair: str, trading_mode: TradingMode, timerange: Optional[TimeRange] = None self, pair: str, trading_mode: TradingMode, timerange: Optional[TimeRange] = None
) -> DataFrame: ) -> DataFrame:
""" """
Load a pair from file, either .json.gz or .json Load a pair from file, either .json.gz or .json
View File
@ -15,11 +15,11 @@ logger = logging.getLogger(__name__)
class HDF5DataHandler(IDataHandler): class HDF5DataHandler(IDataHandler):
_columns = DEFAULT_DATAFRAME_COLUMNS _columns = DEFAULT_DATAFRAME_COLUMNS
def ohlcv_store( def ohlcv_store(
self, pair: str, timeframe: str, data: pd.DataFrame, candle_type: CandleType) -> None: self, pair: str, timeframe: str, data: pd.DataFrame, candle_type: CandleType
) -> None:
""" """
Store data in hdf5 file. Store data in hdf5 file.
:param pair: Pair - used to generate filename :param pair: Pair - used to generate filename
@ -35,13 +35,18 @@ class HDF5DataHandler(IDataHandler):
self.create_dir_if_needed(filename) self.create_dir_if_needed(filename)
_data.loc[:, self._columns].to_hdf( _data.loc[:, self._columns].to_hdf(
filename, key=key, mode='a', complevel=9, complib='blosc', filename,
format='table', data_columns=['date'] key=key,
mode="a",
complevel=9,
complib="blosc",
format="table",
data_columns=["date"],
) )
def _ohlcv_load(self, pair: str, timeframe: str, def _ohlcv_load(
timerange: Optional[TimeRange], candle_type: CandleType self, pair: str, timeframe: str, timerange: Optional[TimeRange], candle_type: CandleType
) -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Internal method used to load data for one pair from disk. Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe. Implements the loading and conversion to a Pandas dataframe.
@ -55,41 +60,40 @@ class HDF5DataHandler(IDataHandler):
:return: DataFrame with ohlcv data, or empty DataFrame :return: DataFrame with ohlcv data, or empty DataFrame
""" """
key = self._pair_ohlcv_key(pair, timeframe) key = self._pair_ohlcv_key(pair, timeframe)
filename = self._pair_data_filename( filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type=candle_type)
self._datadir,
pair,
timeframe,
candle_type=candle_type
)
if not filename.exists(): if not filename.exists():
# Fallback mode for 1M files # Fallback mode for 1M files
filename = self._pair_data_filename( filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True) self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True
)
if not filename.exists(): if not filename.exists():
return pd.DataFrame(columns=self._columns) return pd.DataFrame(columns=self._columns)
where = [] where = []
if timerange: if timerange:
if timerange.starttype == 'date': if timerange.starttype == "date":
where.append(f"date >= Timestamp({timerange.startts * 1e9})") where.append(f"date >= Timestamp({timerange.startts * 1e9})")
if timerange.stoptype == 'date': if timerange.stoptype == "date":
where.append(f"date <= Timestamp({timerange.stopts * 1e9})") where.append(f"date <= Timestamp({timerange.stopts * 1e9})")
pairdata = pd.read_hdf(filename, key=key, mode="r", where=where) pairdata = pd.read_hdf(filename, key=key, mode="r", where=where)
if list(pairdata.columns) != self._columns: if list(pairdata.columns) != self._columns:
raise ValueError("Wrong dataframe format") raise ValueError("Wrong dataframe format")
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', pairdata = pairdata.astype(
'low': 'float', 'close': 'float', 'volume': 'float'}) dtype={
"open": "float",
"high": "float",
"low": "float",
"close": "float",
"volume": "float",
}
)
pairdata = pairdata.reset_index(drop=True) pairdata = pairdata.reset_index(drop=True)
return pairdata return pairdata
def ohlcv_append( def ohlcv_append(
self, self, pair: str, timeframe: str, data: pd.DataFrame, candle_type: CandleType
pair: str,
timeframe: str,
data: pd.DataFrame,
candle_type: CandleType
) -> None: ) -> None:
""" """
Append data to existing data structures Append data to existing data structures
@ -111,9 +115,13 @@ class HDF5DataHandler(IDataHandler):
key = self._pair_trades_key(pair) key = self._pair_trades_key(pair)
data.to_hdf( data.to_hdf(
self._pair_trades_filename(self._datadir, pair, trading_mode), key=key, self._pair_trades_filename(self._datadir, pair, trading_mode),
mode='a', complevel=9, complib='blosc', key=key,
format='table', data_columns=['timestamp'] mode="a",
complevel=9,
complib="blosc",
format="table",
data_columns=["timestamp"],
) )
def trades_append(self, pair: str, data: pd.DataFrame): def trades_append(self, pair: str, data: pd.DataFrame):
@ -142,13 +150,13 @@ class HDF5DataHandler(IDataHandler):
return pd.DataFrame(columns=DEFAULT_TRADES_COLUMNS) return pd.DataFrame(columns=DEFAULT_TRADES_COLUMNS)
where = [] where = []
if timerange: if timerange:
if timerange.starttype == 'date': if timerange.starttype == "date":
where.append(f"timestamp >= {timerange.startts * 1e3}") where.append(f"timestamp >= {timerange.startts * 1e3}")
if timerange.stoptype == 'date': if timerange.stoptype == "date":
where.append(f"timestamp < {timerange.stopts * 1e3}") where.append(f"timestamp < {timerange.stopts * 1e3}")
trades: pd.DataFrame = pd.read_hdf(filename, key=key, mode="r", where=where) trades: pd.DataFrame = pd.read_hdf(filename, key=key, mode="r", where=where)
trades[['id', 'type']] = trades[['id', 'type']].replace({np.nan: None}) trades[["id", "type"]] = trades[["id", "type"]].replace({np.nan: None})
return trades return trades
@classmethod @classmethod
@ -158,7 +166,7 @@ class HDF5DataHandler(IDataHandler):
@classmethod @classmethod
def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str: def _pair_ohlcv_key(cls, pair: str, timeframe: str) -> str:
# Escape futures pairs to avoid warnings # Escape futures pairs to avoid warnings
pair_esc = pair.replace(':', '_') pair_esc = pair.replace(":", "_")
return f"{pair_esc}/ohlcv/tf_{timeframe}" return f"{pair_esc}/ohlcv/tf_{timeframe}"
@classmethod @classmethod
View File
@ -3,6 +3,7 @@ Abstract datahandler interface.
Its subclasses handle loading and storing data from disk. Its subclasses handle loading and storing data from disk.
""" """
import logging import logging
import re import re
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
@ -30,8 +31,7 @@ logger = logging.getLogger(__name__)
class IDataHandler(ABC): class IDataHandler(ABC):
_OHLCV_REGEX = r"^([a-zA-Z_\d-]+)\-(\d+[a-zA-Z]{1,2})\-?([a-zA-Z_]*)?(?=\.)"
_OHLCV_REGEX = r'^([a-zA-Z_\d-]+)\-(\d+[a-zA-Z]{1,2})\-?([a-zA-Z_]*)?(?=\.)'
def __init__(self, datadir: Path) -> None: def __init__(self, datadir: Path) -> None:
self._datadir = datadir self._datadir = datadir
@ -45,7 +45,8 @@ class IDataHandler(ABC):
@classmethod @classmethod
def ohlcv_get_available_data( def ohlcv_get_available_data(
cls, datadir: Path, trading_mode: TradingMode) -> ListPairsWithTimeframes: cls, datadir: Path, trading_mode: TradingMode
) -> ListPairsWithTimeframes:
""" """
Returns a list of all pairs with ohlcv data available in this datadir Returns a list of all pairs with ohlcv data available in this datadir
:param datadir: Directory to search for ohlcv files :param datadir: Directory to search for ohlcv files
@ -53,17 +54,20 @@ class IDataHandler(ABC):
:return: List of Tuples of (pair, timeframe, CandleType) :return: List of Tuples of (pair, timeframe, CandleType)
""" """
if trading_mode == TradingMode.FUTURES: if trading_mode == TradingMode.FUTURES:
datadir = datadir.joinpath('futures') datadir = datadir.joinpath("futures")
_tmp = [ _tmp = [
re.search( re.search(cls._OHLCV_REGEX, p.name)
cls._OHLCV_REGEX, p.name for p in datadir.glob(f"*.{cls._get_file_extension()}")
) for p in datadir.glob(f"*.{cls._get_file_extension()}")] ]
return [ return [
( (
cls.rebuild_pair_from_filename(match[1]), cls.rebuild_pair_from_filename(match[1]),
cls.rebuild_timeframe_from_filename(match[2]), cls.rebuild_timeframe_from_filename(match[2]),
CandleType.from_string(match[3]) CandleType.from_string(match[3]),
) for match in _tmp if match and len(match.groups()) > 1] )
for match in _tmp
if match and len(match.groups()) > 1
]
@classmethod @classmethod
def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: CandleType) -> List[str]: def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: CandleType) -> List[str]:
@ -77,17 +81,20 @@ class IDataHandler(ABC):
""" """
candle = "" candle = ""
if candle_type != CandleType.SPOT: if candle_type != CandleType.SPOT:
datadir = datadir.joinpath('futures') datadir = datadir.joinpath("futures")
candle = f"-{candle_type}" candle = f"-{candle_type}"
ext = cls._get_file_extension() ext = cls._get_file_extension()
_tmp = [re.search(r'^(\S+)(?=\-' + timeframe + candle + f'.{ext})', p.name) _tmp = [
for p in datadir.glob(f"*{timeframe}{candle}.{ext}")] re.search(r"^(\S+)(?=\-" + timeframe + candle + f".{ext})", p.name)
for p in datadir.glob(f"*{timeframe}{candle}.{ext}")
]
# Check if regex found something and only return these results # Check if regex found something and only return these results
return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match] return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]
@abstractmethod @abstractmethod
def ohlcv_store( def ohlcv_store(
self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None: self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
) -> None:
""" """
Store ohlcv data. Store ohlcv data.
:param pair: Pair - used to generate filename :param pair: Pair - used to generate filename
@ -97,8 +104,9 @@ class IDataHandler(ABC):
:return: None :return: None
""" """
def ohlcv_data_min_max(self, pair: str, timeframe: str, def ohlcv_data_min_max(
candle_type: CandleType) -> Tuple[datetime, datetime, int]: self, pair: str, timeframe: str, candle_type: CandleType
) -> Tuple[datetime, datetime, int]:
""" """
Returns the min and max timestamp for the given pair and timeframe. Returns the min and max timestamp for the given pair and timeframe.
:param pair: Pair to get min/max for :param pair: Pair to get min/max for
@ -113,12 +121,12 @@ class IDataHandler(ABC):
datetime.fromtimestamp(0, tz=timezone.utc), datetime.fromtimestamp(0, tz=timezone.utc),
0, 0,
) )
return df.iloc[0]['date'].to_pydatetime(), df.iloc[-1]['date'].to_pydatetime(), len(df) return df.iloc[0]["date"].to_pydatetime(), df.iloc[-1]["date"].to_pydatetime(), len(df)
@abstractmethod @abstractmethod
def _ohlcv_load(self, pair: str, timeframe: str, timerange: Optional[TimeRange], def _ohlcv_load(
candle_type: CandleType self, pair: str, timeframe: str, timerange: Optional[TimeRange], candle_type: CandleType
) -> DataFrame: ) -> DataFrame:
""" """
Internal method used to load data for one pair from disk. Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe. Implements the loading and conversion to a Pandas dataframe.
@ -148,11 +156,7 @@ class IDataHandler(ABC):
@abstractmethod @abstractmethod
def ohlcv_append( def ohlcv_append(
self, self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
pair: str,
timeframe: str,
data: DataFrame,
candle_type: CandleType
) -> None: ) -> None:
""" """
Append data to existing data structures Append data to existing data structures
@ -170,8 +174,10 @@ class IDataHandler(ABC):
:return: List of Pairs :return: List of Pairs
""" """
_ext = cls._get_file_extension() _ext = cls._get_file_extension()
_tmp = [re.search(r'^(\S+)(?=\-trades.' + _ext + ')', p.name) _tmp = [
for p in datadir.glob(f"*trades.{_ext}")] re.search(r"^(\S+)(?=\-trades." + _ext + ")", p.name)
for p in datadir.glob(f"*trades.{_ext}")
]
# Check if regex found something and only return these results to avoid exceptions. # Check if regex found something and only return these results to avoid exceptions.
return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match] return [cls.rebuild_pair_from_filename(match[0]) for match in _tmp if match]
@ -231,7 +237,7 @@ class IDataHandler(ABC):
return False return False
def trades_load( def trades_load(
self, pair: str, trading_mode: TradingMode, timerange: Optional[TimeRange] = None self, pair: str, trading_mode: TradingMode, timerange: Optional[TimeRange] = None
) -> DataFrame: ) -> DataFrame:
""" """
Load a pair from file, either .json.gz or .json Load a pair from file, either .json.gz or .json
@ -264,7 +270,7 @@ class IDataHandler(ABC):
pair: str, pair: str,
timeframe: str, timeframe: str,
candle_type: CandleType, candle_type: CandleType,
no_timeframe_modify: bool = False no_timeframe_modify: bool = False,
) -> Path: ) -> Path:
pair_s = misc.pair_to_filename(pair) pair_s = misc.pair_to_filename(pair)
candle = "" candle = ""
@ -272,10 +278,9 @@ class IDataHandler(ABC):
timeframe = cls.timeframe_to_file(timeframe) timeframe = cls.timeframe_to_file(timeframe)
if candle_type != CandleType.SPOT: if candle_type != CandleType.SPOT:
datadir = datadir.joinpath('futures') datadir = datadir.joinpath("futures")
candle = f"-{candle_type}" candle = f"-{candle_type}"
filename = datadir.joinpath( filename = datadir.joinpath(f"{pair_s}-{timeframe}{candle}.{cls._get_file_extension()}")
f'{pair_s}-{timeframe}{candle}.{cls._get_file_extension()}')
return filename return filename
@classmethod @classmethod
@ -283,14 +288,14 @@ class IDataHandler(ABC):
pair_s = misc.pair_to_filename(pair) pair_s = misc.pair_to_filename(pair)
if trading_mode == TradingMode.FUTURES: if trading_mode == TradingMode.FUTURES:
# Futures pair ... # Futures pair ...
datadir = datadir.joinpath('futures') datadir = datadir.joinpath("futures")
filename = datadir.joinpath(f'{pair_s}-trades.{cls._get_file_extension()}') filename = datadir.joinpath(f"{pair_s}-trades.{cls._get_file_extension()}")
return filename return filename
@staticmethod @staticmethod
def timeframe_to_file(timeframe: str): def timeframe_to_file(timeframe: str):
return timeframe.replace('M', 'Mo') return timeframe.replace("M", "Mo")
@staticmethod @staticmethod
def rebuild_timeframe_from_filename(timeframe: str) -> str: def rebuild_timeframe_from_filename(timeframe: str) -> str:
@ -298,7 +303,7 @@ class IDataHandler(ABC):
converts timeframe from the file representation back to the original converts timeframe from the file representation back to the original
Replaces mo with M (to avoid problems on case-insensitive filesystems) Replaces mo with M (to avoid problems on case-insensitive filesystems)
""" """
return re.sub('1mo', '1M', timeframe, flags=re.IGNORECASE) return re.sub("1mo", "1M", timeframe, flags=re.IGNORECASE)
@staticmethod @staticmethod
def rebuild_pair_from_filename(pair: str) -> str: def rebuild_pair_from_filename(pair: str) -> str:
@ -306,18 +311,22 @@ class IDataHandler(ABC):
Rebuild pair name from filename Rebuild pair name from filename
Assumes an asset name of at most 7 characters to also support BTC-PERP and BTC-PERP:USD names. Assumes an asset name of at most 7 characters to also support BTC-PERP and BTC-PERP:USD names.
""" """
res = re.sub(r'^(([A-Za-z\d]{1,10})|^([A-Za-z\-]{1,6}))(_)', r'\g<1>/', pair, count=1) res = re.sub(r"^(([A-Za-z\d]{1,10})|^([A-Za-z\-]{1,6}))(_)", r"\g<1>/", pair, count=1)
res = re.sub('_', ':', res, count=1) res = re.sub("_", ":", res, count=1)
return res return res
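A self-contained sketch of the rebuild above, applying the same two regex substitutions to example filenames (the pair names are arbitrary):

import re

def rebuild_pair_sketch(pair: str) -> str:
    # Mirrors rebuild_pair_from_filename: the first underscore becomes "/", the next becomes ":".
    res = re.sub(r"^(([A-Za-z\d]{1,10})|^([A-Za-z\-]{1,6}))(_)", r"\g<1>/", pair, count=1)
    return re.sub("_", ":", res, count=1)

print(rebuild_pair_sketch("BTC_USDT"))       # -> BTC/USDT
print(rebuild_pair_sketch("BTC_USDT_USDT"))  # -> BTC/USDT:USDT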
def ohlcv_load(self, pair, timeframe: str, def ohlcv_load(
candle_type: CandleType, *, self,
timerange: Optional[TimeRange] = None, pair,
fill_missing: bool = True, timeframe: str,
drop_incomplete: bool = False, candle_type: CandleType,
startup_candles: int = 0, *,
warn_no_data: bool = True, timerange: Optional[TimeRange] = None,
) -> DataFrame: fill_missing: bool = True,
drop_incomplete: bool = False,
startup_candles: int = 0,
warn_no_data: bool = True,
) -> DataFrame:
""" """
Load cached candle (OHLCV) data for the given pair. Load cached candle (OHLCV) data for the given pair.
@ -337,15 +346,12 @@ class IDataHandler(ABC):
timerange_startup.subtract_start(timeframe_to_seconds(timeframe) * startup_candles) timerange_startup.subtract_start(timeframe_to_seconds(timeframe) * startup_candles)
pairdf = self._ohlcv_load( pairdf = self._ohlcv_load(
pair, pair, timeframe, timerange=timerange_startup, candle_type=candle_type
timeframe,
timerange=timerange_startup,
candle_type=candle_type
) )
if self._check_empty_df(pairdf, pair, timeframe, candle_type, warn_no_data): if self._check_empty_df(pairdf, pair, timeframe, candle_type, warn_no_data):
return pairdf return pairdf
else: else:
enddate = pairdf.iloc[-1]['date'] enddate = pairdf.iloc[-1]["date"]
if timerange_startup: if timerange_startup:
self._validate_pairdata(pair, pairdf, timeframe, candle_type, timerange_startup) self._validate_pairdata(pair, pairdf, timeframe, candle_type, timerange_startup)
@ -354,17 +360,25 @@ class IDataHandler(ABC):
return pairdf return pairdf
# incomplete candles should only be dropped if we didn't trim the end beforehand. # incomplete candles should only be dropped if we didn't trim the end beforehand.
pairdf = clean_ohlcv_dataframe(pairdf, timeframe, pairdf = clean_ohlcv_dataframe(
pair=pair, pairdf,
fill_missing=fill_missing, timeframe,
drop_incomplete=(drop_incomplete and pair=pair,
enddate == pairdf.iloc[-1]['date'])) fill_missing=fill_missing,
drop_incomplete=(drop_incomplete and enddate == pairdf.iloc[-1]["date"]),
)
self._check_empty_df(pairdf, pair, timeframe, candle_type, warn_no_data) self._check_empty_df(pairdf, pair, timeframe, candle_type, warn_no_data)
return pairdf return pairdf
def _check_empty_df( def _check_empty_df(
self, pairdf: DataFrame, pair: str, timeframe: str, candle_type: CandleType, self,
warn_no_data: bool, warn_price: bool = False) -> bool: pairdf: DataFrame,
pair: str,
timeframe: str,
candle_type: CandleType,
warn_no_data: bool,
warn_price: bool = False,
) -> bool:
""" """
Warn on empty dataframe Warn on empty dataframe
""" """
@ -377,39 +391,55 @@ class IDataHandler(ABC):
return True return True
elif warn_price: elif warn_price:
candle_price_gap = 0 candle_price_gap = 0
if (candle_type in (CandleType.SPOT, CandleType.FUTURES) and if (
not pairdf.empty candle_type in (CandleType.SPOT, CandleType.FUTURES)
and 'close' in pairdf.columns and 'open' in pairdf.columns): and not pairdf.empty
and "close" in pairdf.columns
and "open" in pairdf.columns
):
# Detect gaps between prior close and open # Detect gaps between prior close and open
gaps = ((pairdf['open'] - pairdf['close'].shift(1)) / pairdf['close'].shift(1)) gaps = (pairdf["open"] - pairdf["close"].shift(1)) / pairdf["close"].shift(1)
gaps = gaps.dropna() gaps = gaps.dropna()
if len(gaps): if len(gaps):
candle_price_gap = max(abs(gaps)) candle_price_gap = max(abs(gaps))
if candle_price_gap > 0.1: if candle_price_gap > 0.1:
logger.info(f"Price jump in {pair}, {timeframe}, {candle_type} between two candles " logger.info(
f"of {candle_price_gap:.2%} detected.") f"Price jump in {pair}, {timeframe}, {candle_type} between two candles "
f"of {candle_price_gap:.2%} detected."
)
return False return False
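A tiny self-contained example of the gap check above: a close of 100 followed by an open of 115 is a 15% gap, which exceeds the 0.1 threshold and would trigger the log message (values are invented).

import pandas as pd

candles = pd.DataFrame({"open": [100.0, 115.0], "close": [100.0, 116.0]})
gaps = ((candles["open"] - candles["close"].shift(1)) / candles["close"].shift(1)).dropna()
print(max(abs(gaps)))  # 0.15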
def _validate_pairdata(self, pair, pairdata: DataFrame, timeframe: str, def _validate_pairdata(
candle_type: CandleType, timerange: TimeRange): self,
pair,
pairdata: DataFrame,
timeframe: str,
candle_type: CandleType,
timerange: TimeRange,
):
""" """
Validates pairdata for missing data at start and end and logs warnings. Validates pairdata for missing data at start and end and logs warnings.
:param pairdata: Dataframe to validate :param pairdata: Dataframe to validate
:param timerange: Timerange specified for start and end dates :param timerange: Timerange specified for start and end dates
""" """
if timerange.starttype == 'date': if timerange.starttype == "date":
if pairdata.iloc[0]['date'] > timerange.startdt: if pairdata.iloc[0]["date"] > timerange.startdt:
logger.warning(f"{pair}, {candle_type}, {timeframe}, " logger.warning(
f"data starts at {pairdata.iloc[0]['date']:%Y-%m-%d %H:%M:%S}") f"{pair}, {candle_type}, {timeframe}, "
if timerange.stoptype == 'date': f"data starts at {pairdata.iloc[0]['date']:%Y-%m-%d %H:%M:%S}"
if pairdata.iloc[-1]['date'] < timerange.stopdt: )
logger.warning(f"{pair}, {candle_type}, {timeframe}, " if timerange.stoptype == "date":
f"data ends at {pairdata.iloc[-1]['date']:%Y-%m-%d %H:%M:%S}") if pairdata.iloc[-1]["date"] < timerange.stopdt:
logger.warning(
f"{pair}, {candle_type}, {timeframe}, "
f"data ends at {pairdata.iloc[-1]['date']:%Y-%m-%d %H:%M:%S}"
)
def rename_futures_data( def rename_futures_data(
self, pair: str, new_pair: str, timeframe: str, candle_type: CandleType): self, pair: str, new_pair: str, timeframe: str, candle_type: CandleType
):
""" """
Temporary method to migrate data from old naming to new naming (BTC/USDT -> BTC/USDT:USDT) Temporary method to migrate data from old naming to new naming (BTC/USDT -> BTC/USDT:USDT)
Only used for binance to support the binance futures naming unification. Only used for binance to support the binance futures naming unification.
@ -435,18 +465,19 @@ class IDataHandler(ABC):
if funding_rate_combs: if funding_rate_combs:
logger.warning( logger.warning(
f'Migrating {len(funding_rate_combs)} funding fees to correct timeframe.') f"Migrating {len(funding_rate_combs)} funding fees to correct timeframe."
)
for pair, timeframe, candletype in funding_rate_combs: for pair, timeframe, candletype in funding_rate_combs:
old_name = self._pair_data_filename(self._datadir, pair, timeframe, candletype) old_name = self._pair_data_filename(self._datadir, pair, timeframe, candletype)
new_name = self._pair_data_filename(self._datadir, pair, ff_timeframe, candletype) new_name = self._pair_data_filename(self._datadir, pair, ff_timeframe, candletype)
if not Path(old_name).exists(): if not Path(old_name).exists():
logger.warning(f'{old_name} does not exist, skipping.') logger.warning(f"{old_name} does not exist, skipping.")
continue continue
if Path(new_name).exists(): if Path(new_name).exists():
logger.warning(f'{new_name} already exists, Removing.') logger.warning(f"{new_name} already exists, Removing.")
Path(new_name).unlink() Path(new_name).unlink()
Path(old_name).rename(new_name) Path(old_name).rename(new_name)
@ -461,27 +492,33 @@ def get_datahandlerclass(datatype: str) -> Type[IDataHandler]:
:return: Datahandler class :return: Datahandler class
""" """
if datatype == 'json': if datatype == "json":
from .jsondatahandler import JsonDataHandler from .jsondatahandler import JsonDataHandler
return JsonDataHandler return JsonDataHandler
elif datatype == 'jsongz': elif datatype == "jsongz":
from .jsondatahandler import JsonGzDataHandler from .jsondatahandler import JsonGzDataHandler
return JsonGzDataHandler return JsonGzDataHandler
elif datatype == 'hdf5': elif datatype == "hdf5":
from .hdf5datahandler import HDF5DataHandler from .hdf5datahandler import HDF5DataHandler
return HDF5DataHandler return HDF5DataHandler
elif datatype == 'feather': elif datatype == "feather":
from .featherdatahandler import FeatherDataHandler from .featherdatahandler import FeatherDataHandler
return FeatherDataHandler return FeatherDataHandler
elif datatype == 'parquet': elif datatype == "parquet":
from .parquetdatahandler import ParquetDataHandler from .parquetdatahandler import ParquetDataHandler
return ParquetDataHandler return ParquetDataHandler
else: else:
raise ValueError(f"No datahandler for datatype {datatype} available.") raise ValueError(f"No datahandler for datatype {datatype} available.")
def get_datahandler(datadir: Path, data_format: Optional[str] = None, def get_datahandler(
data_handler: Optional[IDataHandler] = None) -> IDataHandler: datadir: Path, data_format: Optional[str] = None, data_handler: Optional[IDataHandler] = None
) -> IDataHandler:
""" """
:param datadir: Folder to save data :param datadir: Folder to save data
:param data_format: dataformat to use :param data_format: dataformat to use
@ -489,6 +526,6 @@ def get_datahandler(datadir: Path, data_format: Optional[str] = None,
""" """
if not data_handler: if not data_handler:
HandlerClass = get_datahandlerclass(data_format or 'feather') HandlerClass = get_datahandlerclass(data_format or "feather")
data_handler = HandlerClass(datadir) data_handler = HandlerClass(datadir)
return data_handler return data_handler
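A hedged usage sketch of the factory above; the data directory and pair are placeholders, and the import path assumes get_datahandler is re-exported from freqtrade.data.history as shown earlier in this diff.

from pathlib import Path

from freqtrade.data.history import get_datahandler
from freqtrade.enums import CandleType

dh = get_datahandler(Path("user_data/data/binance"), data_format="feather")
candles = dh.ohlcv_load("BTC/USDT", "5m", candle_type=CandleType.SPOT)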
View File
@ -17,12 +17,12 @@ logger = logging.getLogger(__name__)
class JsonDataHandler(IDataHandler): class JsonDataHandler(IDataHandler):
_use_zip = False _use_zip = False
_columns = DEFAULT_DATAFRAME_COLUMNS _columns = DEFAULT_DATAFRAME_COLUMNS
def ohlcv_store( def ohlcv_store(
self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None: self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
) -> None:
""" """
Store data in json format "values". Store data in json format "values".
format looks as follows: format looks as follows:
@ -37,16 +37,16 @@ class JsonDataHandler(IDataHandler):
self.create_dir_if_needed(filename) self.create_dir_if_needed(filename)
_data = data.copy() _data = data.copy()
# Convert date to int # Convert date to int
_data['date'] = _data['date'].astype(np.int64) // 1000 // 1000 _data["date"] = _data["date"].astype(np.int64) // 1000 // 1000
# Reset index, select only appropriate columns and save as json # Reset index, select only appropriate columns and save as json
_data.reset_index(drop=True).loc[:, self._columns].to_json( _data.reset_index(drop=True).loc[:, self._columns].to_json(
filename, orient="values", filename, orient="values", compression="gzip" if self._use_zip else None
compression='gzip' if self._use_zip else None) )
def _ohlcv_load(self, pair: str, timeframe: str, def _ohlcv_load(
timerange: Optional[TimeRange], candle_type: CandleType self, pair: str, timeframe: str, timerange: Optional[TimeRange], candle_type: CandleType
) -> DataFrame: ) -> DataFrame:
""" """
Internal method used to load data for one pair from disk. Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe. Implements the loading and conversion to a Pandas dataframe.
@ -59,31 +59,34 @@ class JsonDataHandler(IDataHandler):
:param candle_type: Any of the enum CandleType (must match trading mode!) :param candle_type: Any of the enum CandleType (must match trading mode!)
:return: DataFrame with ohlcv data, or empty DataFrame :return: DataFrame with ohlcv data, or empty DataFrame
""" """
filename = self._pair_data_filename( filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type=candle_type)
self._datadir, pair, timeframe, candle_type=candle_type)
if not filename.exists(): if not filename.exists():
# Fallback mode for 1M files # Fallback mode for 1M files
filename = self._pair_data_filename( filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True) self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True
)
if not filename.exists(): if not filename.exists():
return DataFrame(columns=self._columns) return DataFrame(columns=self._columns)
try: try:
pairdata = read_json(filename, orient='values') pairdata = read_json(filename, orient="values")
pairdata.columns = self._columns pairdata.columns = self._columns
except ValueError: except ValueError:
logger.error(f"Could not load data for {pair}.") logger.error(f"Could not load data for {pair}.")
return DataFrame(columns=self._columns) return DataFrame(columns=self._columns)
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', pairdata = pairdata.astype(
'low': 'float', 'close': 'float', 'volume': 'float'}) dtype={
pairdata['date'] = to_datetime(pairdata['date'], unit='ms', utc=True) "open": "float",
"high": "float",
"low": "float",
"close": "float",
"volume": "float",
}
)
pairdata["date"] = to_datetime(pairdata["date"], unit="ms", utc=True)
return pairdata return pairdata
def ohlcv_append( def ohlcv_append(
self, self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
pair: str,
timeframe: str,
data: DataFrame,
candle_type: CandleType
) -> None: ) -> None:
""" """
Append data to existing data structures Append data to existing data structures
@ -145,5 +148,4 @@ class JsonDataHandler(IDataHandler):
class JsonGzDataHandler(JsonDataHandler): class JsonGzDataHandler(JsonDataHandler):
_use_zip = True _use_zip = True
View File
@ -14,11 +14,11 @@ logger = logging.getLogger(__name__)
class ParquetDataHandler(IDataHandler): class ParquetDataHandler(IDataHandler):
_columns = DEFAULT_DATAFRAME_COLUMNS _columns = DEFAULT_DATAFRAME_COLUMNS
def ohlcv_store( def ohlcv_store(
self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType) -> None: self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
) -> None:
""" """
Store data in parquet format. Store data in parquet format.
format looks as follows: format looks as follows:
@ -34,9 +34,9 @@ class ParquetDataHandler(IDataHandler):
data.reset_index(drop=True).loc[:, self._columns].to_parquet(filename) data.reset_index(drop=True).loc[:, self._columns].to_parquet(filename)
def _ohlcv_load(self, pair: str, timeframe: str, def _ohlcv_load(
timerange: Optional[TimeRange], candle_type: CandleType self, pair: str, timeframe: str, timerange: Optional[TimeRange], candle_type: CandleType
) -> DataFrame: ) -> DataFrame:
""" """
Internal method used to load data for one pair from disk. Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe. Implements the loading and conversion to a Pandas dataframe.
@ -49,28 +49,31 @@ class ParquetDataHandler(IDataHandler):
:param candle_type: Any of the enum CandleType (must match trading mode!) :param candle_type: Any of the enum CandleType (must match trading mode!)
:return: DataFrame with ohlcv data, or empty DataFrame :return: DataFrame with ohlcv data, or empty DataFrame
""" """
filename = self._pair_data_filename( filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type=candle_type)
self._datadir, pair, timeframe, candle_type=candle_type)
if not filename.exists(): if not filename.exists():
# Fallback mode for 1M files # Fallback mode for 1M files
filename = self._pair_data_filename( filename = self._pair_data_filename(
self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True) self._datadir, pair, timeframe, candle_type=candle_type, no_timeframe_modify=True
)
if not filename.exists(): if not filename.exists():
return DataFrame(columns=self._columns) return DataFrame(columns=self._columns)
pairdata = read_parquet(filename) pairdata = read_parquet(filename)
pairdata.columns = self._columns pairdata.columns = self._columns
pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float', pairdata = pairdata.astype(
'low': 'float', 'close': 'float', 'volume': 'float'}) dtype={
pairdata['date'] = to_datetime(pairdata['date'], unit='ms', utc=True) "open": "float",
"high": "float",
"low": "float",
"close": "float",
"volume": "float",
}
)
pairdata["date"] = to_datetime(pairdata["date"], unit="ms", utc=True)
return pairdata return pairdata
def ohlcv_append( def ohlcv_append(
self, self, pair: str, timeframe: str, data: DataFrame, candle_type: CandleType
pair: str,
timeframe: str,
data: DataFrame,
candle_type: CandleType
) -> None: ) -> None:
""" """
Append data to existing data structures Append data to existing data structures
@ -34,17 +34,19 @@ from freqtrade.util.migrations import migrate_data
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def load_pair_history(pair: str, def load_pair_history(
timeframe: str, pair: str,
datadir: Path, *, timeframe: str,
timerange: Optional[TimeRange] = None, datadir: Path,
fill_up_missing: bool = True, *,
drop_incomplete: bool = False, timerange: Optional[TimeRange] = None,
startup_candles: int = 0, fill_up_missing: bool = True,
data_format: Optional[str] = None, drop_incomplete: bool = False,
data_handler: Optional[IDataHandler] = None, startup_candles: int = 0,
candle_type: CandleType = CandleType.SPOT data_format: Optional[str] = None,
) -> DataFrame: data_handler: Optional[IDataHandler] = None,
candle_type: CandleType = CandleType.SPOT,
) -> DataFrame:
""" """
Load cached ohlcv history for the given pair. Load cached ohlcv history for the given pair.
@ -63,27 +65,30 @@ def load_pair_history(pair: str,
""" """
data_handler = get_datahandler(datadir, data_format, data_handler) data_handler = get_datahandler(datadir, data_format, data_handler)
return data_handler.ohlcv_load(pair=pair, return data_handler.ohlcv_load(
timeframe=timeframe, pair=pair,
timerange=timerange, timeframe=timeframe,
fill_missing=fill_up_missing, timerange=timerange,
drop_incomplete=drop_incomplete, fill_missing=fill_up_missing,
startup_candles=startup_candles, drop_incomplete=drop_incomplete,
candle_type=candle_type, startup_candles=startup_candles,
) candle_type=candle_type,
)
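A hedged usage sketch for the reformatted load_pair_history signature, assuming freqtrade is installed and that user_data/data/binance already holds downloaded candles (pair, path and import path are placeholders/assumptions):

from pathlib import Path

from freqtrade.data.history import load_pair_history  # assumed import path
from freqtrade.enums import CandleType

candles = load_pair_history(
    pair="BTC/USDT",
    timeframe="5m",
    datadir=Path("user_data/data/binance"),
    candle_type=CandleType.SPOT,
)
print(candles.tail())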
def load_data(datadir: Path, def load_data(
timeframe: str, datadir: Path,
pairs: List[str], *, timeframe: str,
timerange: Optional[TimeRange] = None, pairs: List[str],
fill_up_missing: bool = True, *,
startup_candles: int = 0, timerange: Optional[TimeRange] = None,
fail_without_data: bool = False, fill_up_missing: bool = True,
data_format: str = 'feather', startup_candles: int = 0,
candle_type: CandleType = CandleType.SPOT, fail_without_data: bool = False,
user_futures_funding_rate: Optional[int] = None, data_format: str = "feather",
) -> Dict[str, DataFrame]: candle_type: CandleType = CandleType.SPOT,
user_futures_funding_rate: Optional[int] = None,
) -> Dict[str, DataFrame]:
""" """
Load ohlcv history data for a list of pairs. Load ohlcv history data for a list of pairs.
@ -100,18 +105,21 @@ def load_data(datadir: Path,
""" """
result: Dict[str, DataFrame] = {} result: Dict[str, DataFrame] = {}
if startup_candles > 0 and timerange: if startup_candles > 0 and timerange:
logger.info(f'Using indicator startup period: {startup_candles} ...') logger.info(f"Using indicator startup period: {startup_candles} ...")
data_handler = get_datahandler(datadir, data_format) data_handler = get_datahandler(datadir, data_format)
for pair in pairs: for pair in pairs:
hist = load_pair_history(pair=pair, timeframe=timeframe, hist = load_pair_history(
datadir=datadir, timerange=timerange, pair=pair,
fill_up_missing=fill_up_missing, timeframe=timeframe,
startup_candles=startup_candles, datadir=datadir,
data_handler=data_handler, timerange=timerange,
candle_type=candle_type, fill_up_missing=fill_up_missing,
) startup_candles=startup_candles,
data_handler=data_handler,
candle_type=candle_type,
)
if not hist.empty: if not hist.empty:
result[pair] = hist result[pair] = hist
else: else:
@ -125,14 +133,16 @@ def load_data(datadir: Path,
return result return result
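For several pairs at once, load_data returns a dict keyed by pair; a short sketch under the same assumptions as the previous example:

from pathlib import Path

from freqtrade.data.history import load_data  # assumed import path

data = load_data(
    datadir=Path("user_data/data/binance"),
    timeframe="1h",
    pairs=["BTC/USDT", "ETH/USDT"],
)
for pair, df in data.items():
    print(pair, len(df), "candles")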
def refresh_data(*, datadir: Path, def refresh_data(
timeframe: str, *,
pairs: List[str], datadir: Path,
exchange: Exchange, timeframe: str,
data_format: Optional[str] = None, pairs: List[str],
timerange: Optional[TimeRange] = None, exchange: Exchange,
candle_type: CandleType, data_format: Optional[str] = None,
) -> None: timerange: Optional[TimeRange] = None,
candle_type: CandleType,
) -> None:
""" """
Refresh ohlcv history data for a list of pairs. Refresh ohlcv history data for a list of pairs.
@ -146,11 +156,17 @@ def refresh_data(*, datadir: Path,
""" """
data_handler = get_datahandler(datadir, data_format) data_handler = get_datahandler(datadir, data_format)
for idx, pair in enumerate(pairs): for idx, pair in enumerate(pairs):
process = f'{idx}/{len(pairs)}' process = f"{idx}/{len(pairs)}"
_download_pair_history(pair=pair, process=process, _download_pair_history(
timeframe=timeframe, datadir=datadir, pair=pair,
timerange=timerange, exchange=exchange, data_handler=data_handler, process=process,
candle_type=candle_type) timeframe=timeframe,
datadir=datadir,
timerange=timerange,
exchange=exchange,
data_handler=data_handler,
candle_type=candle_type,
)
def _load_cached_data_for_updating( def _load_cached_data_for_updating(
@ -172,42 +188,49 @@ def _load_cached_data_for_updating(
start = None start = None
end = None end = None
if timerange: if timerange:
if timerange.starttype == 'date': if timerange.starttype == "date":
start = timerange.startdt start = timerange.startdt
if timerange.stoptype == 'date': if timerange.stoptype == "date":
end = timerange.stopdt end = timerange.stopdt
# Intentionally don't pass timerange in - since we need to load the full dataset. # Intentionally don't pass timerange in - since we need to load the full dataset.
data = data_handler.ohlcv_load(pair, timeframe=timeframe, data = data_handler.ohlcv_load(
timerange=None, fill_missing=False, pair,
drop_incomplete=True, warn_no_data=False, timeframe=timeframe,
candle_type=candle_type) timerange=None,
fill_missing=False,
drop_incomplete=True,
warn_no_data=False,
candle_type=candle_type,
)
if not data.empty: if not data.empty:
if not prepend and start and start < data.iloc[0]['date']: if not prepend and start and start < data.iloc[0]["date"]:
# Earlier data than existing data requested, redownload all # Earlier data than existing data requested, redownload all
data = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS) data = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS)
else: else:
if prepend: if prepend:
end = data.iloc[0]['date'] end = data.iloc[0]["date"]
else: else:
start = data.iloc[-1]['date'] start = data.iloc[-1]["date"]
start_ms = int(start.timestamp() * 1000) if start else None start_ms = int(start.timestamp() * 1000) if start else None
end_ms = int(end.timestamp() * 1000) if end else None end_ms = int(end.timestamp() * 1000) if end else None
return data, start_ms, end_ms return data, start_ms, end_ms
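The start/end selection above decides the download window; a self-contained illustration of the append vs. prepend branches with invented candle dates:

import pandas as pd

data = pd.DataFrame({"date": pd.to_datetime(["2024-05-01", "2024-05-02"], utc=True)})
prepend = False
start = end = None
if not data.empty:
    if prepend:
        end = data.iloc[0]["date"]        # fetch only candles before the cached range
    else:
        start = data.iloc[-1]["date"]     # resume from the newest cached candle
start_ms = int(start.timestamp() * 1000) if start is not None else None
end_ms = int(end.timestamp() * 1000) if end is not None else None
print(start_ms, end_ms)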
def _download_pair_history(pair: str, *, def _download_pair_history(
datadir: Path, pair: str,
exchange: Exchange, *,
timeframe: str = '5m', datadir: Path,
process: str = '', exchange: Exchange,
new_pairs_days: int = 30, timeframe: str = "5m",
data_handler: Optional[IDataHandler] = None, process: str = "",
timerange: Optional[TimeRange] = None, new_pairs_days: int = 30,
candle_type: CandleType, data_handler: Optional[IDataHandler] = None,
erase: bool = False, timerange: Optional[TimeRange] = None,
prepend: bool = False, candle_type: CandleType,
) -> bool: erase: bool = False,
prepend: bool = False,
) -> bool:
""" """
Download latest candles from the exchange for the pair and timeframe passed in parameters Download latest candles from the exchange for the pair and timeframe passed in parameters
The data is downloaded starting from the last correct data that The data is downloaded starting from the last correct data that
@ -226,54 +249,69 @@ def _download_pair_history(pair: str, *,
try: try:
if erase: if erase:
if data_handler.ohlcv_purge(pair, timeframe, candle_type=candle_type): if data_handler.ohlcv_purge(pair, timeframe, candle_type=candle_type):
logger.info(f'Deleting existing data for pair {pair}, {timeframe}, {candle_type}.') logger.info(f"Deleting existing data for pair {pair}, {timeframe}, {candle_type}.")
data, since_ms, until_ms = _load_cached_data_for_updating( data, since_ms, until_ms = _load_cached_data_for_updating(
pair, timeframe, timerange, pair,
timeframe,
timerange,
data_handler=data_handler, data_handler=data_handler,
candle_type=candle_type, candle_type=candle_type,
prepend=prepend) prepend=prepend,
)
logger.info(f'({process}) - Download history data for "{pair}", {timeframe}, ' logger.info(
f'{candle_type} and store in {datadir}. ' f'({process}) - Download history data for "{pair}", {timeframe}, '
f'From {format_ms_time(since_ms) if since_ms else "start"} to ' f'{candle_type} and store in {datadir}. '
f'{format_ms_time(until_ms) if until_ms else "now"}' f'From {format_ms_time(since_ms) if since_ms else "start"} to '
) f'{format_ms_time(until_ms) if until_ms else "now"}'
)
logger.debug("Current Start: %s", logger.debug(
f"{data.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}" "Current Start: %s",
if not data.empty else 'None') f"{data.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}" if not data.empty else "None",
logger.debug("Current End: %s", )
f"{data.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}" logger.debug(
if not data.empty else 'None') "Current End: %s",
f"{data.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}" if not data.empty else "None",
)
# Default since_ms to 30 days if nothing is given # Default since_ms to 30 days if nothing is given
new_data = exchange.get_historic_ohlcv(pair=pair, new_data = exchange.get_historic_ohlcv(
timeframe=timeframe, pair=pair,
since_ms=since_ms if since_ms else timeframe=timeframe,
int((datetime.now() - timedelta(days=new_pairs_days) since_ms=since_ms
).timestamp()) * 1000, if since_ms
is_new_pair=data.empty, else int((datetime.now() - timedelta(days=new_pairs_days)).timestamp()) * 1000,
candle_type=candle_type, is_new_pair=data.empty,
until_ms=until_ms if until_ms else None candle_type=candle_type,
) until_ms=until_ms if until_ms else None,
)
# TODO: Maybe move parsing to exchange class (?) # TODO: Maybe move parsing to exchange class (?)
new_dataframe = ohlcv_to_dataframe(new_data, timeframe, pair, new_dataframe = ohlcv_to_dataframe(
fill_missing=False, drop_incomplete=True) new_data, timeframe, pair, fill_missing=False, drop_incomplete=True
)
if data.empty: if data.empty:
data = new_dataframe data = new_dataframe
else: else:
# Run cleaning again to ensure there were no duplicate candles # Run cleaning again to ensure there were no duplicate candles
# Especially between existing and new data. # Especially between existing and new data.
data = clean_ohlcv_dataframe(concat([data, new_dataframe], axis=0), timeframe, pair, data = clean_ohlcv_dataframe(
fill_missing=False, drop_incomplete=False) concat([data, new_dataframe], axis=0),
timeframe,
pair,
fill_missing=False,
drop_incomplete=False,
)
logger.debug("New Start: %s", logger.debug(
f"{data.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}" "New Start: %s",
if not data.empty else 'None') f"{data.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}" if not data.empty else "None",
logger.debug("New End: %s", )
f"{data.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}" logger.debug(
if not data.empty else 'None') "New End: %s",
f"{data.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}" if not data.empty else "None",
)
data_handler.ohlcv_store(pair, timeframe, data=data, candle_type=candle_type) data_handler.ohlcv_store(pair, timeframe, data=data, candle_type=candle_type)
return True return True
@ -285,13 +323,18 @@ def _download_pair_history(pair: str, *,
return False return False
def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str], def refresh_backtest_ohlcv_data(
datadir: Path, trading_mode: str, exchange: Exchange,
timerange: Optional[TimeRange] = None, pairs: List[str],
new_pairs_days: int = 30, erase: bool = False, timeframes: List[str],
data_format: Optional[str] = None, datadir: Path,
prepend: bool = False, trading_mode: str,
) -> List[str]: timerange: Optional[TimeRange] = None,
new_pairs_days: int = 30,
erase: bool = False,
data_format: Optional[str] = None,
prepend: bool = False,
) -> List[str]:
""" """
Refresh stored ohlcv data for backtesting and hyperopt operations. Refresh stored ohlcv data for backtesting and hyperopt operations.
Used by freqtrade download-data subcommand. Used by freqtrade download-data subcommand.
@ -300,63 +343,77 @@ def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes
pairs_not_available = [] pairs_not_available = []
data_handler = get_datahandler(datadir, data_format) data_handler = get_datahandler(datadir, data_format)
candle_type = CandleType.get_default(trading_mode) candle_type = CandleType.get_default(trading_mode)
process = '' process = ""
for idx, pair in enumerate(pairs, start=1): for idx, pair in enumerate(pairs, start=1):
if pair not in exchange.markets: if pair not in exchange.markets:
pairs_not_available.append(pair) pairs_not_available.append(pair)
logger.info(f"Skipping pair {pair}...") logger.info(f"Skipping pair {pair}...")
continue continue
for timeframe in timeframes: for timeframe in timeframes:
logger.debug(f"Downloading pair {pair}, {candle_type}, interval {timeframe}.")
logger.debug(f'Downloading pair {pair}, {candle_type}, interval {timeframe}.') process = f"{idx}/{len(pairs)}"
process = f'{idx}/{len(pairs)}' _download_pair_history(
_download_pair_history(pair=pair, process=process, pair=pair,
datadir=datadir, exchange=exchange, process=process,
timerange=timerange, data_handler=data_handler, datadir=datadir,
timeframe=str(timeframe), new_pairs_days=new_pairs_days, exchange=exchange,
candle_type=candle_type, timerange=timerange,
erase=erase, prepend=prepend) data_handler=data_handler,
if trading_mode == 'futures': timeframe=str(timeframe),
new_pairs_days=new_pairs_days,
candle_type=candle_type,
erase=erase,
prepend=prepend,
)
if trading_mode == "futures":
# Predefined candletype (and timeframe) depending on exchange # Predefined candletype (and timeframe) depending on exchange
# Downloads what is necessary to backtest based on futures data. # Downloads what is necessary to backtest based on futures data.
tf_mark = exchange.get_option('mark_ohlcv_timeframe') tf_mark = exchange.get_option("mark_ohlcv_timeframe")
tf_funding_rate = exchange.get_option('funding_fee_timeframe') tf_funding_rate = exchange.get_option("funding_fee_timeframe")
fr_candle_type = CandleType.from_string(exchange.get_option('mark_ohlcv_price')) fr_candle_type = CandleType.from_string(exchange.get_option("mark_ohlcv_price"))
# All exchanges need FundingRate for futures trading. # All exchanges need FundingRate for futures trading.
# The timeframe is aligned to the mark-price timeframe. # The timeframe is aligned to the mark-price timeframe.
combs = ((CandleType.FUNDING_RATE, tf_funding_rate), (fr_candle_type, tf_mark)) combs = ((CandleType.FUNDING_RATE, tf_funding_rate), (fr_candle_type, tf_mark))
for candle_type_f, tf in combs: for candle_type_f, tf in combs:
logger.debug(f'Downloading pair {pair}, {candle_type_f}, interval {tf}.') logger.debug(f"Downloading pair {pair}, {candle_type_f}, interval {tf}.")
_download_pair_history(pair=pair, process=process, _download_pair_history(
datadir=datadir, exchange=exchange, pair=pair,
timerange=timerange, data_handler=data_handler, process=process,
timeframe=str(tf), new_pairs_days=new_pairs_days, datadir=datadir,
candle_type=candle_type_f, exchange=exchange,
erase=erase, prepend=prepend) timerange=timerange,
data_handler=data_handler,
timeframe=str(tf),
new_pairs_days=new_pairs_days,
candle_type=candle_type_f,
erase=erase,
prepend=prepend,
)
return pairs_not_available return pairs_not_available
def _download_trades_history(exchange: Exchange, def _download_trades_history(
pair: str, *, exchange: Exchange,
new_pairs_days: int = 30, pair: str,
timerange: Optional[TimeRange] = None, *,
data_handler: IDataHandler, new_pairs_days: int = 30,
trading_mode: TradingMode, timerange: Optional[TimeRange] = None,
) -> bool: data_handler: IDataHandler,
trading_mode: TradingMode,
) -> bool:
""" """
Download trade history from the exchange. Download trade history from the exchange.
Appends to previously downloaded trades data. Appends to previously downloaded trades data.
""" """
try: try:
until = None until = None
since = 0 since = 0
if timerange: if timerange:
if timerange.starttype == 'date': if timerange.starttype == "date":
since = timerange.startts * 1000 since = timerange.startts * 1000
if timerange.stoptype == 'date': if timerange.stoptype == "date":
until = timerange.stopts * 1000 until = timerange.stopts * 1000
trades = data_handler.trades_load(pair, trading_mode) trades = data_handler.trades_load(pair, trading_mode)
@ -365,60 +422,76 @@ def _download_trades_history(exchange: Exchange,
# DEFAULT_TRADES_COLUMNS: 0 -> timestamp # DEFAULT_TRADES_COLUMNS: 0 -> timestamp
# DEFAULT_TRADES_COLUMNS: 1 -> id # DEFAULT_TRADES_COLUMNS: 1 -> id
if not trades.empty and since > 0 and since < trades.iloc[0]['timestamp']: if not trades.empty and since > 0 and since < trades.iloc[0]["timestamp"]:
# since is before the first trade # since is before the first trade
logger.info(f"Start ({trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}) earlier than " logger.info(
f"available data. Redownloading trades for {pair}...") f"Start ({trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}) earlier than "
f"available data. Redownloading trades for {pair}..."
)
trades = trades_list_to_df([]) trades = trades_list_to_df([])
from_id = trades.iloc[-1]['id'] if not trades.empty else None from_id = trades.iloc[-1]["id"] if not trades.empty else None
if not trades.empty and since < trades.iloc[-1]['timestamp']: if not trades.empty and since < trades.iloc[-1]["timestamp"]:
# Reset since to the last available point # Reset since to the last available point
# - 5 seconds (to ensure we're getting all trades) # - 5 seconds (to ensure we're getting all trades)
since = trades.iloc[-1]['timestamp'] - (5 * 1000) since = trades.iloc[-1]["timestamp"] - (5 * 1000)
logger.info(f"Using last trade date -5s - Downloading trades for {pair} " logger.info(
f"since: {format_ms_time(since)}.") f"Using last trade date -5s - Downloading trades for {pair} "
f"since: {format_ms_time(since)}."
)
if not since: if not since:
since = dt_ts(dt_now() - timedelta(days=new_pairs_days)) since = dt_ts(dt_now() - timedelta(days=new_pairs_days))
logger.debug("Current Start: %s", 'None' if trades.empty else logger.debug(
f"{trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}") "Current Start: %s",
logger.debug("Current End: %s", 'None' if trades.empty else "None" if trades.empty else f"{trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}",
f"{trades.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}") )
logger.debug(
"Current End: %s",
"None" if trades.empty else f"{trades.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}",
)
logger.info(f"Current Amount of trades: {len(trades)}") logger.info(f"Current Amount of trades: {len(trades)}")
# Default since_ms to 30 days if nothing is given # Default since_ms to 30 days if nothing is given
new_trades = exchange.get_historic_trades(pair=pair, new_trades = exchange.get_historic_trades(
since=since, pair=pair,
until=until, since=since,
from_id=from_id, until=until,
) from_id=from_id,
)
new_trades_df = trades_list_to_df(new_trades[1]) new_trades_df = trades_list_to_df(new_trades[1])
trades = concat([trades, new_trades_df], axis=0) trades = concat([trades, new_trades_df], axis=0)
# Remove duplicates to make sure we're not storing data we don't need # Remove duplicates to make sure we're not storing data we don't need
trades = trades_df_remove_duplicates(trades) trades = trades_df_remove_duplicates(trades)
data_handler.trades_store(pair, trades, trading_mode) data_handler.trades_store(pair, trades, trading_mode)
logger.debug("New Start: %s", 'None' if trades.empty else logger.debug(
f"{trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}") "New Start: %s",
logger.debug("New End: %s", 'None' if trades.empty else "None" if trades.empty else f"{trades.iloc[0]['date']:{DATETIME_PRINT_FORMAT}}",
f"{trades.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}") )
logger.debug(
"New End: %s",
"None" if trades.empty else f"{trades.iloc[-1]['date']:{DATETIME_PRINT_FORMAT}}",
)
logger.info(f"New Amount of trades: {len(trades)}") logger.info(f"New Amount of trades: {len(trades)}")
return True return True
except Exception: except Exception:
logger.exception( logger.exception(f'Failed to download historic trades for pair: "{pair}". ')
f'Failed to download historic trades for pair: "{pair}". '
)
return False return False
def refresh_backtest_trades_data(exchange: Exchange, pairs: List[str], datadir: Path, def refresh_backtest_trades_data(
timerange: TimeRange, trading_mode: TradingMode, exchange: Exchange,
new_pairs_days: int = 30, pairs: List[str],
erase: bool = False, data_format: str = 'feather', datadir: Path,
) -> List[str]: timerange: TimeRange,
trading_mode: TradingMode,
new_pairs_days: int = 30,
erase: bool = False,
data_format: str = "feather",
) -> List[str]:
""" """
Refresh stored trades data for backtesting and hyperopt operations. Refresh stored trades data for backtesting and hyperopt operations.
Used by freqtrade download-data subcommand. Used by freqtrade download-data subcommand.
@ -434,15 +507,17 @@ def refresh_backtest_trades_data(exchange: Exchange, pairs: List[str], datadir:
if erase: if erase:
if data_handler.trades_purge(pair, trading_mode): if data_handler.trades_purge(pair, trading_mode):
logger.info(f'Deleting existing data for pair {pair}.') logger.info(f"Deleting existing data for pair {pair}.")
logger.info(f'Downloading trades for pair {pair}.') logger.info(f"Downloading trades for pair {pair}.")
_download_trades_history(exchange=exchange, _download_trades_history(
pair=pair, exchange=exchange,
new_pairs_days=new_pairs_days, pair=pair,
timerange=timerange, new_pairs_days=new_pairs_days,
data_handler=data_handler, timerange=timerange,
trading_mode=trading_mode) data_handler=data_handler,
trading_mode=trading_mode,
)
return pairs_not_available return pairs_not_available
@ -454,15 +529,18 @@ def get_timerange(data: Dict[str, DataFrame]) -> Tuple[datetime, datetime]:
:return: tuple containing min_date, max_date :return: tuple containing min_date, max_date
""" """
timeranges = [ timeranges = [
(frame['date'].min().to_pydatetime(), frame['date'].max().to_pydatetime()) (frame["date"].min().to_pydatetime(), frame["date"].max().to_pydatetime())
for frame in data.values() for frame in data.values()
] ]
return (min(timeranges, key=operator.itemgetter(0))[0], return (
max(timeranges, key=operator.itemgetter(1))[1]) min(timeranges, key=operator.itemgetter(0))[0],
max(timeranges, key=operator.itemgetter(1))[1],
)
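get_timerange reduces the per-pair date ranges to a single overall span; a stand-alone equivalent with made-up frames:

import operator

import pandas as pd

data = {
    "BTC/USDT": pd.DataFrame({"date": pd.to_datetime(["2024-01-01", "2024-03-01"], utc=True)}),
    "ETH/USDT": pd.DataFrame({"date": pd.to_datetime(["2024-02-01", "2024-04-01"], utc=True)}),
}
timeranges = [
    (frame["date"].min().to_pydatetime(), frame["date"].max().to_pydatetime())
    for frame in data.values()
]
min_date = min(timeranges, key=operator.itemgetter(0))[0]
max_date = max(timeranges, key=operator.itemgetter(1))[1]
print(min_date, max_date)  # earliest start across pairs, latest end across pairs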
def validate_backtest_data(data: DataFrame, pair: str, min_date: datetime, def validate_backtest_data(
max_date: datetime, timeframe_min: int) -> bool: data: DataFrame, pair: str, min_date: datetime, max_date: datetime, timeframe_min: int
) -> bool:
""" """
Validates preprocessed backtesting data for missing values and warns about them. Validates preprocessed backtesting data for missing values and warns about them.
@ -478,89 +556,111 @@ def validate_backtest_data(data: DataFrame, pair: str, min_date: datetime,
dflen = len(data) dflen = len(data)
if dflen < expected_frames: if dflen < expected_frames:
found_missing = True found_missing = True
logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values", logger.warning(
pair, expected_frames, dflen, expected_frames - dflen) "%s has missing frames: expected %s, got %s, that's %s missing values",
pair,
expected_frames,
dflen,
expected_frames - dflen,
)
return found_missing return found_missing
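Rough arithmetic behind the missing-frame warning, assuming expected_frames is derived from the tested range divided by the candle size (the actual computation sits in context lines not shown in this hunk):

from datetime import datetime, timezone

timeframe_min = 5
min_date = datetime(2024, 1, 1, 0, 0, tzinfo=timezone.utc)
max_date = datetime(2024, 1, 1, 1, 0, tzinfo=timezone.utc)
# Assumed derivation: one candle per timeframe_min minutes across the tested range
expected_frames = int((max_date - min_date).total_seconds() // 60 // timeframe_min)  # 12
dflen = 10
print(expected_frames - dflen)  # 2 missing candles would be warned about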
def download_data_main(config: Config) -> None: def download_data_main(config: Config) -> None:
timerange = TimeRange() timerange = TimeRange()
if 'days' in config: if "days" in config:
time_since = (datetime.now() - timedelta(days=config['days'])).strftime("%Y%m%d") time_since = (datetime.now() - timedelta(days=config["days"])).strftime("%Y%m%d")
timerange = TimeRange.parse_timerange(f'{time_since}-') timerange = TimeRange.parse_timerange(f"{time_since}-")
if 'timerange' in config: if "timerange" in config:
timerange = timerange.parse_timerange(config['timerange']) timerange = timerange.parse_timerange(config["timerange"])
# Remove stake-currency to skip checks which are not relevant for datadownload # Remove stake-currency to skip checks which are not relevant for datadownload
config['stake_currency'] = '' config["stake_currency"] = ""
pairs_not_available: List[str] = [] pairs_not_available: List[str] = []
# Init exchange # Init exchange
from freqtrade.resolvers.exchange_resolver import ExchangeResolver from freqtrade.resolvers.exchange_resolver import ExchangeResolver
exchange = ExchangeResolver.load_exchange(config, validate=False) exchange = ExchangeResolver.load_exchange(config, validate=False)
available_pairs = [ available_pairs = [
p for p in exchange.get_markets( p
tradable_only=True, active_only=not config.get('include_inactive') for p in exchange.get_markets(
).keys() tradable_only=True, active_only=not config.get("include_inactive")
).keys()
] ]
expanded_pairs = dynamic_expand_pairlist(config, available_pairs) expanded_pairs = dynamic_expand_pairlist(config, available_pairs)
if 'timeframes' not in config: if "timeframes" not in config:
config['timeframes'] = DL_DATA_TIMEFRAMES config["timeframes"] = DL_DATA_TIMEFRAMES
# Manual validations of relevant settings # Manual validations of relevant settings
if not config['exchange'].get('skip_pair_validation', False): if not config["exchange"].get("skip_pair_validation", False):
exchange.validate_pairs(expanded_pairs) exchange.validate_pairs(expanded_pairs)
logger.info(f"About to download pairs: {expanded_pairs}, " logger.info(
f"intervals: {config['timeframes']} to {config['datadir']}") f"About to download pairs: {expanded_pairs}, "
f"intervals: {config['timeframes']} to {config['datadir']}"
)
if len(expanded_pairs) == 0: if len(expanded_pairs) == 0:
logger.warning( logger.warning(
"No pairs available for download. " "No pairs available for download. "
"Please make sure you're using the correct Pair naming for your selected trade mode. \n" "Please make sure you're using the correct Pair naming for your selected trade mode. \n"
f"More info: {DOCS_LINK}/bot-basics/#pair-naming") f"More info: {DOCS_LINK}/bot-basics/#pair-naming"
)
for timeframe in config['timeframes']: for timeframe in config["timeframes"]:
exchange.validate_timeframes(timeframe) exchange.validate_timeframes(timeframe)
# Start downloading # Start downloading
try: try:
if config.get('download_trades'): if config.get("download_trades"):
pairs_not_available = refresh_backtest_trades_data( pairs_not_available = refresh_backtest_trades_data(
exchange, pairs=expanded_pairs, datadir=config['datadir'], exchange,
timerange=timerange, new_pairs_days=config['new_pairs_days'], pairs=expanded_pairs,
erase=bool(config.get('erase')), data_format=config['dataformat_trades'], datadir=config["datadir"],
trading_mode=config.get('trading_mode', TradingMode.SPOT), timerange=timerange,
) new_pairs_days=config["new_pairs_days"],
erase=bool(config.get("erase")),
data_format=config["dataformat_trades"],
trading_mode=config.get("trading_mode", TradingMode.SPOT),
)
# Convert downloaded trade data to different timeframes # Convert downloaded trade data to different timeframes
convert_trades_to_ohlcv( convert_trades_to_ohlcv(
pairs=expanded_pairs, timeframes=config['timeframes'], pairs=expanded_pairs,
datadir=config['datadir'], timerange=timerange, erase=bool(config.get('erase')), timeframes=config["timeframes"],
data_format_ohlcv=config['dataformat_ohlcv'], datadir=config["datadir"],
data_format_trades=config['dataformat_trades'], timerange=timerange,
candle_type=config.get('candle_type_def', CandleType.SPOT), erase=bool(config.get("erase")),
data_format_ohlcv=config["dataformat_ohlcv"],
data_format_trades=config["dataformat_trades"],
candle_type=config.get("candle_type_def", CandleType.SPOT),
) )
else: else:
if not exchange.get_option('ohlcv_has_history', True): if not exchange.get_option("ohlcv_has_history", True):
raise OperationalException( raise OperationalException(
f"Historic klines not available for {exchange.name}. " f"Historic klines not available for {exchange.name}. "
"Please use `--dl-trades` instead for this exchange " "Please use `--dl-trades` instead for this exchange "
"(will unfortunately take a long time)." "(will unfortunately take a long time)."
) )
migrate_data(config, exchange) migrate_data(config, exchange)
pairs_not_available = refresh_backtest_ohlcv_data( pairs_not_available = refresh_backtest_ohlcv_data(
exchange, pairs=expanded_pairs, timeframes=config['timeframes'], exchange,
datadir=config['datadir'], timerange=timerange, pairs=expanded_pairs,
new_pairs_days=config['new_pairs_days'], timeframes=config["timeframes"],
erase=bool(config.get('erase')), data_format=config['dataformat_ohlcv'], datadir=config["datadir"],
trading_mode=config.get('trading_mode', 'spot'), timerange=timerange,
prepend=config.get('prepend_data', False) new_pairs_days=config["new_pairs_days"],
erase=bool(config.get("erase")),
data_format=config["dataformat_ohlcv"],
trading_mode=config.get("trading_mode", "spot"),
prepend=config.get("prepend_data", False),
) )
finally: finally:
if pairs_not_available: if pairs_not_available:
logger.info(f"Pairs [{','.join(pairs_not_available)}] not available " logger.info(
f"on exchange {exchange.name}.") f"Pairs [{','.join(pairs_not_available)}] not available "
f"on exchange {exchange.name}."
)
@ -31,7 +31,8 @@ def calculate_market_change(data: Dict[str, pd.DataFrame], column: str = "close"
def combine_dataframes_by_column( def combine_dataframes_by_column(
data: Dict[str, pd.DataFrame], column: str = "close") -> pd.DataFrame: data: Dict[str, pd.DataFrame], column: str = "close"
) -> pd.DataFrame:
""" """
Combine multiple dataframes "column" Combine multiple dataframes "column"
:param data: Dict of Dataframes, dict key should be pair. :param data: Dict of Dataframes, dict key should be pair.
@ -41,14 +42,15 @@ def combine_dataframes_by_column(
""" """
if not data: if not data:
raise ValueError("No data provided.") raise ValueError("No data provided.")
df_comb = pd.concat([data[pair].set_index('date').rename( df_comb = pd.concat(
{column: pair}, axis=1)[pair] for pair in data], axis=1) [data[pair].set_index("date").rename({column: pair}, axis=1)[pair] for pair in data], axis=1
)
return df_comb return df_comb
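combine_dataframes_by_column pivots one column from each pair's frame into a date-indexed wide frame; a small stand-alone equivalent:

import pandas as pd

data = {
    "BTC/USDT": pd.DataFrame({"date": pd.to_datetime(["2024-01-01", "2024-01-02"], utc=True),
                              "close": [100.0, 110.0]}),
    "ETH/USDT": pd.DataFrame({"date": pd.to_datetime(["2024-01-01", "2024-01-02"], utc=True),
                              "close": [10.0, 9.0]}),
}
df_comb = pd.concat(
    [data[pair].set_index("date").rename({"close": pair}, axis=1)[pair] for pair in data], axis=1
)
print(df_comb)  # one column per pair, indexed by date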
def combined_dataframes_with_rel_mean( def combined_dataframes_with_rel_mean(
data: Dict[str, pd.DataFrame], fromdt: datetime, todt: datetime, data: Dict[str, pd.DataFrame], fromdt: datetime, todt: datetime, column: str = "close"
column: str = "close") -> pd.DataFrame: ) -> pd.DataFrame:
""" """
Combine multiple dataframes "column" Combine multiple dataframes "column"
:param data: Dict of Dataframes, dict key should be pair. :param data: Dict of Dataframes, dict key should be pair.
@ -60,14 +62,15 @@ def combined_dataframes_with_rel_mean(
df_comb = combine_dataframes_by_column(data, column) df_comb = combine_dataframes_by_column(data, column)
# Trim dataframes to the given timeframe # Trim dataframes to the given timeframe
df_comb = df_comb.iloc[(df_comb.index >= fromdt) & (df_comb.index < todt)] df_comb = df_comb.iloc[(df_comb.index >= fromdt) & (df_comb.index < todt)]
df_comb['count'] = df_comb.count(axis=1) df_comb["count"] = df_comb.count(axis=1)
df_comb['mean'] = df_comb.mean(axis=1) df_comb["mean"] = df_comb.mean(axis=1)
df_comb['rel_mean'] = df_comb['mean'].pct_change().fillna(0).cumsum() df_comb["rel_mean"] = df_comb["mean"].pct_change().fillna(0).cumsum()
return df_comb[['mean', 'rel_mean', 'count']] return df_comb[["mean", "rel_mean", "count"]]
def combine_dataframes_with_mean( def combine_dataframes_with_mean(
data: Dict[str, pd.DataFrame], column: str = "close") -> pd.DataFrame: data: Dict[str, pd.DataFrame], column: str = "close"
) -> pd.DataFrame:
""" """
Combine multiple dataframes "column" Combine multiple dataframes "column"
:param data: Dict of Dataframes, dict key should be pair. :param data: Dict of Dataframes, dict key should be pair.
@ -78,13 +81,14 @@ def combine_dataframes_with_mean(
""" """
df_comb = combine_dataframes_by_column(data, column) df_comb = combine_dataframes_by_column(data, column)
df_comb['mean'] = df_comb.mean(axis=1) df_comb["mean"] = df_comb.mean(axis=1)
return df_comb return df_comb
def create_cum_profit(df: pd.DataFrame, trades: pd.DataFrame, col_name: str, def create_cum_profit(
timeframe: str) -> pd.DataFrame: df: pd.DataFrame, trades: pd.DataFrame, col_name: str, timeframe: str
) -> pd.DataFrame:
""" """
Adds a column `col_name` with the cumulative profit for the given trades array. Adds a column `col_name` with the cumulative profit for the given trades array.
:param df: DataFrame with date index :param df: DataFrame with date index
@ -97,11 +101,11 @@ def create_cum_profit(df: pd.DataFrame, trades: pd.DataFrame, col_name: str,
if len(trades) == 0: if len(trades) == 0:
raise ValueError("Trade dataframe empty.") raise ValueError("Trade dataframe empty.")
from freqtrade.exchange import timeframe_to_resample_freq from freqtrade.exchange import timeframe_to_resample_freq
timeframe_freq = timeframe_to_resample_freq(timeframe) timeframe_freq = timeframe_to_resample_freq(timeframe)
# Resample to timeframe to make sure trades match candles # Resample to timeframe to make sure trades match candles
_trades_sum = trades.resample(timeframe_freq, on='close_date' _trades_sum = trades.resample(timeframe_freq, on="close_date")[["profit_abs"]].sum()
)[['profit_abs']].sum() df.loc[:, col_name] = _trades_sum["profit_abs"].cumsum()
df.loc[:, col_name] = _trades_sum['profit_abs'].cumsum()
# Set first value to 0 # Set first value to 0
df.loc[df.iloc[0].name, col_name] = 0 df.loc[df.iloc[0].name, col_name] = 0
# FFill to get continuous # FFill to get continuous
@ -109,29 +113,34 @@ def create_cum_profit(df: pd.DataFrame, trades: pd.DataFrame, col_name: str,
return df return df
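The resample-and-cumsum step above maps trade exits onto candle timestamps; a compact stand-alone version with invented trades and an assumed 1-hour frequency:

import pandas as pd

trades = pd.DataFrame({
    "close_date": pd.to_datetime(["2024-01-01 00:30", "2024-01-01 01:10"], utc=True),
    "profit_abs": [5.0, -2.0],
})
# Sum profits per candle, then accumulate, mirroring the body of create_cum_profit
_trades_sum = trades.resample("1h", on="close_date")[["profit_abs"]].sum()
cum_profit = _trades_sum["profit_abs"].cumsum()
print(cum_profit)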
def _calc_drawdown_series(profit_results: pd.DataFrame, *, date_col: str, value_col: str, def _calc_drawdown_series(
starting_balance: float) -> pd.DataFrame: profit_results: pd.DataFrame, *, date_col: str, value_col: str, starting_balance: float
) -> pd.DataFrame:
max_drawdown_df = pd.DataFrame() max_drawdown_df = pd.DataFrame()
max_drawdown_df['cumulative'] = profit_results[value_col].cumsum() max_drawdown_df["cumulative"] = profit_results[value_col].cumsum()
max_drawdown_df['high_value'] = max_drawdown_df['cumulative'].cummax() max_drawdown_df["high_value"] = max_drawdown_df["cumulative"].cummax()
max_drawdown_df['drawdown'] = max_drawdown_df['cumulative'] - max_drawdown_df['high_value'] max_drawdown_df["drawdown"] = max_drawdown_df["cumulative"] - max_drawdown_df["high_value"]
max_drawdown_df['date'] = profit_results.loc[:, date_col] max_drawdown_df["date"] = profit_results.loc[:, date_col]
if starting_balance: if starting_balance:
cumulative_balance = starting_balance + max_drawdown_df['cumulative'] cumulative_balance = starting_balance + max_drawdown_df["cumulative"]
max_balance = starting_balance + max_drawdown_df['high_value'] max_balance = starting_balance + max_drawdown_df["high_value"]
max_drawdown_df['drawdown_relative'] = ((max_balance - cumulative_balance) / max_balance) max_drawdown_df["drawdown_relative"] = (max_balance - cumulative_balance) / max_balance
else: else:
# NOTE: This is not completely accurate, # NOTE: This is not completely accurate,
# but might be good enough if starting_balance is not available # but might be good enough if starting_balance is not available
max_drawdown_df['drawdown_relative'] = ( max_drawdown_df["drawdown_relative"] = (
(max_drawdown_df['high_value'] - max_drawdown_df['cumulative']) max_drawdown_df["high_value"] - max_drawdown_df["cumulative"]
/ max_drawdown_df['high_value']) ) / max_drawdown_df["high_value"]
return max_drawdown_df return max_drawdown_df
def calculate_underwater(trades: pd.DataFrame, *, date_col: str = 'close_date', def calculate_underwater(
value_col: str = 'profit_ratio', starting_balance: float = 0.0 trades: pd.DataFrame,
): *,
date_col: str = "close_date",
value_col: str = "profit_ratio",
starting_balance: float = 0.0,
):
""" """
Calculate max drawdown and the corresponding close dates Calculate max drawdown and the corresponding close dates
:param trades: DataFrame containing trades (requires columns close_date and profit_ratio) :param trades: DataFrame containing trades (requires columns close_date and profit_ratio)
@ -145,18 +154,20 @@ def calculate_underwater(trades: pd.DataFrame, *, date_col: str = 'close_date',
raise ValueError("Trade dataframe empty.") raise ValueError("Trade dataframe empty.")
profit_results = trades.sort_values(date_col).reset_index(drop=True) profit_results = trades.sort_values(date_col).reset_index(drop=True)
max_drawdown_df = _calc_drawdown_series( max_drawdown_df = _calc_drawdown_series(
profit_results, profit_results, date_col=date_col, value_col=value_col, starting_balance=starting_balance
date_col=date_col, )
value_col=value_col,
starting_balance=starting_balance)
return max_drawdown_df return max_drawdown_df
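_calc_drawdown_series derives the drawdown columns from a cumulative profit curve; a self-contained sketch with invented profits and a 1000-unit starting balance:

import pandas as pd

profits = pd.Series([10.0, -4.0, -8.0, 6.0])
starting_balance = 1000.0

df = pd.DataFrame()
df["cumulative"] = profits.cumsum()
df["high_value"] = df["cumulative"].cummax()
df["drawdown"] = df["cumulative"] - df["high_value"]
cumulative_balance = starting_balance + df["cumulative"]
max_balance = starting_balance + df["high_value"]
df["drawdown_relative"] = (max_balance - cumulative_balance) / max_balance
print(df)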
def calculate_max_drawdown(trades: pd.DataFrame, *, date_col: str = 'close_date', def calculate_max_drawdown(
value_col: str = 'profit_abs', starting_balance: float = 0, trades: pd.DataFrame,
relative: bool = False *,
) -> Tuple[float, pd.Timestamp, pd.Timestamp, float, float, float]: date_col: str = "close_date",
value_col: str = "profit_abs",
starting_balance: float = 0,
relative: bool = False,
) -> Tuple[float, pd.Timestamp, pd.Timestamp, float, float, float]:
""" """
Calculate max drawdown and the corresponding close dates Calculate max drawdown and the corresponding close dates
:param trades: DataFrame containing trades (requires columns close_date and profit_ratio) :param trades: DataFrame containing trades (requires columns close_date and profit_ratio)
@ -172,32 +183,31 @@ def calculate_max_drawdown(trades: pd.DataFrame, *, date_col: str = 'close_date'
raise ValueError("Trade dataframe empty.") raise ValueError("Trade dataframe empty.")
profit_results = trades.sort_values(date_col).reset_index(drop=True) profit_results = trades.sort_values(date_col).reset_index(drop=True)
max_drawdown_df = _calc_drawdown_series( max_drawdown_df = _calc_drawdown_series(
profit_results, profit_results, date_col=date_col, value_col=value_col, starting_balance=starting_balance
date_col=date_col,
value_col=value_col,
starting_balance=starting_balance
) )
idxmin = ( idxmin = (
max_drawdown_df['drawdown_relative'].idxmax() max_drawdown_df["drawdown_relative"].idxmax()
if relative else max_drawdown_df['drawdown'].idxmin() if relative
else max_drawdown_df["drawdown"].idxmin()
) )
if idxmin == 0: if idxmin == 0:
raise ValueError("No losing trade, therefore no drawdown.") raise ValueError("No losing trade, therefore no drawdown.")
high_date = profit_results.loc[max_drawdown_df.iloc[:idxmin]['high_value'].idxmax(), date_col] high_date = profit_results.loc[max_drawdown_df.iloc[:idxmin]["high_value"].idxmax(), date_col]
low_date = profit_results.loc[idxmin, date_col] low_date = profit_results.loc[idxmin, date_col]
high_val = max_drawdown_df.loc[max_drawdown_df.iloc[:idxmin] high_val = max_drawdown_df.loc[
['high_value'].idxmax(), 'cumulative'] max_drawdown_df.iloc[:idxmin]["high_value"].idxmax(), "cumulative"
low_val = max_drawdown_df.loc[idxmin, 'cumulative'] ]
max_drawdown_rel = max_drawdown_df.loc[idxmin, 'drawdown_relative'] low_val = max_drawdown_df.loc[idxmin, "cumulative"]
max_drawdown_rel = max_drawdown_df.loc[idxmin, "drawdown_relative"]
return ( return (
abs(max_drawdown_df.loc[idxmin, 'drawdown']), abs(max_drawdown_df.loc[idxmin, "drawdown"]),
high_date, high_date,
low_date, low_date,
high_val, high_val,
low_val, low_val,
max_drawdown_rel max_drawdown_rel,
) )
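A hedged usage sketch for the tuple returned above, assuming the function is importable from freqtrade.data.metrics and that trades carries close_date/profit_abs columns:

import pandas as pd

from freqtrade.data.metrics import calculate_max_drawdown  # assumed import path

trades = pd.DataFrame({
    "close_date": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"], utc=True),
    "profit_abs": [50.0, -30.0, -40.0],
})
abs_dd, high_date, low_date, high_val, low_val, rel_dd = calculate_max_drawdown(
    trades, starting_balance=1000.0
)
print(abs_dd, rel_dd)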
@ -213,9 +223,9 @@ def calculate_csum(trades: pd.DataFrame, starting_balance: float = 0) -> Tuple[f
raise ValueError("Trade dataframe empty.") raise ValueError("Trade dataframe empty.")
csum_df = pd.DataFrame() csum_df = pd.DataFrame()
csum_df['sum'] = trades['profit_abs'].cumsum() csum_df["sum"] = trades["profit_abs"].cumsum()
csum_min = csum_df['sum'].min() + starting_balance csum_min = csum_df["sum"].min() + starting_balance
csum_max = csum_df['sum'].max() + starting_balance csum_max = csum_df["sum"].max() + starting_balance
return csum_min, csum_max return csum_min, csum_max
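calculate_csum just shifts the running profit sum by the starting balance; a quick check with invented numbers:

import pandas as pd

profit_abs = pd.Series([10.0, -25.0, 40.0])
starting_balance = 100.0
csum = profit_abs.cumsum()              # 10, -15, 25
print(csum.min() + starting_balance, csum.max() + starting_balance)  # 85.0 125.0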
@ -245,28 +255,29 @@ def calculate_expectancy(trades: pd.DataFrame) -> Tuple[float, float]:
expectancy_ratio = 100 expectancy_ratio = 100
if len(trades) > 0: if len(trades) > 0:
winning_trades = trades.loc[trades['profit_abs'] > 0] winning_trades = trades.loc[trades["profit_abs"] > 0]
losing_trades = trades.loc[trades['profit_abs'] < 0] losing_trades = trades.loc[trades["profit_abs"] < 0]
profit_sum = winning_trades['profit_abs'].sum() profit_sum = winning_trades["profit_abs"].sum()
loss_sum = abs(losing_trades['profit_abs'].sum()) loss_sum = abs(losing_trades["profit_abs"].sum())
nb_win_trades = len(winning_trades) nb_win_trades = len(winning_trades)
nb_loss_trades = len(losing_trades) nb_loss_trades = len(losing_trades)
average_win = (profit_sum / nb_win_trades) if nb_win_trades > 0 else 0 average_win = (profit_sum / nb_win_trades) if nb_win_trades > 0 else 0
average_loss = (loss_sum / nb_loss_trades) if nb_loss_trades > 0 else 0 average_loss = (loss_sum / nb_loss_trades) if nb_loss_trades > 0 else 0
winrate = (nb_win_trades / len(trades)) winrate = nb_win_trades / len(trades)
loserate = (nb_loss_trades / len(trades)) loserate = nb_loss_trades / len(trades)
expectancy = (winrate * average_win) - (loserate * average_loss) expectancy = (winrate * average_win) - (loserate * average_loss)
if (average_loss > 0): if average_loss > 0:
risk_reward_ratio = average_win / average_loss risk_reward_ratio = average_win / average_loss
expectancy_ratio = ((1 + risk_reward_ratio) * winrate) - 1 expectancy_ratio = ((1 + risk_reward_ratio) * winrate) - 1
return expectancy, expectancy_ratio return expectancy, expectancy_ratio
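Worked numbers for the expectancy formulas above, using a hypothetical set of 6 winners averaging 20 and 4 losers averaging 10:

nb_win_trades, nb_loss_trades = 6, 4
average_win, average_loss = 20.0, 10.0
winrate = nb_win_trades / (nb_win_trades + nb_loss_trades)     # 0.6
loserate = nb_loss_trades / (nb_win_trades + nb_loss_trades)   # 0.4
expectancy = winrate * average_win - loserate * average_loss   # 8.0 per trade
risk_reward_ratio = average_win / average_loss                 # 2.0
expectancy_ratio = (1 + risk_reward_ratio) * winrate - 1       # 0.8
print(expectancy, expectancy_ratio)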
def calculate_sortino(trades: pd.DataFrame, min_date: datetime, max_date: datetime, def calculate_sortino(
starting_balance: float) -> float: trades: pd.DataFrame, min_date: datetime, max_date: datetime, starting_balance: float
) -> float:
""" """
Calculate sortino Calculate sortino
:param trades: DataFrame containing trades (requires columns profit_abs) :param trades: DataFrame containing trades (requires columns profit_abs)
@ -275,12 +286,12 @@ def calculate_sortino(trades: pd.DataFrame, min_date: datetime, max_date: dateti
if (len(trades) == 0) or (min_date is None) or (max_date is None) or (min_date == max_date): if (len(trades) == 0) or (min_date is None) or (max_date is None) or (min_date == max_date):
return 0 return 0
total_profit = trades['profit_abs'] / starting_balance total_profit = trades["profit_abs"] / starting_balance
days_period = max(1, (max_date - min_date).days) days_period = max(1, (max_date - min_date).days)
expected_returns_mean = total_profit.sum() / days_period expected_returns_mean = total_profit.sum() / days_period
down_stdev = np.std(trades.loc[trades['profit_abs'] < 0, 'profit_abs'] / starting_balance) down_stdev = np.std(trades.loc[trades["profit_abs"] < 0, "profit_abs"] / starting_balance)
if down_stdev != 0 and not np.isnan(down_stdev): if down_stdev != 0 and not np.isnan(down_stdev):
sortino_ratio = expected_returns_mean / down_stdev * np.sqrt(365) sortino_ratio = expected_returns_mean / down_stdev * np.sqrt(365)
@ -292,8 +303,9 @@ def calculate_sortino(trades: pd.DataFrame, min_date: datetime, max_date: dateti
return sortino_ratio return sortino_ratio
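The Sortino computation above annualises mean daily returns over the downside deviation; a stand-alone sketch with invented per-trade profits over an assumed 30-day window:

import numpy as np
import pandas as pd

profit_abs = pd.Series([12.0, -5.0, 8.0, -3.0])
starting_balance = 1000.0
days_period = 30

total_profit = profit_abs / starting_balance
expected_returns_mean = total_profit.sum() / days_period
down_stdev = np.std(profit_abs[profit_abs < 0] / starting_balance)  # losing trades only
sortino = expected_returns_mean / down_stdev * np.sqrt(365) if down_stdev != 0 else 0.0
print(sortino)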
def calculate_sharpe(trades: pd.DataFrame, min_date: datetime, max_date: datetime, def calculate_sharpe(
starting_balance: float) -> float: trades: pd.DataFrame, min_date: datetime, max_date: datetime, starting_balance: float
) -> float:
""" """
Calculate sharpe Calculate sharpe
:param trades: DataFrame containing trades (requires column profit_abs) :param trades: DataFrame containing trades (requires column profit_abs)
@ -302,7 +314,7 @@ def calculate_sharpe(trades: pd.DataFrame, min_date: datetime, max_date: datetim
if (len(trades) == 0) or (min_date is None) or (max_date is None) or (min_date == max_date): if (len(trades) == 0) or (min_date is None) or (max_date is None) or (min_date == max_date):
return 0 return 0
total_profit = trades['profit_abs'] / starting_balance total_profit = trades["profit_abs"] / starting_balance
days_period = max(1, (max_date - min_date).days) days_period = max(1, (max_date - min_date).days)
expected_returns_mean = total_profit.sum() / days_period expected_returns_mean = total_profit.sum() / days_period
@ -318,8 +330,9 @@ def calculate_sharpe(trades: pd.DataFrame, min_date: datetime, max_date: datetim
return sharp_ratio return sharp_ratio
def calculate_calmar(trades: pd.DataFrame, min_date: datetime, max_date: datetime, def calculate_calmar(
starting_balance: float) -> float: trades: pd.DataFrame, min_date: datetime, max_date: datetime, starting_balance: float
) -> float:
""" """
Calculate calmar Calculate calmar
:param trades: DataFrame containing trades (requires columns close_date and profit_abs) :param trades: DataFrame containing trades (requires columns close_date and profit_abs)
@ -328,7 +341,7 @@ def calculate_calmar(trades: pd.DataFrame, min_date: datetime, max_date: datetim
if (len(trades) == 0) or (min_date is None) or (max_date is None) or (min_date == max_date): if (len(trades) == 0) or (min_date is None) or (max_date is None) or (min_date == max_date):
return 0 return 0
total_profit = trades['profit_abs'].sum() / starting_balance total_profit = trades["profit_abs"].sum() / starting_balance
days_period = max(1, (max_date - min_date).days) days_period = max(1, (max_date - min_date).days)
# adding slippage of 0.1% per trade # adding slippage of 0.1% per trade