make docs reflect reality, move download_all_data to new utils.py file, automatic startup_candle detection

This commit is contained in:
robcaulk 2022-08-26 15:30:01 +02:00
parent 4b7e640f31
commit 65b552e310
4 changed files with 14 additions and 63 deletions

View File

@ -113,7 +113,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `use_SVM_to_remove_outliers` | Ask FreqAI to train a support vector machine to detect and remove outliers from the training data set as well as from incoming data points. <br> **Datatype:** boolean.
| `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. E.g. `nu` is, *very* broadly, the percentage of data points that should be considered outliers. `shuffle` is false by default to maintain reproducibility, but these and all other parameters can be added/changed in this dictionary. <br> **Datatype:** dictionary.
| `stratify_training_data` | This value is used to indicate the stratification of the data. e.g. 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. <br> **Datatype:** positive integer.
| `indicator_max_period_candles` | **Deprecated in favor of** strategy set `startup_candle_count`, however, both configuration parameters provide the same functionality; the maximum *period* used in `populate_any_indicators()` for indicator creation (timeframe independent). FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN <br> **Datatype:** positive integer.
| `indicator_max_period_candles` | **No longer used**. Users must instead set `startup_candle_count` in the strategy, which defines the maximum *period* used in `populate_any_indicators()` for indicator creation (timeframe independent). FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN. <br> **Datatype:** positive integer.
| `indicator_periods_candles` | A list of integers used to duplicate all indicators according to a set of periods and add them to the feature set. <br> **Datatype:** list of positive integers.
| `use_DBSCAN_to_remove_outliers` | Inactive by default. If true, FreqAI clusters data using DBSCAN to identify and remove outliers from training and prediction data. <br> **Datatype:** boolean.
| | **Data split parameters**
@ -162,7 +162,6 @@ The user interface is isolated to the typical config file. A typical FreqAI conf
"label_period_candles": 24,
"include_shifted_candles": 2,
"weight_factor": 0,
"indicator_max_period_candles": 20,
"indicator_periods_candles": [10, 20]
},
"data_split_parameters" : {
@ -387,6 +386,10 @@ The FreqAI strategy requires the user to include the following lines of code in
```python
# user should define the maximum startup candle count (the largest number of candles
# passed to any single indicator)
startup_candle_count: int = 20
def informative_pairs(self):
whitelist_pairs = self.dp.current_whitelist()
corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]

View File

@ -106,15 +106,17 @@ class DataProvider:
return self.__cached_pairs_backtesting[saved_pair].copy()
def get_required_startup(self, timeframe: str) -> int:
    """
    Return the number of candles required before the first usable data point.

    When FreqAI is not enabled, this is the configured `startup_candle_count`
    (0 if unset). When FreqAI is enabled, `startup_candle_count` in the config
    is first raised to at least the largest entry of
    `freqai.feature_parameters.indicator_periods_candles` (the updated value is
    written back to the config), and the number of candles covering
    `freqai.train_period_days` is added on top.

    :param timeframe: Timeframe used to convert the training period into candles.
    :return: Total number of startup candles.
    """
    freqai_config = self._config.get('freqai', {})
    if not freqai_config.get('enabled', False):
        return self._config.get('startup_candle_count', 0)

    startup_candles = self._config.get('startup_candle_count', 0)
    indicator_periods = freqai_config['feature_parameters']['indicator_periods_candles']
    # make sure the startup candles are at least the set maximum indicator period;
    # default=0 keeps an empty period list from raising ValueError
    self._config['startup_candle_count'] = max(
        startup_candles, max(indicator_periods, default=0))

    tf_seconds = timeframe_to_seconds(timeframe)
    # 86400 seconds per day: convert the training window from days to candles
    train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds
    total_candles = int(self._config['startup_candle_count'] + train_candles)
    logger.info(f'Increasing startup_candle_count for freqai to {total_candles}')
    return total_candles

View File

@ -16,12 +16,8 @@ from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
from freqtrade.strategy.interface import IStrategy
@ -1002,53 +998,3 @@ class FreqaiDataKitchen:
if self.unique_classes:
for label in self.unique_classes:
self.unique_class_list += list(self.unique_classes[label])
# Methods called by interface.py (load_freqai_model())
def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
    """
    Called only once upon start of bot to download the necessary data for
    populating indicators and training the model.
    :param dp: DataProvider instance attached to the strategy
    :param config: Configuration dictionary. Read here for `include_inactive`,
        `freqai.feature_parameters.include_timeframes`, `datadir`,
        `dataformat_ohlcv`, `trading_mode` and `prepend_data`; it is also passed
        to `dynamic_expand_pairlist`.
    :raises OperationalException: if the DataProvider has no exchange attached.
    """
    exchange = dp._exchange
    if not exchange:
        # Not realistic - this is only called in live mode.
        raise OperationalException('No exchange object found.')

    include_inactive = config.get('include_inactive')
    markets = [
        pair for pair, market in exchange.markets.items()
        if market_is_active(market) or include_inactive
    ]
    all_pairs = dynamic_expand_pairlist(config, markets)

    # One reference timestamp for every timeframe, so all ranges end at the same instant.
    now_ts = int(datetime.datetime.now(tz=datetime.timezone.utc).timestamp())

    for tf in config["freqai"]["feature_parameters"].get("include_timeframes"):
        # Start with an empty range at "now", then move the start back by the
        # number of seconds covered by the required startup candles.
        timerange = TimeRange()
        timerange.startts = now_ts
        timerange.stopts = now_ts
        startup_candles = dp.get_required_startup(str(tf))
        tf_seconds = timeframe_to_seconds(str(tf))
        timerange.subtract_start(tf_seconds * startup_candles)
        new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
        # FIXME: now that we are looping on `refresh_backtest_ohlcv_data`, the function
        # redownloads the funding rate for each pair.
        refresh_backtest_ohlcv_data(
            exchange,
            pairs=all_pairs,
            timeframes=[tf],
            datadir=config["datadir"],
            timerange=timerange,
            new_pairs_days=new_pairs_days,
            erase=False,
            data_format=config.get("dataformat_ohlcv", "json"),
            trading_mode=config.get("trading_mode", "spot"),
            prepend=config.get("prepend_data", False),
        )

View File

@ -148,7 +148,7 @@ class IStrategy(ABC, HyperStrategyMixin):
def load_freqAI_model(self) -> None:
if self.config.get('freqai', {}).get('enabled', False):
# Import here to avoid importing this if freqAI is disabled
from freqtrade.freqai.data_kitchen import (download_all_data_for_training)
from freqtrade.freqai.utils import download_all_data_for_training
from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver
self.freqai = FreqaiModelResolver.load_freqaimodel(self.config)
self.freqai_info = self.config["freqai"]