Allow user to go live and start from pretrained models (after a completed backtest) by simply reusing the identifier config parameter while dry/live.

This commit is contained in:
robcaulk 2022-05-25 14:40:32 +02:00
parent 7486d9d9e2
commit b79d4e8876
5 changed files with 33 additions and 39 deletions

View File

@ -55,10 +55,9 @@
"15m"
],
"train_period": 30,
"backtest_period": 7,
"backtest_period": 10,
"identifier": "example",
"live_trained_timerange": "",
"live_full_backtestrange": "",
"live_trained_timestamp": 0,
"corr_pairlist": [
"BTC/USDT",
"ETH/USDT",

View File

@ -158,7 +158,7 @@ a specific pair or timeframe, they should use the following structure inside `po
if pair == metadata['pair'] and tf == self.timeframe:
df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7
df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25
```
(Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`)
@ -270,27 +270,22 @@ freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.
By default, Freqai will not find any existing models and will start by training a new one
given the user configuration settings. Following training, it will use that model to predict for the
duration of `backtest_period`. After a full `backtest_period` has elapsed, Freqai will auto retrain
a new model, and begin making predictions with the updated model.
a new model, and begin making predictions with the updated model. FreqAI in live mode permits
the user to use fractional days (e.g. 0.1) in the `backtest_period`, which enables more frequent
retraining.
If the user wishes to start dry/live from a saved model, the following configuration
parameters need to be set:
If the user wishes to start dry/live from a backtested saved model, the user only needs to reuse
the same `identifier` parameter:
```json
"freqai": {
"identifier": "example",
"live_trained_timerange": "20220330-20220429",
"live_full_backtestrange": "20220302-20220501"
}
```
Where the `identifier` is the same identifier which was set during the backtesting/training. Meanwhile,
the `live_trained_timerange` is the sub-trained timerange (the training window) which was set
during backtesting/training. These are available to the user inside `user_data/models/*/sub-train-*`.
`live_full_backtestrange` was the full data range associated with the backtest/training (the full time
window that the training window and backtesting windows slide through). These values can be located
inside the `user_data/models/` directory. In this case, although Freqai will initiate with a
pre-trained model, if a full `backtest_period` has elapsed since the end of the user set
`live_trained_timerange`, it will self retrain.
In this case, although FreqAI will initiate with a
pre-trained model, it will still check to see how much time has elapsed since the model was trained,
and if a full `backtest_period` has elapsed since the loaded model was trained, FreqAI will self retrain.
## Data analysis techniques

View File

@ -440,15 +440,13 @@ CONF_SCHEMA = {
"train_period": {"type": "integer", "default": 0},
"backtest_period": {"type": "float", "default": 7},
"identifier": {"type": "str", "default": "example"},
"live_trained_timerange": {"type": "str"},
"live_full_backtestrange": {"type": "str"},
"corr_pairlist": {"type": "list"},
"feature_parameters": {
"type": "object",
"properties": {
"period": {"type": "integer"},
"shift": {"type": "integer", "default": 0},
"DI_threshold": {"type": "integer", "default": 0},
"DI_threshold": {"type": "float", "default": 0},
"weight_factor": {"type": "number", "default": 0},
"principal_component_analysis": {"type": "boolean", "default": False},
"use_SVM_to_remove_outliers": {"type": "boolean", "default": False},

View File

@ -74,8 +74,7 @@ class FreqaiDataKitchen:
def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None:
self.full_path = Path(self.config['user_data_dir'] /
"models" /
str(self.freqai_config.get('live_full_backtestrange') +
self.freqai_config.get('identifier')))
str(self.freqai_config.get('identifier')))
self.data_path = Path(self.full_path / str("sub-train" + "-" +
metadata['pair'].split("/")[0] +
@ -114,11 +113,11 @@ class FreqaiDataKitchen:
save_path / str(self.model_filename + "_trained_df.pkl")
)
if self.live:
self.data_drawer.model_dictionary[self.model_filename] = model
self.data_drawer.pair_dict[coin]['model_filename'] = self.model_filename
self.data_drawer.pair_dict[coin]['data_path'] = str(self.data_path)
self.data_drawer.save_drawer_to_disk()
# if self.live:
self.data_drawer.model_dictionary[self.model_filename] = model
self.data_drawer.pair_dict[coin]['model_filename'] = self.model_filename
self.data_drawer.pair_dict[coin]['data_path'] = str(self.data_path)
self.data_drawer.save_drawer_to_disk()
# TODO add a helper function to let user save/load any data they are custom adding. We
# do not want them having to edit the default save/load methods here. Below is an example
@ -142,9 +141,9 @@ class FreqaiDataKitchen:
:model: User trained model which can be inferenced for new predictions
"""
if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
# if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp:
self.data = json.load(fp)
@ -696,7 +695,7 @@ class FreqaiDataKitchen:
self.full_path = Path(
self.config["user_data_dir"]
/ "models"
/ str(full_timerange + self.freqai_config.get("identifier"))
/ str(self.freqai_config.get("identifier"))
)
config_path = Path(self.config["config_files"][0])
@ -750,10 +749,10 @@ class FreqaiDataKitchen:
str(int(trained_timerange.stopts))))
self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
# this is not persistent at the moment TODO
self.freqai_config['live_trained_timerange'] = str(int(trained_timerange.stopts))
# self.freqai_config['live_trained_timerange'] = str(int(trained_timerange.stopts))
# enables persistence, but not fully implemented into save/load data yet
self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
# self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict) -> None:

View File

@ -77,13 +77,13 @@ class IFreqaiModel(ABC):
"""
self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
self.data_drawer.set_pair_dict_info(metadata)
# For live, we may be training new models on a separate thread while other pairs still need
# to inference their historical models. Here we use a training queue system to handle this
# and we keep the flag self.training_on_separate_thread in the current object to help
# determine what the current pair will do
if self.live:
self.data_drawer.set_pair_dict_info(metadata)
if (not self.training_on_separate_thread and
self.data_drawer.training_queue == 1):
@ -137,6 +137,7 @@ class IFreqaiModel(ABC):
for tr_train, tr_backtest in zip(
dh.training_timeranges, dh.backtesting_timeranges
):
(_, _, _) = self.data_drawer.get_pair_dict_info(metadata)
gc.collect()
dh.data = {} # clean the pair specific data between training window sliding
self.training_timerange = tr_train
@ -150,9 +151,12 @@ class IFreqaiModel(ABC):
if not self.model_exists(metadata["pair"], dh,
trained_timestamp=trained_timestamp.stopts):
self.model = self.train(dataframe_train, metadata, dh)
dh.save_data(self.model)
self.data_drawer.pair_dict[metadata['pair']][
'trained_timestamp'] = trained_timestamp.stopts
dh.set_new_model_names(metadata, trained_timestamp)
dh.save_data(self.model, metadata['pair'])
else:
self.model = dh.load_data()
self.model = dh.load_data(metadata['pair'])
# strategy_provided_features = self.dh.find_features(dataframe_train)
# # FIXME doesnt work with PCA
@ -295,8 +299,7 @@ class IFreqaiModel(ABC):
def set_full_path(self) -> None:
self.full_path = Path(self.config['user_data_dir'] /
"models" /
str(self.freqai_info.get('live_full_backtestrange') +
self.freqai_info.get('identifier')))
str(self.freqai_info.get('identifier')))
@threaded
def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,