mirror of
https://github.com/freqtrade/freqtrade.git
synced 2024-11-10 10:21:59 +00:00
cleanup, add clarity to comments and docstrings
This commit is contained in:
parent
58b5abbaa6
commit
35bed842cb
|
@ -21,18 +21,6 @@ from freqtrade.strategy.interface import IStrategy
|
|||
pd.options.mode.chained_assignment = None
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# FIXME: suppress stdout for background training?
|
||||
# class DummyFile(object):
|
||||
# def write(self, x): pass
|
||||
|
||||
|
||||
# @contextlib.contextmanager
|
||||
# def nostdout():
|
||||
# save_stdout = sys.stdout
|
||||
# sys.stdout = DummyFile()
|
||||
# yield
|
||||
# sys.stdout = save_stdout
|
||||
|
||||
|
||||
def threaded(fn):
|
||||
def wrapper(*args, **kwargs):
|
||||
|
@ -57,8 +45,6 @@ class IFreqaiModel(ABC):
|
|||
self.data_split_parameters = config["freqai"]["data_split_parameters"]
|
||||
self.model_training_parameters = config["freqai"]["model_training_parameters"]
|
||||
self.feature_parameters = config["freqai"]["feature_parameters"]
|
||||
# self.backtest_timerange = config["timerange"]
|
||||
|
||||
self.time_last_trained = None
|
||||
self.current_time = None
|
||||
self.model = None
|
||||
|
@ -66,12 +52,6 @@ class IFreqaiModel(ABC):
|
|||
self.training_on_separate_thread = False
|
||||
self.retrain = False
|
||||
self.first = True
|
||||
# if self.freqai_info.get('live_trained_timerange'):
|
||||
# self.new_trained_timerange = TimeRange.parse_timerange(
|
||||
# self.freqai_info['live_trained_timerange'])
|
||||
# else:
|
||||
# self.new_trained_timerange = TimeRange()
|
||||
|
||||
self.set_full_path()
|
||||
self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
|
||||
self.config['exchange']['pair_whitelist'])
|
||||
|
@ -93,12 +73,7 @@ class IFreqaiModel(ABC):
|
|||
"""
|
||||
Entry point to the FreqaiModel from a specific pair, it will train a new model if
|
||||
necessary before making the prediction.
|
||||
The backtesting and training paradigm is a sliding training window
|
||||
with a following backtest window. Both windows slide according to the
|
||||
length of the backtest window. This function is not intended to be
|
||||
overridden by children of IFreqaiModel, but technically, it can be
|
||||
if the user wishes to make deeper changes to the sliding window
|
||||
logic.
|
||||
|
||||
:params:
|
||||
:dataframe: Full dataframe coming from strategy - it contains entire
|
||||
backtesting timerange + additional historical data necessary to train
|
||||
|
@ -108,10 +83,12 @@ class IFreqaiModel(ABC):
|
|||
|
||||
self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
|
||||
|
||||
# FreqaiDataKitchen is reinstantiated for each coin
|
||||
# For live, we may be training new models on a separate thread while other pairs still need
|
||||
# to inference their historical models. Here we use a training queue system to handle this
|
||||
# and we keep the flag self.training_on_separate_thread in the current object to help
|
||||
# determine what the current pair will do
|
||||
if self.live:
|
||||
self.data_drawer.set_pair_dict_info(metadata)
|
||||
print('Current train queue:', self.data_drawer.training_queue)
|
||||
if (not self.training_on_separate_thread and
|
||||
self.data_drawer.training_queue == 1):
|
||||
|
||||
|
@ -124,13 +101,38 @@ class IFreqaiModel(ABC):
|
|||
self.live, metadata["pair"])
|
||||
dh = self.start_live(dataframe, metadata, strategy, self.dh_fg)
|
||||
|
||||
# return (dh.full_predictions, dh.full_do_predict,
|
||||
# dh.full_target_mean, dh.full_target_std)
|
||||
|
||||
# For backtesting, each pair enters and then gets trained for each window along the
|
||||
# sliding window defined by "train_period" (training window) and "backtest_period"
|
||||
# (backtest window, i.e. window immediately following the training window).
|
||||
# FreqAI slides the window and sequentially builds the backtesting results before returning
|
||||
# the concatenated results for the full backtesting period back to the strategy.
|
||||
else:
|
||||
self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"])
|
||||
logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
|
||||
dh = self.start_backtesting(dataframe, metadata, self.dh)
|
||||
|
||||
return (dh.full_predictions, dh.full_do_predict,
|
||||
dh.full_target_mean, dh.full_target_std)
|
||||
|
||||
# Backtesting only
|
||||
self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"])
|
||||
|
||||
logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
|
||||
def start_backtesting(self, dataframe: DataFrame, metadata: dict,
|
||||
dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
|
||||
"""
|
||||
The main broad execution for backtesting. For backtesting, each pair enters and then gets
|
||||
trained for each window along the sliding window defined by "train_period" (training window)
|
||||
and "backtest_period" (backtest window, i.e. window immediately following the
|
||||
training window). FreqAI slides the window and sequentially builds the backtesting results
|
||||
before returning the concatenated results for the full backtesting period back to the
|
||||
strategy.
|
||||
:params:
|
||||
dataframe: DataFrame = strategy passed dataframe
|
||||
metadata: Dict = pair metadata
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
:returns:
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
"""
|
||||
|
||||
# Loop enforcing the sliding window training/backtesting paradigm
|
||||
# tr_train is the training time range e.g. 1 historical month
|
||||
|
@ -138,49 +140,54 @@ class IFreqaiModel(ABC):
|
|||
# following tr_train. Both of these windows slide through the
|
||||
# entire backtest
|
||||
for tr_train, tr_backtest in zip(
|
||||
self.dh.training_timeranges, self.dh.backtesting_timeranges
|
||||
dh.training_timeranges, dh.backtesting_timeranges
|
||||
):
|
||||
gc.collect()
|
||||
# self.config['timerange'] = tr_train
|
||||
self.dh.data = {} # clean the pair specific data between models
|
||||
dh.data = {} # clean the pair specific data between training window sliding
|
||||
self.training_timerange = tr_train
|
||||
dataframe_train = self.dh.slice_dataframe(tr_train, dataframe)
|
||||
dataframe_backtest = self.dh.slice_dataframe(tr_backtest, dataframe)
|
||||
dataframe_train = dh.slice_dataframe(tr_train, dataframe)
|
||||
dataframe_backtest = dh.slice_dataframe(tr_backtest, dataframe)
|
||||
logger.info("training %s for %s", metadata["pair"], tr_train)
|
||||
trained_timestamp = TimeRange.parse_timerange(tr_train)
|
||||
self.dh.data_path = Path(self.dh.full_path /
|
||||
dh.data_path = Path(dh.full_path /
|
||||
str("sub-train" + "-" + metadata['pair'].split("/")[0] +
|
||||
str(int(trained_timestamp.stopts))))
|
||||
if not self.model_exists(metadata["pair"], self.dh,
|
||||
if not self.model_exists(metadata["pair"], dh,
|
||||
trained_timestamp=trained_timestamp.stopts):
|
||||
self.model = self.train(dataframe_train, metadata, self.dh)
|
||||
self.dh.save_data(self.model)
|
||||
self.model = self.train(dataframe_train, metadata, dh)
|
||||
dh.save_data(self.model)
|
||||
else:
|
||||
self.model = self.dh.load_data()
|
||||
self.model = dh.load_data()
|
||||
|
||||
# strategy_provided_features = self.dh.find_features(dataframe_train)
|
||||
# # TOFIX doesnt work with PCA
|
||||
# # FIXME doesnt work with PCA
|
||||
# if strategy_provided_features != self.dh.training_features_list:
|
||||
# logger.info("User changed input features, retraining model.")
|
||||
# self.model = self.train(dataframe_train, metadata)
|
||||
# self.dh.save_data(self.model)
|
||||
|
||||
preds, do_preds = self.predict(dataframe_backtest, self.dh)
|
||||
preds, do_preds = self.predict(dataframe_backtest, dh)
|
||||
|
||||
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
|
||||
print('predictions', len(self.dh.full_predictions),
|
||||
'do_predict', len(self.dh.full_do_predict))
|
||||
dh.append_predictions(preds, do_preds, len(dataframe_backtest))
|
||||
print('predictions', len(dh.full_predictions),
|
||||
'do_predict', len(dh.full_do_predict))
|
||||
|
||||
self.dh.fill_predictions(len(dataframe))
|
||||
dh.fill_predictions(len(dataframe))
|
||||
|
||||
return (self.dh.full_predictions, self.dh.full_do_predict,
|
||||
self.dh.full_target_mean, self.dh.full_target_std)
|
||||
return dh
|
||||
|
||||
def start_live(self, dataframe: DataFrame, metadata: dict,
|
||||
strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
|
||||
"""
|
||||
The main broad execution for dry/live. This function will check if a retraining should be
|
||||
performed, and if so, retrain and reset the model.
|
||||
|
||||
:params:
|
||||
dataframe: DataFrame = strategy passed dataframe
|
||||
metadata: Dict = pair metadata
|
||||
strategy: IStrategy = currently employed strategy
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
:returns:
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
"""
|
||||
|
||||
(model_filename,
|
||||
|
@ -190,22 +197,16 @@ class IFreqaiModel(ABC):
|
|||
if not self.training_on_separate_thread:
|
||||
file_exists = False
|
||||
|
||||
if trained_timestamp != 0:
|
||||
if trained_timestamp != 0: # historical model available
|
||||
dh.set_paths(metadata, trained_timestamp)
|
||||
# data_drawer thinks the file exists, verify here
|
||||
file_exists = self.model_exists(metadata['pair'],
|
||||
dh,
|
||||
trained_timestamp=trained_timestamp,
|
||||
model_filename=model_filename)
|
||||
|
||||
# if not self.training_on_separate_thread:
|
||||
# this will also prevent other pairs from trying to train simultaneously.
|
||||
(self.retrain,
|
||||
new_trained_timerange) = dh.check_if_new_training_required(trained_timestamp)
|
||||
dh.set_paths(metadata, new_trained_timerange.stopts)
|
||||
# if self.training_on_separate_thread:
|
||||
# logger.info("FreqAI training a new model on background thread.")
|
||||
# self.retrain = False
|
||||
|
||||
if self.retrain or not file_exists:
|
||||
if coin_first:
|
||||
|
@ -217,10 +218,10 @@ class IFreqaiModel(ABC):
|
|||
|
||||
else:
|
||||
logger.info("FreqAI training a new model on background thread.")
|
||||
self.data_drawer.pair_dict[metadata['pair']]['priority'] = 1
|
||||
|
||||
self.model = dh.load_data(coin=metadata['pair'])
|
||||
|
||||
# FIXME
|
||||
# strategy_provided_features = dh.find_features(dataframe)
|
||||
# if strategy_provided_features != dh.training_features_list:
|
||||
# self.train_model_in_series(new_trained_timerange, metadata, strategy)
|
||||
|
@ -240,17 +241,17 @@ class IFreqaiModel(ABC):
|
|||
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||
dh.principal_component_analysis()
|
||||
|
||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||
# dh.determine_statistical_distributions()
|
||||
# if self.feature_parameters["remove_outliers"]:
|
||||
# dh.remove_outliers(predict=False)
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||
dh.use_SVM_to_remove_outliers(predict=False)
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||
dh.data["avg_mean_dist"] = dh.compute_distances()
|
||||
|
||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||
# dh.determine_statistical_distributions()
|
||||
# if self.feature_parameters["remove_outliers"]:
|
||||
# dh.remove_outliers(predict=False)
|
||||
|
||||
def data_cleaning_predict(self, dh: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Base data cleaning method for predict.
|
||||
|
@ -265,16 +266,16 @@ class IFreqaiModel(ABC):
|
|||
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||
dh.pca_transform()
|
||||
|
||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||
# dh.determine_statistical_distributions()
|
||||
# if self.feature_parameters["remove_outliers"]:
|
||||
# dh.remove_outliers(predict=True) # creates dropped index
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||
dh.use_SVM_to_remove_outliers(predict=True)
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||
dh.check_if_pred_in_training_spaces() # sets do_predict
|
||||
dh.check_if_pred_in_training_spaces()
|
||||
|
||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||
# dh.determine_statistical_distributions()
|
||||
# if self.feature_parameters["remove_outliers"]:
|
||||
# dh.remove_outliers(predict=True) # creates dropped index
|
||||
|
||||
def model_exists(self, pair: str, dh: FreqaiDataKitchen, trained_timestamp: int = None,
|
||||
model_filename: str = '') -> bool:
|
||||
|
@ -285,8 +286,6 @@ class IFreqaiModel(ABC):
|
|||
"""
|
||||
coin, _ = pair.split("/")
|
||||
|
||||
# if self.live and trained_timestamp == 0:
|
||||
# dh.model_filename = model_filename
|
||||
if not self.live:
|
||||
dh.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp)
|
||||
|
||||
|
@ -312,7 +311,6 @@ class IFreqaiModel(ABC):
|
|||
dh.download_new_data_for_retraining(new_trained_timerange, metadata)
|
||||
corr_dataframes, base_dataframes = dh.load_pairs_histories(new_trained_timerange,
|
||||
metadata)
|
||||
|
||||
unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
|
||||
corr_dataframes,
|
||||
base_dataframes,
|
||||
|
@ -322,13 +320,8 @@ class IFreqaiModel(ABC):
|
|||
|
||||
self.data_drawer.pair_dict[metadata['pair']][
|
||||
'trained_timestamp'] = new_trained_timerange.stopts
|
||||
|
||||
dh.set_new_model_names(metadata, new_trained_timerange)
|
||||
|
||||
# send the pair to the end of the queue so other coins can take on the background thread
|
||||
# retraining
|
||||
self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
|
||||
|
||||
dh.save_data(self.model, coin=metadata['pair'])
|
||||
|
||||
self.training_on_separate_thread = False
|
||||
|
@ -350,11 +343,8 @@ class IFreqaiModel(ABC):
|
|||
|
||||
self.data_drawer.pair_dict[metadata['pair']][
|
||||
'trained_timestamp'] = new_trained_timerange.stopts
|
||||
|
||||
dh.set_new_model_names(metadata, new_trained_timerange)
|
||||
|
||||
self.data_drawer.pair_dict[metadata['pair']]['first'] = False
|
||||
|
||||
dh.save_data(self.model, coin=metadata['pair'])
|
||||
self.retrain = False
|
||||
|
||||
|
@ -380,7 +370,7 @@ class IFreqaiModel(ABC):
|
|||
can drop in LGBMRegressor in place of CatBoostRegressor and all data
|
||||
management will be properly handled by Freqai.
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
data_dictionary: Dict = the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
|
@ -391,11 +381,13 @@ class IFreqaiModel(ABC):
|
|||
dh: FreqaiDataKitchen) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param:
|
||||
unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
:return:
|
||||
:predictions: np.array of predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
|
@ -403,7 +395,8 @@ class IFreqaiModel(ABC):
|
|||
"""
|
||||
User defines the labels here (target values).
|
||||
:params:
|
||||
:dataframe: the full dataframe for the present training period
|
||||
dataframe: DataFrame = the full dataframe for the present training period
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
"""
|
||||
|
||||
return
|
||||
|
|
Loading…
Reference in New Issue
Block a user