From c5a16e91fbb3052d01e4311ee72f739ba97fbf51 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 28 May 2022 11:11:41 +0200 Subject: [PATCH] throw user error if user tries to load models but feeds the wrong features (while using PCA) --- docs/freqai.md | 2 ++ freqtrade/freqai/data_kitchen.py | 8 +++++++- freqtrade/freqai/freqai_interface.py | 10 +++++++--- .../prediction_models/CatboostPredictionModel.py | 5 +++-- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/freqai.md b/docs/freqai.md index 78e25a234..57ff8f897 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -313,6 +313,8 @@ $$ W_i = \exp(\frac{-i}{\alpha*n}) $$ where $W_i$ is the weight of data point $i$ in a total set of $n$ data points._ +![weight-factor](assets/weights_factor.png) + Finally, `period` defines the offset used for the `labels`. In the present example, the user is asking for `labels` that are 24 candles in the future. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 93e7b74ad..58b14b9f1 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -477,6 +477,11 @@ class FreqaiDataKitchen: index=self.data_dictionary["train_features"].index, ) + # keeping a copy of the non-transformed features so we can check for errors during + # model load from disk + self.data['training_features_list_raw'] = copy.deepcopy(self.training_features_list) + self.training_features_list = self.data_dictionary["train_features"].columns + self.data_dictionary["test_features"] = pd.DataFrame( data=test_components, columns=["PC" + str(i) for i in range(0, n_keep_components)], @@ -563,7 +568,8 @@ class FreqaiDataKitchen: def find_features(self, dataframe: DataFrame) -> list: column_names = dataframe.columns features = [c for c in column_names if '%' in c] - assert features, ("Could not find any features!") + if not features: + raise OperationalException("Could not find any features!") return features def check_if_pred_in_training_spaces(self) -> None: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 68d21ecdc..ab2d37753 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -225,7 +225,11 @@ class IFreqaiModel(ABC): def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> None: strategy_provided_features = dh.find_features(dataframe) - if strategy_provided_features != dh.training_features_list: + if dh.data['training_features_list_raw']: + feature_list = dh.data['training_features_list_raw'] + else: + feature_list = dh.training_features_list + if strategy_provided_features != feature_list: raise OperationalException("Trying to access pretrained model with `identifier` " "but found different features furnished by current strategy." "Change `identifer` to train from scratch, or ensure the" @@ -254,7 +258,7 @@ class IFreqaiModel(ABC): # if self.feature_parameters["remove_outliers"]: # dh.remove_outliers(predict=False) - def data_cleaning_predict(self, dh: FreqaiDataKitchen) -> None: + def data_cleaning_predict(self, dh: FreqaiDataKitchen, dataframe: DataFrame) -> None: """ Base data cleaning method for predict. These functions each modify dh.do_predict, which is a dataframe with equal length @@ -266,7 +270,7 @@ class IFreqaiModel(ABC): for buy signals. """ if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): - dh.pca_transform() + dh.pca_transform(dataframe) if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'): dh.use_SVM_to_remove_outliers(predict=True) diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 3f70400d8..5147faf0c 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -71,7 +71,8 @@ class CatboostPredictionModel(IFreqaiModel): # optional additional data cleaning/analysis self.data_cleaning_train(dh) - logger.info(f'Training model on {len(dh.training_features_list)} features') + logger.info(f'Training model on {len(dh.data_dictionary["train_features"].columns)}' + 'features') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') model = self.fit(data_dictionary) @@ -129,7 +130,7 @@ class CatboostPredictionModel(IFreqaiModel): dh.data_dictionary["prediction_features"] = filtered_dataframe # optional additional data cleaning/analysis - self.data_cleaning_predict(dh) + self.data_cleaning_predict(dh, filtered_dataframe) predictions = self.model.predict(dh.data_dictionary["prediction_features"])