From f3d46613ee501888defb30926cd1f1b3a610e93b Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 23 Jul 2022 17:14:11 +0200
Subject: [PATCH] move prediction denormalization into datakitchen. remove
 duplicate associated code. avoid normalization/denormalization for string
 dtypes.

---
Notes (ignored by `git am`, not part of the commit message):
  This revision of the patch writes the normalized train/test labels back
  per label column (`[item]`) instead of overwriting the whole frame,
  detects string label columns via the pandas `object` dtype (a comparison
  against `str` never matches), and corrects the docstring of
  `denormalize_labels_from_metadata`.

 freqtrade/freqai/data_kitchen.py              | 41 ++++++++++++++-----
 freqtrade/freqai/freqai_interface.py          |  8 +---
 .../prediction_models/BaseRegressionModel.py  |  7 +---
 3 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py
index 1c54a6375..3899c82df 100644
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -356,7 +356,7 @@ class FreqaiDataKitchen:
 
         return self.data_dictionary
 
-    def normalize_data(self, data_dictionary: Dict, do_labels: bool = True) -> Dict[Any, Any]:
+    def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
         """
         Normalize all data in the data_dictionary according to the training dataset
         :params:
@@ -378,24 +378,26 @@ class FreqaiDataKitchen:
             self.data[item + "_max"] = train_max[item]
             self.data[item + "_min"] = train_min[item]
 
-        if do_labels:
-            train_labels_max = data_dictionary["train_labels"].max()
-            train_labels_min = data_dictionary["train_labels"].min()
-            data_dictionary["train_labels"] = (
+        for item in data_dictionary["train_labels"].keys():
+            if data_dictionary["train_labels"][item].dtype == object:
+                continue  # string labels are not scaled
+            train_labels_max = data_dictionary["train_labels"][item].max()
+            train_labels_min = data_dictionary["train_labels"][item].min()
+            data_dictionary["train_labels"][item] = (
                 2
-                * (data_dictionary["train_labels"] - train_labels_min)
+                * (data_dictionary["train_labels"][item] - train_labels_min)
                 / (train_labels_max - train_labels_min)
                 - 1
             )
-            data_dictionary["test_labels"] = (
+            data_dictionary["test_labels"][item] = (
                 2
-                * (data_dictionary["test_labels"] - train_labels_min)
+                * (data_dictionary["test_labels"][item] - train_labels_min)
                 / (train_labels_max - train_labels_min)
                 - 1
             )
 
-            self.data["labels_max"] = train_labels_max.to_dict()
-            self.data["labels_min"] = train_labels_min.to_dict()
+            self.data[f"{item}_max"] = train_labels_max
+            self.data[f"{item}_min"] = train_labels_min
 
         return data_dictionary
 
@@ -417,6 +419,25 @@ class FreqaiDataKitchen:
 
         return df
 
+    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
+        """
+        Denormalize a set of label predictions using the min and max values
+        stored in self.data for each label.
+        :params:
+        :df: Dataframe of predictions to be denormalized
+        """
+
+        for label in self.label_list:
+            if df[label].dtype == object:
+                continue  # string labels were never scaled
+            df[label] = (
+                (df[label] + 1)
+                * (self.data[f"{label}_max"] - self.data[f"{label}_min"])
+                / 2
+            ) + self.data[f"{label}_min"]
+
+        return df
+
     def split_timerange(
         self, tr: str, train_split: int = 28, bt_split: int = 7
     ) -> Tuple[list, list]:
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 5fa15ebf8..ac8cf6e60 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -501,12 +501,8 @@ class IFreqaiModel(ABC):
     ) -> None:
         trained_predictions = model.predict(df)
         pred_df = DataFrame(trained_predictions, columns=dk.label_list)
-        for label in dk.label_list:
-            pred_df[label] = (
-                (pred_df[label] + 1)
-                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
-                / 2
-            ) + dk.data["labels_min"][label]
+
+        pred_df = dk.denormalize_labels_from_metadata(pred_df)
 
         self.dd.historic_predictions[pair] = pd.DataFrame()
         self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
index ffe30ef2a..2654b3726 100644
--- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py
+++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
@@ -107,11 +107,6 @@ class BaseRegressionModel(IFreqaiModel):
         predictions = self.model.predict(dk.data_dictionary["prediction_features"])
         pred_df = DataFrame(predictions, columns=dk.label_list)
 
-        for label in dk.label_list:
-            pred_df[label] = (
-                (pred_df[label] + 1)
-                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
-                / 2
-            ) + dk.data["labels_min"][label]
+        pred_df = dk.denormalize_labels_from_metadata(pred_df)
 
         return (pred_df, dk.do_predict)