From 60eb02bb62f5952695a69d9eef531d33d91727bf Mon Sep 17 00:00:00 2001
From: Emre <e@emre.pm>
Date: Sat, 10 Sep 2022 20:13:16 +0300
Subject: [PATCH] Add XGBoostClassifier

---
 .../prediction_models/XGBoostClassifier.py    | 85 +++++++++++++++++++
 tests/freqai/test_freqai_interface.py         | 30 +++++++
 2 files changed, 115 insertions(+)
 create mode 100644 freqtrade/freqai/prediction_models/XGBoostClassifier.py

diff --git a/freqtrade/freqai/prediction_models/XGBoostClassifier.py b/freqtrade/freqai/prediction_models/XGBoostClassifier.py
new file mode 100644
index 000000000..8bf5d6281
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/XGBoostClassifier.py
@@ -0,0 +1,85 @@
+import logging
+from typing import Any, Dict, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+from pandas import DataFrame
+from pandas.api.types import is_integer_dtype
+from sklearn.preprocessing import LabelEncoder
+from xgboost import XGBClassifier
+
+from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class XGBoostClassifier(BaseClassifierModel):
+    """
+    XGBoost-backed classifier built on BaseClassifierModel. fit() trains an
+    XGBClassifier, encoding labels of non-integer dtype as integers first;
+    predict() decodes predictions using the names in dk.data['labels_std'].
+    """
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        Train an XGBClassifier on the train split held in data_dictionary.
+        :param data_dictionary: provides "train_features", "train_labels" and
+        "train_weights", plus "test_features"/"test_labels" when test_size != 0.
+        :param dk: FreqaiDataKitchen; dk.pair is passed to get_init_model().
+        :return: the fitted XGBClassifier
+        """
+
+        features = data_dictionary["train_features"].to_numpy()
+        labels = data_dictionary["train_labels"].to_numpy()[:, 0]
+        encoder = LabelEncoder()
+        if not is_integer_dtype(labels):
+            # Labels without an integer dtype are mapped to int64 class codes.
+            labels = pd.Series(encoder.fit_transform(labels), dtype="int64")
+
+        eval_set = None
+        split_params = self.freqai_info.get('data_split_parameters', {})
+        if split_params.get('test_size', 0.1) != 0:
+            # A test split is configured: hand it to XGBoost as evaluation set.
+            test_features = data_dictionary["test_features"].to_numpy()
+            test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]
+            if not is_integer_dtype(test_labels):
+                test_labels = pd.Series(encoder.transform(test_labels), dtype="int64")
+            eval_set = [(test_features, test_labels)]
+
+        train_weights = data_dictionary["train_weights"]
+        init_model = self.get_init_model(dk.pair)
+
+        model = XGBClassifier(**self.model_training_parameters)
+        model.fit(X=features, y=labels, eval_set=eval_set, sample_weight=train_weights,
+                  xgb_model=init_model)
+        return model
+
+    def predict(
+        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
+    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
+        """
+        Run the parent class prediction, then swap encoded classes for label names.
+        :param unfiltered_df: Full dataframe for the current backtest period.
+        :param dk: FreqaiDataKitchen providing label_list and data['labels_std'].
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)
+
+        # Rebuild the name <-> integer mapping from the label names in labels_std.
+        encoder = LabelEncoder()
+        target = dk.label_list[0]
+        original_names = list(dk.data['labels_std'].keys())
+        encoded_ids = encoder.fit_transform(original_names).tolist()
+
+        # Decode the predicted class, then rename encoded columns to label names.
+        pred_df[target] = encoder.inverse_transform(pred_df[target])
+        pred_df = pred_df.rename(columns=dict(zip(encoded_ids, original_names)))
+
+        return pred_df, dk.do_predict
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 5f8eeb086..afcc4fd37 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -174,6 +174,36 @@ def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf):
     shutil.rmtree(Path(freqai.dk.full_path))
 
 
+def test_extract_data_and_train_model_XGBoostClassifier(mocker, freqai_conf):
+    """Train XGBoostClassifier on ADA/BTC and verify the model artifacts are written."""
+    freqai_conf.update({"timerange": "20180110-20180130", "freqaimodel": "XGBoostClassifier",
+                        "strategy": "freqai_test_classifier"})
+    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
+    exchange = get_patched_exchange(mocker, freqai_conf)
+    strategy.dp = DataProvider(freqai_conf, exchange)
+
+    strategy.freqai_info = freqai_conf.get("freqai", {})
+    freqai = strategy.freqai
+    freqai.live = True
+    freqai.dk = FreqaiDataKitchen(freqai_conf)
+
+    timerange = TimeRange.parse_timerange("20180110-20180130")
+    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
+    freqai.dd.pair_dict = MagicMock()
+
+    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
+    new_timerange = TimeRange.parse_timerange("20180120-20180130")
+    freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
+                                        strategy, freqai.dk, data_load_timerange)
+
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
+    # Remove the files written by this run, as the preceding classifier test does.
+    shutil.rmtree(Path(freqai.dk.full_path))
+
+
 def test_extract_data_and_train_model_XGBoostRegressor(mocker, freqai_conf):
     freqai_conf.update({"timerange": "20180110-20180130"})
     freqai_conf.update({"freqaimodel": "XGBoostRegressor"})