freqtrade_origin/freqtrade/freqai/prediction_models/XGBoostClassifier.py

import logging
from typing import Any, Dict, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from pandas.api.types import is_integer_dtype
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class XGBoostClassifier(BaseClassifierModel):
    """
    Classification prediction model backed by xgboost's `XGBClassifier`.
    The class derives from `BaseClassifierModel` and overrides `fit()` and
    `predict()` so that non-integer class labels are encoded to integers
    before training and mapped back to their original names after
    prediction.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Train an `XGBClassifier` on the prepared training data.
        :param data_dictionary: dictionary providing "train_features",
            "train_labels" and "train_weights", plus "test_features" and
            "test_labels" whenever a test split is configured
        :param dk: The datakitchen object for the current coin/model
        :return: the fitted `XGBClassifier`
        """

        encoder = LabelEncoder()

        train_features = data_dictionary["train_features"].to_numpy()
        # Only the first label column is used as the classification target.
        train_labels = data_dictionary["train_labels"].to_numpy()[:, 0]
        if not is_integer_dtype(train_labels):
            train_labels = pd.Series(encoder.fit_transform(train_labels), dtype="int64")

        # No evaluation set is handed to xgboost when the configured test size is 0.
        eval_set = None
        split_params = self.freqai_info.get('data_split_parameters', {})
        if split_params.get('test_size', 0.1) != 0:
            held_out_features = data_dictionary["test_features"].to_numpy()
            held_out_labels = data_dictionary["test_labels"].to_numpy()[:, 0]

            if not is_integer_dtype(held_out_labels):
                # Reuse the encoder fitted on the training labels so both sets share codes.
                held_out_labels = pd.Series(encoder.transform(held_out_labels), dtype="int64")

            eval_set = [(held_out_features, held_out_labels)]

        sample_weight = data_dictionary["train_weights"]
        # Whatever `get_init_model` returns for this pair is forwarded as `xgb_model`.
        starting_model = self.get_init_model(dk.pair)

        classifier = XGBClassifier(**self.model_training_parameters)
        classifier.fit(
            X=train_features,
            y=train_labels,
            eval_set=eval_set,
            sample_weight=sample_weight,
            xgb_model=starting_model,
        )

        return classifier

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Run the parent class prediction, then translate encoded classes back
        to their original label names.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions, with the label column
        decoded and the columns named after encoded classes renamed to the
        original class names
        :do_predict: the second value returned by the parent `predict()`,
        which is also stored on `dk.do_predict`
        """

        pred_df, dk.do_predict = super().predict(unfiltered_df, dk, **kwargs)

        decoder = LabelEncoder()
        target = dk.label_list[0]
        # The keys of dk.data['labels_std'] are taken as the original class names;
        # fitting a fresh encoder on them rebuilds the name <-> integer mapping.
        original_names = list(dk.data['labels_std'].keys())
        encoded_names = decoder.fit_transform(original_names).tolist()

        pred_df[target] = decoder.inverse_transform(pred_df[target])

        # Columns keyed by an encoded integer get their original class name back.
        column_map = dict(zip(encoded_names, original_names))
        pred_df = pred_df.rename(columns=column_map)

        return (pred_df, dk.do_predict)
Add XGBoostClassifier 2022-09-10 17:13:16 +00:00			`import logging`
			`from typing import Any, Dict, Tuple`

			`import numpy as np`
			`import numpy.typing as npt`
			`import pandas as pd`
			`from pandas import DataFrame`
			`from pandas.api.types import is_integer_dtype`
			`from sklearn.preprocessing import LabelEncoder`
			`from xgboost import XGBClassifier`

			`from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel`
			`from freqtrade.freqai.data_kitchen import FreqaiDataKitchen`


			`logger = logging.getLogger(__name__)`


			`class XGBoostClassifier(BaseClassifierModel):`
			`"""`
fix model loading from disk bug, improve doc, clarify installation/docker instructions, add a torch tag to the freqairl docker image. Fix seriously outdated prediction_model docstrings 2023-04-08 10:09:53 +00:00			`User created prediction model. The class inherits IFreqaiModel, which`
			`means it has full access to all Frequency AI functionality. Typically,`
			users would use this to override the common `fit()`, `train()`, or
			`predict()` methods to add their custom data handling tools or change
			`various aspects of the training that cannot be configured via the`
			`top level config.json file.`
Add XGBoostClassifier 2022-09-10 17:13:16 +00:00			`"""`

			`def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:`
			`"""`
			`User sets up the training and test data to fit their desired model here`
fix model loading from disk bug, improve doc, clarify installation/docker instructions, add a torch tag to the freqairl docker image. Fix seriously outdated prediction_model docstrings 2023-04-08 10:09:53 +00:00			`:param data_dictionary: the dictionary holding all data for train, test,`
			`labels, weights`
			`:param dk: The datakitchen object for the current coin/model`
Add XGBoostClassifier 2022-09-10 17:13:16 +00:00			`"""`

			`X = data_dictionary["train_features"].to_numpy()`
			`y = data_dictionary["train_labels"].to_numpy()[:, 0]`

			`le = LabelEncoder()`
			`if not is_integer_dtype(y):`
			`y = pd.Series(le.fit_transform(y), dtype="int64")`

			`if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:`
			`eval_set = None`
			`else:`
			`test_features = data_dictionary["test_features"].to_numpy()`
			`test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]`

			`if not is_integer_dtype(test_labels):`
			`test_labels = pd.Series(le.transform(test_labels), dtype="int64")`

			`eval_set = [(test_features, test_labels)]`

			`train_weights = data_dictionary["train_weights"]`

			`init_model = self.get_init_model(dk.pair)`

			`model = XGBClassifier(**self.model_training_parameters)`

			`model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,`
			`xgb_model=init_model)`

			`return model`

			`def predict(`
			`self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs`
			`) -> Tuple[DataFrame, npt.NDArray[np.int_]]:`
			`"""`
			`Filter the prediction features data and predict with it.`
Update docstring format 2022-10-10 12:15:30 +00:00			`:param unfiltered_df: Full dataframe for the current backtest period.`
Add XGBoostClassifier 2022-09-10 17:13:16 +00:00			`:return:`
			`:pred_df: dataframe containing the predictions`
			`:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove`
			`data (NaNs) or felt uncertain about data (PCA and DI index)`
			`"""`

			`(pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)`

			`le = LabelEncoder()`
			`label = dk.label_list[0]`
			`labels_before = list(dk.data['labels_std'].keys())`
			`labels_after = le.fit_transform(labels_before).tolist()`
			`pred_df[label] = le.inverse_transform(pred_df[label])`
			`pred_df = pred_df.rename(`
			`columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})`

			`return (pred_df, dk.do_predict)`