# freqtrade_origin/freqtrade/freqai/prediction_models/XGBoostClassifier.py
import logging
from typing import Any
2022-09-10 17:13:16 +00:00
import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from pandas.api.types import is_integer_dtype
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class XGBoostClassifier(BaseClassifierModel):
    """
    User created prediction model. The class inherits from BaseClassifierModel
    (and, through it, the FreqAI model interface), which means it has full
    access to all Frequency AI functionality. Typically, users would use this
    to override the common `fit()`, `train()`, or `predict()` methods to add
    their custom data handling tools or change various aspects of the training
    that cannot be configured via the top level config.json file.
    """

    def fit(self, data_dictionary: dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here.

        Labels that are not already of an integer dtype are converted to integer
        codes with a LabelEncoder before they are handed to XGBClassifier.

        :param data_dictionary: the dictionary holding all data for train, test,
            labels, weights
        :param dk: The datakitchen object for the current coin/model
        :return: the fitted XGBClassifier instance
        """
        X = data_dictionary["train_features"].to_numpy()
        # Only the first label column is used as the classification target.
        y = data_dictionary["train_labels"].to_numpy()[:, 0]

        le = LabelEncoder()
        if not is_integer_dtype(y):
            y = pd.Series(le.fit_transform(y), dtype="int64")

        test_size = self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1)
        if test_size == 0:
            # No held-out split configured: train without an evaluation set.
            eval_set = None
        else:
            test_features = data_dictionary["test_features"].to_numpy()
            test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]

            if not is_integer_dtype(test_labels):
                # Reuse the encoder fitted on the training labels so both sets
                # share the same label -> code mapping.
                # NOTE(review): if the train labels were already integers the
                # encoder is unfitted at this point and transform() would
                # raise; presumably train and test labels always share a
                # dtype - confirm against the caller.
                test_labels = pd.Series(le.transform(test_labels), dtype="int64")

            eval_set = [(test_features, test_labels)]

        train_weights = data_dictionary["train_weights"]

        # Previously trained model for this pair (if any), passed on to
        # XGBoost through the `xgb_model` argument.
        init_model = self.get_init_model(dk.pair)

        model = XGBClassifier(**self.model_training_parameters)

        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, xgb_model=init_model)

        return model

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.

        The parent class produces the raw predictions; this override then maps
        the integer class codes back to the original label names, both in the
        label column and in the column names of the prediction dataframe.

        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """
        (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)

        le = LabelEncoder()
        label = dk.label_list[0]
        # Rebuild the label <-> integer code mapping from the keys stored in
        # dk.data["labels_std"]; presumably these are the class names seen
        # during training - verify against the data kitchen.
        labels_before = list(dk.data["labels_std"].keys())
        labels_after = le.fit_transform(labels_before).tolist()

        # Decode the predicted class codes back to their original labels.
        pred_df[label] = le.inverse_transform(pred_df[label])
        # Rename any columns that are named after an integer code to the
        # corresponding original label.
        pred_df = pred_df.rename(columns=dict(zip(labels_after, labels_before)))

        return (pred_df, dk.do_predict)