Merge pull request #8580 from freqtrade/feat/add-transformer

Add transformer to FreqAI
This commit is contained in:
Robert Caulk 2023-05-07 11:32:38 +02:00 committed by GitHub
commit 950eaf230e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 337 additions and 26 deletions

View File

@ -395,3 +395,21 @@ Here we create a `PyTorchMLPRegressor` class that implements the `fit` method. T
return dataframe
```
To see a full example, you can refer to the [classifier test strategy class](https://github.com/freqtrade/freqtrade/blob/develop/tests/strategy/strats/freqai_test_classifier.py).
#### Improving performance with `torch.compile()`
PyTorch provides `torch.compile()`, which can improve performance on specific GPU hardware. More details can be found in the [`torch.compile` tutorial](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html). In brief, you simply wrap your `model` in `torch.compile()`:
```python
model = PyTorchMLPModel(
input_dim=n_features,
output_dim=1,
**self.model_kwargs
)
model.to(self.device)
model = torch.compile(model)
```
Then proceed to use the model as normal. Keep in mind that a compiled model no longer runs in eager mode, which means errors and tracebacks will be less informative.

View File

@ -114,5 +114,5 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
|------------|-------------|
| | **Extraneous parameters**
| `freqai.keras` | If the selected model makes use of Keras (typical for TensorFlow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards. <br> **Datatype:** Boolean. <br> Default: `False`.
| `freqai.conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
| `freqai.conv_width` | The width of a neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
| `freqai.reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. This parameter is set in the main level of the Freqtrade configuration file (not inside FreqAI). <br> **Datatype:** Boolean. <br> Default: `False`.

View File

@ -75,6 +75,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
dk.data_dictionary["prediction_features"],
device=self.device
)
self.model.model.eval()
logits = self.model.model(x)
probs = F.softmax(logits, dim=-1)
predicted_classes = torch.argmax(probs, dim=-1)

View File

@ -27,6 +27,7 @@ class BasePyTorchModel(IFreqaiModel, ABC):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
self.splits = ["train", "test"] if test_size != 0 else ["train"]
self.window_size = self.freqai_info.get("conv_width", 1)
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs

View File

@ -44,6 +44,7 @@ class BasePyTorchRegressor(BasePyTorchModel):
dk.data_dictionary["prediction_features"],
device=self.device
)
self.model.model.eval()
y = self.model.model(x)
pred_df = DataFrame(y.detach().tolist(), columns=[dk.label_list[0]])
return (pred_df, dk.do_predict)

View File

@ -80,6 +80,7 @@ class IFreqaiModel(ABC):
if self.keras and self.ft_params.get("DI_threshold", 0):
self.ft_params["DI_threshold"] = 0
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
if self.ft_params.get("inlier_metric_window", 0):
self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2

View File

@ -0,0 +1,139 @@
from typing import Any, Dict, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
import torch
from freqtrade.freqai.base_models.BasePyTorchRegressor import BasePyTorchRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
PyTorchDataConvertor)
from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchTransformerTrainer
from freqtrade.freqai.torch.PyTorchTransformerModel import PyTorchTransformerModel
class PyTorchTransformerRegressor(BasePyTorchRegressor):
    """
    Transformer based regressor for FreqAI.

    This class implements the `fit` and `predict` methods of IFreqaiModel.
    In `fit` we initialize the model and trainer objects; the trainer defines
    the training loop. The only requirement from the model is that it returns
    a float tensor holding one prediction per label.

    The length of the sliding window is taken from `self.window_size`, which the
    base class reads from `conv_width` in the freqai section of the config. It must
    NOT be repeated inside `trainer_kwargs` / `model_kwargs`.

    Remaining parameters are passed via `model_training_parameters` under the freqai
    section in the config file. e.g:
    {
        ...
        "freqai": {
            ...
            "conv_width": 10,
            "model_training_parameters": {
                "learning_rate": 3e-4,
                "trainer_kwargs": {
                    "max_iters": 5000,
                    "batch_size": 64,
                    "max_n_eval_batches": null
                },
                "model_kwargs": {
                    "hidden_dim": 512,
                    "dropout_percent": 0.2,
                    "n_layer": 1
                }
            }
        }
    }
    """

    @property
    def data_convertor(self) -> PyTorchDataConvertor:
        """Convertor producing float feature tensors and float target tensors."""
        return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)

    def __init__(self, **kwargs) -> None:
        super().__init__(**kwargs)
        config = self.freqai_info.get("model_training_parameters", {})
        self.learning_rate: float = config.get("learning_rate", 3e-4)
        self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
        self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        User sets up the training and test data to fit their desired model here
        :param data_dictionary: the dictionary holding all data for train, test,
            labels, weights
        :param dk: The datakitchen object for the current coin/model
        :return: the fitted trainer object (it wraps the model as `trainer.model`)
        """
        n_features = data_dictionary["train_features"].shape[-1]
        n_labels = data_dictionary["train_labels"].shape[-1]

        # The window length is owned by `self.window_size` and passed explicitly below.
        # Drop user supplied duplicates, otherwise the calls would fail with
        # "got multiple values for keyword argument".
        model_kwargs = {k: v for k, v in self.model_kwargs.items() if k != "time_window"}
        trainer_kwargs = {k: v for k, v in self.trainer_kwargs.items() if k != "window_size"}

        model = PyTorchTransformerModel(
            input_dim=n_features,
            output_dim=n_labels,
            time_window=self.window_size,
            **model_kwargs
        )
        model.to(self.device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
        criterion = torch.nn.MSELoss()
        init_model = self.get_init_model(dk.pair)
        trainer = PyTorchTransformerTrainer(
            model=model,
            optimizer=optimizer,
            criterion=criterion,
            device=self.device,
            init_model=init_model,
            data_convertor=self.data_convertor,
            window_size=self.window_size,
            **trainer_kwargs,
        )
        trainer.fit(data_dictionary, self.splits)
        return trainer

    def predict(
        self, unfiltered_df: pd.DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[pd.DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions. When more than one row is
            supplied, the leading rows that have no prediction are filled with zeros
            so that the frame has one row per input row.
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
            data (NaNs) or felt uncertain about data (PCA and DI index)
        """
        dk.find_features(unfiltered_df)
        filtered_df, _ = dk.filter_features(
            unfiltered_df, dk.training_features_list, training_filter=False
        )
        filtered_df = dk.normalize_data_from_metadata(filtered_df)
        dk.data_dictionary["prediction_features"] = filtered_df

        self.data_cleaning_predict(dk)
        x = self.data_convertor.convert_x(
            dk.data_dictionary["prediction_features"],
            device=self.device
        )
        # add the batch dimension expected by the model: [1, n_rows, n_features]
        x = x.unsqueeze(0)
        n_rows = x.shape[1]
        n_labels = len(dk.label_list)

        self.model.model.eval()
        # Inference only: do not build / retain the autograd graph for every window.
        with torch.no_grad():
            if n_rows > 1:
                # if user is asking for multiple predictions, slide the window
                # along the tensor
                # NOTE(review): the last window ends one row before the final row and,
                # after the zero padding below, output row r is computed from rows
                # [r - ws, r - 1]. Confirm this one-row lag is intended.
                ws = self.window_size
                outputs = [self.model.model(x[:, i:i + ws, :]) for i in range(n_rows - ws)]
                if outputs:
                    # outputs already live on the model device, so no device mismatch
                    yb = torch.cat(outputs, dim=0)
                else:
                    # fewer rows than one full window: nothing can be predicted
                    yb = torch.empty((0, 1, n_labels), device=x.device)
            else:
                yb = self.model.model(x)

        # Always hand pandas a 2D [n_predictions, n_labels] array. A plain squeeze()
        # collapses a single prediction / single label to 0-d or 1-d.
        yb = yb.detach().cpu().reshape(yb.shape[0], n_labels)

        pred_df = pd.DataFrame(yb.numpy(), columns=dk.label_list)
        pred_df = dk.denormalize_labels_from_metadata(pred_df)

        if n_rows > 1:
            # pad the rows without a prediction so the output matches the input length
            zeros_df = pd.DataFrame(np.zeros((n_rows - len(pred_df), len(pred_df.columns))),
                                    columns=pred_df.columns)
            pred_df = pd.concat([zeros_df, pred_df], axis=0, ignore_index=True)
        return (pred_df, dk.do_predict)

View File

@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import List, Optional
from typing import Optional
import pandas as pd
import torch
@ -12,14 +12,14 @@ class PyTorchDataConvertor(ABC):
"""
@abstractmethod
def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
"""
:param df: "*_features" dataframe.
:param device: The device to use for training (e.g. 'cpu', 'cuda').
"""
@abstractmethod
def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
"""
:param df: "*_labels" dataframe.
:param device: The device to use for training (e.g. 'cpu', 'cuda').
@ -45,14 +45,14 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
self._target_tensor_type = target_tensor_type
self._squeeze_target_tensor = squeeze_target_tensor
def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
x = torch.from_numpy(df.values).float()
if device:
x = x.to(device)
return [x]
return x
def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
y = torch.from_numpy(df.values)
if self._target_tensor_type:
@ -64,4 +64,4 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
if device:
y = y.to(device)
return [y]
return y

View File

@ -1,5 +1,4 @@
import logging
from typing import List
import torch
from torch import nn
@ -47,8 +46,8 @@ class PyTorchMLPModel(nn.Module):
self.relu = nn.ReLU()
self.dropout = nn.Dropout(p=dropout_percent)
def forward(self, tensors: List[torch.Tensor]) -> torch.Tensor:
x: torch.Tensor = tensors[0]
def forward(self, x: torch.Tensor) -> torch.Tensor:
# x: torch.Tensor = tensors[0]
x = self.relu(self.input_layer(x))
x = self.dropout(x)
x = self.blocks(x)

View File

@ -12,6 +12,8 @@ from torch.utils.data import DataLoader, TensorDataset
from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor
from freqtrade.freqai.torch.PyTorchTrainerInterface import PyTorchTrainerInterface
from .datasets import WindowDataset
logger = logging.getLogger(__name__)
@ -26,6 +28,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
init_model: Dict,
data_convertor: PyTorchDataConvertor,
model_meta_data: Dict[str, Any] = {},
window_size: int = 1,
**kwargs
):
"""
@ -52,6 +55,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
self.batch_size: int = kwargs.get("batch_size", 64)
self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
self.data_convertor = data_convertor
self.window_size: int = window_size
if init_model:
self.load_from_checkpoint(init_model)
@ -75,16 +79,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
batch_size=self.batch_size,
n_iters=self.max_iters
)
self.model.train()
for epoch in range(1, epochs + 1):
# training
losses = []
for i, batch_data in enumerate(data_loaders_dictionary["train"]):
for tensor in batch_data:
tensor.to(self.device)
xb = batch_data[:-1]
yb = batch_data[-1]
xb, yb = batch_data
xb.to(self.device)
yb.to(self.device)
yb_pred = self.model(xb)
loss = self.criterion(yb_pred, yb)
@ -120,12 +123,10 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
if max_n_eval_batches and i > max_n_eval_batches:
n_batches += 1
break
xb, yb = batch_data
xb.to(self.device)
yb.to(self.device)
for tensor in batch_data:
tensor.to(self.device)
xb = batch_data[:-1]
yb = batch_data[-1]
yb_pred = self.model(xb)
loss = self.criterion(yb_pred, yb)
losses.append(loss.item())
@ -145,7 +146,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
for split in splits:
x = self.data_convertor.convert_x(data_dictionary[f"{split}_features"], self.device)
y = self.data_convertor.convert_y(data_dictionary[f"{split}_labels"], self.device)
dataset = TensorDataset(*x, *y)
dataset = TensorDataset(x, y)
data_loader = DataLoader(
dataset,
batch_size=self.batch_size,
@ -206,3 +207,33 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
self.model_meta_data = checkpoint["model_meta_data"]
return self
class PyTorchTransformerTrainer(PyTorchModelTrainer):
    """
    Trainer for the Transformer model.

    Only the data loading differs from the parent trainer: samples are served
    through a `WindowDataset` built with `self.window_size`.
    """

    def create_data_loaders_dictionary(
            self,
            data_dictionary: Dict[str, pd.DataFrame],
            splits: List[str]
    ) -> Dict[str, DataLoader]:
        """
        Build one windowed `DataLoader` per requested split.

        :param data_dictionary: holds the "<split>_features" and "<split>_labels" dataframes.
        :param splits: names of the splits to build loaders for.
        :return: dictionary mapping each split name to its data loader.
        """
        loaders: Dict[str, DataLoader] = {}
        for split_name in splits:
            features = self.data_convertor.convert_x(
                data_dictionary[f"{split_name}_features"], self.device
            )
            labels = self.data_convertor.convert_y(
                data_dictionary[f"{split_name}_labels"], self.device
            )
            windows = WindowDataset(features, labels, self.window_size)
            # order is kept (no shuffling) and an incomplete trailing batch is dropped
            loaders[split_name] = DataLoader(
                windows,
                batch_size=self.batch_size,
                shuffle=False,
                drop_last=True,
                num_workers=0,
            )
        return loaders

View File

@ -0,0 +1,93 @@
import math
import torch
import torch.nn as nn
"""
The architecture is based on the paper Attention Is All You Need.
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
Lukasz Kaiser, and Illia Polosukhin. 2017.
"""
class PyTorchTransformerModel(nn.Module):
    """
    A transformer approach to time series modeling using positional encoding.
    The architecture is based on the paper “Attention Is All You Need”.
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
    Lukasz Kaiser, and Illia Polosukhin. 2017.

    :param input_dim: number of input features per time step.
    :param output_dim: number of values predicted per window.
    :param hidden_dim: width of the first fully connected layer of the output head
        (the following layers use a half and a quarter of it).
    :param n_layer: number of transformer encoder layers.
    :param dropout_percent: dropout probability used throughout the network.
    :param time_window: number of time steps in each input window.
    :param nhead: number of attention heads.
    """

    def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
                 n_layer=2, dropout_percent=0.1, time_window=10, nhead=8):
        super().__init__()
        self.time_window = time_window
        # ensure the input dimension to the transformer is divisible by nhead.
        # Rounding down alone gives 0 when input_dim < nhead, which cannot be
        # initialised, so never go below a single multiple of nhead.
        self.dim_val = max(nhead, input_dim - (input_dim % nhead))
        self.input_net = nn.Sequential(
            nn.Dropout(dropout_percent), nn.Linear(input_dim, self.dim_val)
        )

        # Encode the timeseries with Positional encoding.
        # The encoding table is indexed by sequence position, so it has to cover at
        # least `time_window` positions (the buffer is non-persistent, therefore its
        # size does not affect saved checkpoints).
        self.positional_encoding = PositionalEncoding(
            d_model=self.dim_val, max_len=max(self.dim_val, time_window))

        # Define the encoder block of the Transformer
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)

        # the pseudo decoding FC
        hidden = int(hidden_dim)
        half_hidden = int(hidden_dim / 2)
        quarter_hidden = int(hidden_dim / 4)
        self.output_net = nn.Sequential(
            nn.Linear(self.dim_val * time_window, hidden),
            nn.ReLU(),
            nn.Dropout(dropout_percent),
            nn.Linear(hidden, half_hidden),
            nn.ReLU(),
            nn.Dropout(dropout_percent),
            nn.Linear(half_hidden, quarter_hidden),
            nn.ReLU(),
            nn.Dropout(dropout_percent),
            nn.Linear(quarter_hidden, output_dim)
        )

    def forward(self, x, mask=None, add_positional_encoding=True):
        """
        Args:
            x: Input features of shape [Batch, SeqLen, input_dim]
            mask: Mask to apply on the attention outputs (optional)
            add_positional_encoding: If True, we add the positional encoding to the input.
                                      Might not be desired for some tasks.
        Returns:
            Tensor of shape [Batch, 1, output_dim] when SeqLen equals `time_window`.
        """
        x = self.input_net(x)
        if add_positional_encoding:
            x = self.positional_encoding(x)
        x = self.transformer(x, mask=mask)
        # flatten each encoded window into a single vector for the output head
        x = x.reshape(-1, 1, self.time_window * x.shape[-1])
        x = self.output_net(x)
        return x
class PositionalEncoding(torch.nn.Module):
    """
    Sinusoidal positional encoding that is added to the input sequence.
    """

    def __init__(self, d_model, max_len=5000):
        """
        Args
            d_model: Hidden dimensionality of the input.
            max_len: Maximum length of a sequence to expect.
        """
        super().__init__()

        # Create matrix of [SeqLen, HiddenDim] representing the positional encoding
        # for max_len inputs
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one cosine column fewer than sine columns,
        # so trim the angles to the number of odd-indexed columns.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        pe = pe.unsqueeze(0)
        # not a parameter and not stored in the state dict
        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x):
        """Add the encoding of the first `x.size(1)` positions to `x` ([Batch, SeqLen, d_model])."""
        x = x + self.pe[:, : x.size(1)]
        return x

View File

@ -0,0 +1,19 @@
import torch
class WindowDataset(torch.utils.data.Dataset):
    """
    Dataset serving fixed-length sliding windows over `xs` together with the
    row of `ys` that belongs to the last row of each window.

    Index 0 is the most recent window that is served; higher indexes walk
    backwards through the data.
    """

    def __init__(self, xs, ys, window_size):
        self.xs = xs
        self.ys = ys
        self.window_size = window_size

    def __len__(self):
        # never report a negative length when there is less data than one window
        return max(0, len(self.xs) - self.window_size)

    def __getitem__(self, index):
        # Without this guard an out of range index produces a negative start
        # position that silently wraps around, and plain iteration never stops
        # at the end of the dataset.
        if index >= len(self):
            raise IndexError(f"index {index} is out of range for {len(self)} windows")
        idx_rev = len(self.xs) - self.window_size - index - 1
        window_x = self.xs[idx_rev:idx_rev + self.window_size, :]
        # Beware of indexing: the slice end above is exclusive, so the last row of
        # window_x is `idx_rev + window_size - 1`, the same row that is read
        # from ys here.
        window_y = self.ys[idx_rev + self.window_size - 1, :].unsqueeze(0)
        return window_x, window_y

View File

@ -2,7 +2,7 @@
-r requirements-freqai.txt
# Required for freqai-rl
torch==1.13.1; python_version < '3.11'
torch==2.0.0
#until these branches will be released we can use this
gymnasium==0.28.1
stable_baselines3==2.0.0a5

View File

@ -50,7 +50,8 @@ def can_run_model(model: str) -> None:
('XGBoostRegressor', False, True, False, True, False, 10),
('XGBoostRFRegressor', False, False, False, True, False, 0),
('CatboostRegressor', False, False, False, True, True, 0),
('PyTorchMLPRegressor', False, False, False, True, False, 0),
('PyTorchMLPRegressor', False, False, False, False, False, 0),
('PyTorchTransformerRegressor', False, False, False, False, False, 0),
('ReinforcementLearner', False, True, False, True, False, 0),
('ReinforcementLearner_multiproc', False, False, False, True, False, 0),
('ReinforcementLearner_test_3ac', False, False, False, False, False, 0),
@ -82,10 +83,13 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models")
freqai_conf["freqai"]["rl_config"]["drop_ohlc_from_features"] = True
if 'PyTorchMLPRegressor' in model:
if 'PyTorch' in model:
model_save_ext = 'zip'
pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters()
freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
if 'Transformer' in model:
# transformer model takes a window, unlike the MLP regressor
freqai_conf.update({"conv_width": 10})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
@ -228,6 +232,7 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
("XGBoostRegressor", 2, "freqai_test_strat"),
("CatboostRegressor", 2, "freqai_test_strat"),
("PyTorchMLPRegressor", 2, "freqai_test_strat"),
("PyTorchTransformerRegressor", 2, "freqai_test_strat"),
("ReinforcementLearner", 3, "freqai_rl_test_strat"),
("XGBoostClassifier", 2, "freqai_test_classifier"),
("LightGBMClassifier", 2, "freqai_test_classifier"),
@ -253,9 +258,12 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog)
if 'test_4ac' in model:
freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models")
if 'PyTorchMLP' in model:
if 'PyTorch' in model:
pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters()
freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
if 'Transformer' in model:
# transformer model takes a window, unlike the MLP regressor
freqai_conf.update({"conv_width": 10})
freqai_conf.get("freqai", {}).get("feature_parameters", {}).update(
{"indicator_periods_candles": [2]})