From af139ffbab1a99927fd62cb970c862516566a59f Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Mon, 1 May 2023 13:18:03 +0000
Subject: [PATCH 1/3] add transformer with positional encoding, fix some odds
 and ends in PyTorch, upgrade to PyTorch 2.0

---
 docs/freqai-parameter-table.md                     |   2 +-
 .../base_models/BasePyTorchClassifier.py           |   1 +
 freqtrade/freqai/base_models/BasePyTorchModel.py   |   1 +
 .../base_models/BasePyTorchRegressor.py            |   1 +
 freqtrade/freqai/freqai_interface.py               |   1 +
 .../PyTorchTransformerRegressor.py                 | 139 ++++++++++++++++++
 freqtrade/freqai/torch/PyTorchDataConvertor.py     |  14 +-
 freqtrade/freqai/torch/PyTorchMLPModel.py          |   5 +-
 freqtrade/freqai/torch/PyTorchModelTrainer.py      |  53 +++++--
 .../freqai/torch/PyTorchTransformerModel.py        |  91 ++++++++++++
 freqtrade/freqai/torch/datasets.py                 |  19 +++
 requirements-freqai-rl.txt                         |   2 +-
 tests/freqai/test_freqai_interface.py              |  14 +-
 13 files changed, 317 insertions(+), 26 deletions(-)
 create mode 100644 freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
 create mode 100644 freqtrade/freqai/torch/PyTorchTransformerModel.py
 create mode 100644 freqtrade/freqai/torch/datasets.py

diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md
index 1487b92c2..76c175304 100644
--- a/docs/freqai-parameter-table.md
+++ b/docs/freqai-parameter-table.md
@@ -114,5 +114,5 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 |------------|-------------|
 |  | **Extraneous parameters** |
 | `freqai.keras` | If the selected model makes use of Keras (typical for TensorFlow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards. <br> **Datatype:** Boolean. <br> Default: `False`.
-| `freqai.conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
+| `freqai.conv_width` | The width of a neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
 | `freqai.reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. This parameter is set in the main level of the Freqtrade configuration file (not inside FreqAI). <br> **Datatype:** Boolean. <br> Default: `False`.
diff --git a/freqtrade/freqai/base_models/BasePyTorchClassifier.py b/freqtrade/freqai/base_models/BasePyTorchClassifier.py
index 977152cc5..1f54e7609 100644
--- a/freqtrade/freqai/base_models/BasePyTorchClassifier.py
+++ b/freqtrade/freqai/base_models/BasePyTorchClassifier.py
@@ -74,6 +74,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
             dk.data_dictionary["prediction_features"],
             device=self.device
         )
+        self.model.model.eval()
         logits = self.model.model(x)
         probs = F.softmax(logits, dim=-1)
         predicted_classes = torch.argmax(probs, dim=-1)
diff --git a/freqtrade/freqai/base_models/BasePyTorchModel.py b/freqtrade/freqai/base_models/BasePyTorchModel.py
index 8177b8eb8..82042d24c 100644
--- a/freqtrade/freqai/base_models/BasePyTorchModel.py
+++ b/freqtrade/freqai/base_models/BasePyTorchModel.py
@@ -27,6 +27,7 @@ class BasePyTorchModel(IFreqaiModel, ABC):
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
         self.splits = ["train", "test"] if test_size != 0 else ["train"]
+        self.window_size = self.freqai_info.get("conv_width", 1)
 
     def train(
         self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
diff --git a/freqtrade/freqai/base_models/BasePyTorchRegressor.py b/freqtrade/freqai/base_models/BasePyTorchRegressor.py
index ea6fabe49..d5a550f58 100644
--- a/freqtrade/freqai/base_models/BasePyTorchRegressor.py
+++ b/freqtrade/freqai/base_models/BasePyTorchRegressor.py
@@ -44,6 +44,7 @@ class BasePyTorchRegressor(BasePyTorchModel):
             dk.data_dictionary["prediction_features"],
             device=self.device
         )
+        self.model.model.eval()
         y = self.model.model(x)
         y = y.cpu()
         pred_df = DataFrame(y.detach().numpy(), columns=[dk.label_list[0]])
diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 3580963d4..6815e421c 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -80,6 +80,7 @@ class IFreqaiModel(ABC):
         if self.keras and self.ft_params.get("DI_threshold", 0):
             self.ft_params["DI_threshold"] = 0
             logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
+        self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
         if self.ft_params.get("inlier_metric_window", 0):
             self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
diff --git a/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
new file mode 100644
index 000000000..e760f6e68
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/PyTorchTransformerRegressor.py
@@ -0,0 +1,139 @@
+from typing import Any, Dict, Tuple
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import torch
+
+from freqtrade.freqai.base_models.BasePyTorchRegressor import BasePyTorchRegressor
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from freqtrade.freqai.torch.PyTorchDataConvertor import (DefaultPyTorchDataConvertor,
+                                                         PyTorchDataConvertor)
+from freqtrade.freqai.torch.PyTorchModelTrainer import PyTorchTransformerTrainer
+from freqtrade.freqai.torch.PyTorchTransformerModel import PyTorchTransformerModel
+
+
+class PyTorchTransformerRegressor(BasePyTorchRegressor):
+    """
+    This class implements the fit method of IFreqaiModel. In the fit method we
+    initialize the model and trainer objects. The only requirement from the model
+    is that it is aligned with the PyTorchRegressor predict method, which expects
+    the model to predict a tensor of type float.
+    The trainer defines the training loop.
+
+    Parameters are passed via `model_training_parameters` under the freqai
+    section in the config file, e.g.:
+    {
+        ...
+        "freqai": {
+            ...
+            "model_training_parameters": {
+                "learning_rate": 3e-4,
+                "trainer_kwargs": {
+                    "max_iters": 5000,
+                    "batch_size": 64,
+                    "max_n_eval_batches": null,
+                    "window_size": 10
+                },
+                "model_kwargs": {
+                    "hidden_dim": 512,
+                    "dropout_percent": 0.2,
+                    "n_layer": 1
+                }
+            }
+        }
+    }
+    """
+
+    @property
+    def data_convertor(self) -> PyTorchDataConvertor:
+        return DefaultPyTorchDataConvertor(target_tensor_type=torch.float)
+
+    def __init__(self, **kwargs) -> None:
+        super().__init__(**kwargs)
+        config = self.freqai_info.get("model_training_parameters", {})
+        self.learning_rate: float = config.get("learning_rate", 3e-4)
+        self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
+        self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
+
+    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here.
+        :param data_dictionary: the dictionary holding all data for train, test,
+            labels, weights
+        :param dk: The datakitchen object for the current coin/model
+        """
+
+        n_features = data_dictionary["train_features"].shape[-1]
+        n_labels = data_dictionary["train_labels"].shape[-1]
+        model = PyTorchTransformerModel(
+            input_dim=n_features,
+            output_dim=n_labels,
+            time_window=self.window_size,
+            **self.model_kwargs
+        )
+        model.to(self.device)
+        optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
+        criterion = torch.nn.MSELoss()
+        init_model = self.get_init_model(dk.pair)
+        trainer = PyTorchTransformerTrainer(
+            model=model,
+            optimizer=optimizer,
+            criterion=criterion,
+            device=self.device,
+            init_model=init_model,
+            data_convertor=self.data_convertor,
+            window_size=self.window_size,
+            **self.trainer_kwargs,
+        )
+        trainer.fit(data_dictionary, self.splits)
+        return trainer
+
+    def predict(
+        self, unfiltered_df: pd.DataFrame, dk: FreqaiDataKitchen, **kwargs
+    ) -> Tuple[pd.DataFrame, npt.NDArray[np.int_]]:
+        """
+        Filter the prediction features data and predict with it.
+        :param unfiltered_df: Full dataframe for the current backtest period.
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_df)
+        filtered_df, _ = dk.filter_features(
+            unfiltered_df, dk.training_features_list, training_filter=False
+        )
+        filtered_df = dk.normalize_data_from_metadata(filtered_df)
+        dk.data_dictionary["prediction_features"] = filtered_df
+
+        self.data_cleaning_predict(dk)
+        x = self.data_convertor.convert_x(
+            dk.data_dictionary["prediction_features"],
+            device=self.device
+        )
+        # if the user is asking for multiple predictions, slide the window
+        # along the tensor
+        x = x.unsqueeze(0)
+        # create an empty collector tensor on the model device (torch.cat
+        # requires all tensors to live on the same device)
+        self.model.model.eval()
+        yb = torch.empty(0).to(self.device)
+        if x.shape[1] > 1:
+            ws = self.window_size
+            for i in range(0, x.shape[1] - ws):
+                xb = x[:, i:i + ws, :]
+                y = self.model.model(xb)
+                yb = torch.cat((yb, y), dim=0)
+        else:
+            yb = self.model.model(x)
+
+        yb = yb.cpu().squeeze()
+        pred_df = pd.DataFrame(yb.detach().numpy(), columns=dk.label_list)
+        pred_df = dk.denormalize_labels_from_metadata(pred_df)
+
+        if x.shape[1] > 1:
+            zeros_df = pd.DataFrame(np.zeros((x.shape[1] - len(pred_df), len(pred_df.columns))),
+                                    columns=pred_df.columns)
+            pred_df = pd.concat([zeros_df, pred_df], axis=0, ignore_index=True)
+        return (pred_df, dk.do_predict)
diff --git a/freqtrade/freqai/torch/PyTorchDataConvertor.py b/freqtrade/freqai/torch/PyTorchDataConvertor.py
index a31ccdc79..e6b815373 100644
--- a/freqtrade/freqai/torch/PyTorchDataConvertor.py
+++ b/freqtrade/freqai/torch/PyTorchDataConvertor.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import Optional
 
 import pandas as pd
 import torch
@@ -12,14 +12,14 @@ class PyTorchDataConvertor(ABC):
     """
 
     @abstractmethod
-    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
+    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
         """
         :param df: "*_features" dataframe.
         :param device: The device to use for training (e.g. 'cpu', 'cuda').
         """
 
     @abstractmethod
-    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
+    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
         """
         :param df: "*_labels" dataframe.
         :param device: The device to use for training (e.g. 'cpu', 'cuda').
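To make the new contract concrete (`convert_x()` and `convert_y()` now return a bare tensor instead of a one-element list), here is a minimal usage sketch; the dataframe is a made-up stand-in, not part of the patch:

```python
import pandas as pd
import torch

from freqtrade.freqai.torch.PyTorchDataConvertor import DefaultPyTorchDataConvertor

# A made-up two-column features frame, standing in for
# dk.data_dictionary["train_features"].
df = pd.DataFrame({"feat-a": [0.1, 0.2, 0.3], "feat-b": [1.0, 2.0, 3.0]})

convertor = DefaultPyTorchDataConvertor(target_tensor_type=torch.float)
x = convertor.convert_x(df, device="cpu")

assert isinstance(x, torch.Tensor)  # before this patch: a one-element list [x]
assert x.shape == (3, 2)            # [n_rows, n_features]
```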
@@ -45,14 +45,14 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
         self._target_tensor_type = target_tensor_type
         self._squeeze_target_tensor = squeeze_target_tensor
 
-    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
+    def convert_x(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
         x = torch.from_numpy(df.values).float()
         if device:
             x = x.to(device)
 
-        return [x]
+        return x
 
-    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> List[torch.Tensor]:
+    def convert_y(self, df: pd.DataFrame, device: Optional[str] = None) -> torch.Tensor:
         y = torch.from_numpy(df.values)
 
         if self._target_tensor_type:
@@ -64,4 +64,4 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
         if device:
             y = y.to(device)
 
-        return [y]
+        return y
diff --git a/freqtrade/freqai/torch/PyTorchMLPModel.py b/freqtrade/freqai/torch/PyTorchMLPModel.py
index 62d3216df..0093388f8 100644
--- a/freqtrade/freqai/torch/PyTorchMLPModel.py
+++ b/freqtrade/freqai/torch/PyTorchMLPModel.py
@@ -1,5 +1,4 @@
 import logging
-from typing import List
 
 import torch
 from torch import nn
@@ -47,8 +46,7 @@ class PyTorchMLPModel(nn.Module):
         self.relu = nn.ReLU()
         self.dropout = nn.Dropout(p=dropout_percent)
 
-    def forward(self, tensors: List[torch.Tensor]) -> torch.Tensor:
-        x: torch.Tensor = tensors[0]
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         x = self.relu(self.input_layer(x))
         x = self.dropout(x)
         x = self.blocks(x)
diff --git a/freqtrade/freqai/torch/PyTorchModelTrainer.py b/freqtrade/freqai/torch/PyTorchModelTrainer.py
index 8277ba937..a3b0d9b9c 100644
--- a/freqtrade/freqai/torch/PyTorchModelTrainer.py
+++ b/freqtrade/freqai/torch/PyTorchModelTrainer.py
@@ -12,6 +12,8 @@ from torch.utils.data import DataLoader, TensorDataset
 from freqtrade.freqai.torch.PyTorchDataConvertor import PyTorchDataConvertor
 from freqtrade.freqai.torch.PyTorchTrainerInterface import PyTorchTrainerInterface
 
+from .datasets import WindowDataset
+
 logger = logging.getLogger(__name__)
@@ -26,6 +28,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         init_model: Dict,
         data_convertor: PyTorchDataConvertor,
         model_meta_data: Dict[str, Any] = {},
+        window_size: int = 1,
         **kwargs
     ):
         """
@@ -52,6 +55,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.batch_size: int = kwargs.get("batch_size", 64)
         self.max_n_eval_batches: Optional[int] = kwargs.get("max_n_eval_batches", None)
         self.data_convertor = data_convertor
+        self.window_size: int = window_size
         if init_model:
             self.load_from_checkpoint(init_model)
@@ -75,16 +79,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             batch_size=self.batch_size,
             n_iters=self.max_iters
         )
+        self.model.train()
         for epoch in range(1, epochs + 1):
             # training
             losses = []
             for i, batch_data in enumerate(data_loaders_dictionary["train"]):
-                for tensor in batch_data:
-                    tensor.to(self.device)
-
-                xb = batch_data[:-1]
-                yb = batch_data[-1]
+                xb, yb = batch_data
+                xb = xb.to(self.device)
+                yb = yb.to(self.device)
 
                 yb_pred = self.model(xb)
                 loss = self.criterion(yb_pred, yb)
@@ -120,12 +123,10 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
                 if max_n_eval_batches and i > max_n_eval_batches:
                     n_batches += 1
                     break
+                xb, yb = batch_data
+                xb = xb.to(self.device)
+                yb = yb.to(self.device)
 
-                for tensor in batch_data:
-                    tensor.to(self.device)
-
-                xb = batch_data[:-1]
-                yb = batch_data[-1]
                 yb_pred = self.model(xb)
                 loss = self.criterion(yb_pred, yb)
                 losses.append(loss.item())
@@ -145,7 +146,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         for split in splits:
             x = self.data_convertor.convert_x(data_dictionary[f"{split}_features"], self.device)
             y = self.data_convertor.convert_y(data_dictionary[f"{split}_labels"], self.device)
-            dataset = TensorDataset(*x, *y)
+            dataset = TensorDataset(x, y)
             data_loader = DataLoader(
                 dataset,
                 batch_size=self.batch_size,
@@ -206,3 +207,33 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
         self.model_meta_data = checkpoint["model_meta_data"]
         return self
+
+
+class PyTorchTransformerTrainer(PyTorchModelTrainer):
+    """
+    Creates a trainer for the Transformer model.
+    """
+
+    def create_data_loaders_dictionary(
+        self,
+        data_dictionary: Dict[str, pd.DataFrame],
+        splits: List[str]
+    ) -> Dict[str, DataLoader]:
+        """
+        Converts the input data to PyTorch tensors using a data loader.
+        """
+        data_loader_dictionary = {}
+        for split in splits:
+            x = self.data_convertor.convert_x(data_dictionary[f"{split}_features"], self.device)
+            y = self.data_convertor.convert_y(data_dictionary[f"{split}_labels"], self.device)
+            dataset = WindowDataset(x, y, self.window_size)
+            data_loader = DataLoader(
+                dataset,
+                batch_size=self.batch_size,
+                shuffle=False,
+                drop_last=True,
+                num_workers=0,
+            )
+            data_loader_dictionary[split] = data_loader
+
+        return data_loader_dictionary
diff --git a/freqtrade/freqai/torch/PyTorchTransformerModel.py b/freqtrade/freqai/torch/PyTorchTransformerModel.py
new file mode 100644
index 000000000..0a252112a
--- /dev/null
+++ b/freqtrade/freqai/torch/PyTorchTransformerModel.py
@@ -0,0 +1,91 @@
+import math
+
+import torch
+import torch.nn as nn
+
+
+"""
+The architecture is based on the paper “Attention Is All You Need”.
+Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+Lukasz Kaiser, and Illia Polosukhin. 2017.
+"""
+
+
+class PyTorchTransformerModel(nn.Module):
+    """
+    A transformer approach to time series modeling using positional encoding.
+    The architecture is based on the paper “Attention Is All You Need”.
+    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
+    Lukasz Kaiser, and Illia Polosukhin. 2017.
+    """
+
+    def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
+                 n_layer=2, dropout_percent=0.1, time_window=10):
+        super().__init__()
+        self.time_window = time_window
+        self.input_net = nn.Sequential(
+            nn.Dropout(dropout_percent), nn.Linear(input_dim, hidden_dim)
+        )
+
+        # Encode the timeseries with Positional encoding
+        self.positional_encoding = PositionalEncoding(d_model=hidden_dim, max_len=hidden_dim)
+
+        # Define the encoder block of the Transformer
+        self.encoder_layer = nn.TransformerEncoderLayer(
+            d_model=hidden_dim, nhead=8, dropout=dropout_percent, batch_first=True)
+        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)
+
+        # Pseudo decoder
+        self.output_net = nn.Sequential(
+            nn.Linear(hidden_dim, hidden_dim),
+            nn.LayerNorm(hidden_dim),
+            nn.Tanh(),
+            nn.Dropout(dropout_percent),
+        )
+
+        self.output_layer = nn.Sequential(
+            nn.Linear(hidden_dim * time_window, output_dim),
+            nn.Tanh()
+        )
+
+    def forward(self, x, mask=None, add_positional_encoding=True):
+        """
+        Args:
+            x: Input features of shape [Batch, SeqLen, input_dim]
+            mask: Mask to apply on the attention outputs (optional)
+            add_positional_encoding: If True, we add the positional encoding to
+                the input. Might not be desired for some tasks.
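+
+        Returns:
+            Tensor of shape [Batch, 1, output_dim]; SeqLen must equal time_window,
+            since the sequence is flattened to time_window * hidden_dim before the
+            final projection.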
+ """ + x = self.input_net(x) + if add_positional_encoding: + x = self.positional_encoding(x) + x = self.transformer(x, mask=mask) + x = self.output_net(x) + x = x.reshape(-1, 1, self.time_window * x.shape[-1]) + x = self.output_layer(x) + return x + + +class PositionalEncoding(torch.nn.Module): + def __init__(self, d_model, max_len=5000): + """ + Args + d_model: Hidden dimensionality of the input. + max_len: Maximum length of a sequence to expect. + """ + super().__init__() + + # Create matrix of [SeqLen, HiddenDim] representing the positional encoding + # for max_len inputs + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0) + + self.register_buffer("pe", pe, persistent=False) + + def forward(self, x): + x = x + self.pe[:, : x.size(1)] + return x diff --git a/freqtrade/freqai/torch/datasets.py b/freqtrade/freqai/torch/datasets.py new file mode 100644 index 000000000..120d8a116 --- /dev/null +++ b/freqtrade/freqai/torch/datasets.py @@ -0,0 +1,19 @@ +import torch + + +class WindowDataset(torch.utils.data.Dataset): + def __init__(self, xs, ys, window_size): + self.xs = xs + self.ys = ys + self.window_size = window_size + + def __len__(self): + return len(self.xs) - self.window_size + + def __getitem__(self, index): + idx_rev = len(self.xs) - self.window_size - index - 1 + window_x = self.xs[idx_rev:idx_rev + self.window_size, :] + # Beware of indexing, these two window_x and window_y are aimed at the same row! + # this is what happens when you use : + window_y = self.ys[idx_rev + self.window_size - 1, :].unsqueeze(0) + return window_x, window_y diff --git a/requirements-freqai-rl.txt b/requirements-freqai-rl.txt index 45ccc40cc..525c25229 100644 --- a/requirements-freqai-rl.txt +++ b/requirements-freqai-rl.txt @@ -2,7 +2,7 @@ -r requirements-freqai.txt # Required for freqai-rl -torch==1.13.1; python_version < '3.11' +torch==2.0.0; python_version < '3.11' #until these branches will be released we can use this gymnasium==0.28.1 stable_baselines3==2.0.0a5 diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 7346191db..ed0910089 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -50,7 +50,8 @@ def can_run_model(model: str) -> None: ('XGBoostRegressor', False, True, False, True, False, 10), ('XGBoostRFRegressor', False, False, False, True, False, 0), ('CatboostRegressor', False, False, False, True, True, 0), - ('PyTorchMLPRegressor', False, False, False, True, False, 0), + ('PyTorchMLPRegressor', False, False, False, False, False, 0), + ('PyTorchTransformerRegressor', False, False, False, False, False, 0), ('ReinforcementLearner', False, True, False, True, False, 0), ('ReinforcementLearner_multiproc', False, False, False, True, False, 0), ('ReinforcementLearner_test_3ac', False, False, False, False, False, 0), @@ -82,10 +83,13 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models") freqai_conf["freqai"]["rl_config"]["drop_ohlc_from_features"] = True - if 'PyTorchMLPRegressor' in model: + if 'PyTorch' in model: model_save_ext = 'zip' pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters() 
         freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
+        if 'Transformer' in model:
+            # transformer model takes a window, unlike the MLP regressor
+            freqai_conf.update({"conv_width": 10})
 
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
     exchange = get_patched_exchange(mocker, freqai_conf)
@@ -228,6 +232,7 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model):
     ("XGBoostRegressor", 2, "freqai_test_strat"),
     ("CatboostRegressor", 2, "freqai_test_strat"),
     ("PyTorchMLPRegressor", 2, "freqai_test_strat"),
+    ("PyTorchTransformerRegressor", 2, "freqai_test_strat"),
     ("ReinforcementLearner", 3, "freqai_rl_test_strat"),
     ("XGBoostClassifier", 2, "freqai_test_classifier"),
     ("LightGBMClassifier", 2, "freqai_test_classifier"),
@@ -253,9 +258,12 @@ def test_start_backtesting(mocker, freqai_conf, model, num_files, strat, caplog)
     if 'test_4ac' in model:
         freqai_conf["freqaimodel_path"] = str(Path(__file__).parents[1] / "freqai" / "test_models")
 
-    if 'PyTorchMLP' in model:
+    if 'PyTorch' in model:
         pytorch_mlp_mtp = mock_pytorch_mlp_model_training_parameters()
         freqai_conf['freqai']['model_training_parameters'].update(pytorch_mlp_mtp)
+        if 'Transformer' in model:
+            # transformer model takes a window, unlike the MLP regressor
+            freqai_conf.update({"conv_width": 10})
 
     freqai_conf.get("freqai", {}).get("feature_parameters", {}).update(
         {"indicator_periods_candles": [2]})

From 3bbb7e38ead75996f5ba1bf23098067f57ed313b Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 6 May 2023 16:12:10 +0000
Subject: [PATCH 2/3] improve transformer architecture, remove 3.10 install
 constraint, add documentation for torch.compile()

---
 docs/freqai-configuration.md                  | 18 ++++++++++++
 .../freqai/torch/PyTorchTransformerModel.py   | 32 ++++++++++---------
 requirements-freqai-rl.txt                    |  2 +-
 3 files changed, 36 insertions(+), 16 deletions(-)

diff --git a/docs/freqai-configuration.md b/docs/freqai-configuration.md
index e7aca20be..ad7cafd3d 100644
--- a/docs/freqai-configuration.md
+++ b/docs/freqai-configuration.md
@@ -395,3 +395,21 @@ Here we create a `PyTorchMLPRegressor` class that implements the `fit` method. T
         return dataframe
     ```
 To see a full example, you can refer to the [classifier test strategy class](https://github.com/freqtrade/freqtrade/blob/develop/tests/strategy/strats/freqai_test_classifier.py).
+
+
+#### Improving performance with `torch.compile()`
+
+Torch provides a `torch.compile()` method that can be used to improve performance for specific GPU hardware. More details can be found [here](https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html). In brief, you simply wrap your `model` in `torch.compile()`:
+
+```python
+        model = PyTorchMLPModel(
+            input_dim=n_features,
+            output_dim=1,
+            **self.model_kwargs
+        )
+        model.to(self.device)
+        model = torch.compile(model)
+```
+
+Then proceed to use the model as normal. Keep in mind that doing this removes eager execution, which means errors and tracebacks will be far less informative.
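`torch.compile()` only exists from PyTorch 2.0 onwards, so a guard keeps the same code working on older installations. A minimal sketch (not part of this patch):

```python
import torch

# torch.compile() ships with PyTorch >= 2.0; fall back to the eager model
# on older versions instead of raising an AttributeError.
if hasattr(torch, "compile"):
    model = torch.compile(model)
```

The compiled module is called exactly like the original one, so the rest of the training and prediction code stays unchanged.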
diff --git a/freqtrade/freqai/torch/PyTorchTransformerModel.py b/freqtrade/freqai/torch/PyTorchTransformerModel.py
index 0a252112a..2ab3ea434 100644
--- a/freqtrade/freqai/torch/PyTorchTransformerModel.py
+++ b/freqtrade/freqai/torch/PyTorchTransformerModel.py
@@ -20,32 +20,35 @@ class PyTorchTransformerModel(nn.Module):
     """
 
     def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
-                 n_layer=2, dropout_percent=0.1, time_window=10):
+                 n_layer=2, dropout_percent=0.1, time_window=10, nhead=8):
         super().__init__()
         self.time_window = time_window
+        # ensure the input dimension to the transformer is divisible by nhead
+        self.dim_val = input_dim - (input_dim % nhead)
         self.input_net = nn.Sequential(
-            nn.Dropout(dropout_percent), nn.Linear(input_dim, hidden_dim)
+            nn.Dropout(dropout_percent), nn.Linear(input_dim, self.dim_val)
         )
 
         # Encode the timeseries with Positional encoding
-        self.positional_encoding = PositionalEncoding(d_model=hidden_dim, max_len=hidden_dim)
+        self.positional_encoding = PositionalEncoding(d_model=self.dim_val, max_len=self.dim_val)
 
         # Define the encoder block of the Transformer
         self.encoder_layer = nn.TransformerEncoderLayer(
-            d_model=hidden_dim, nhead=8, dropout=dropout_percent, batch_first=True)
+            d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True)
         self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)
 
-        # Pseudo decoder
+        # the pseudo decoding FC
         self.output_net = nn.Sequential(
-            nn.Linear(hidden_dim, hidden_dim),
-            nn.LayerNorm(hidden_dim),
-            nn.Tanh(),
+            nn.Linear(hidden_dim * time_window, int(hidden_dim)),
+            nn.ReLU(),
             nn.Dropout(dropout_percent),
-        )
-
-        self.output_layer = nn.Sequential(
-            nn.Linear(hidden_dim * time_window, output_dim),
-            nn.Tanh()
+            nn.Linear(int(hidden_dim), int(hidden_dim / 2)),
+            nn.ReLU(),
+            nn.Dropout(dropout_percent),
+            nn.Linear(int(hidden_dim / 2), int(hidden_dim / 4)),
+            nn.ReLU(),
+            nn.Dropout(dropout_percent),
+            nn.Linear(int(hidden_dim / 4), output_dim)
         )
 
     def forward(self, x, mask=None, add_positional_encoding=True):
@@ -60,9 +63,8 @@ class PyTorchTransformerModel(nn.Module):
         if add_positional_encoding:
             x = self.positional_encoding(x)
         x = self.transformer(x, mask=mask)
-        x = self.output_net(x)
         x = x.reshape(-1, 1, self.time_window * x.shape[-1])
-        x = self.output_layer(x)
+        x = self.output_net(x)
         return x
diff --git a/requirements-freqai-rl.txt b/requirements-freqai-rl.txt
index 525c25229..6b9c1c298 100644
--- a/requirements-freqai-rl.txt
+++ b/requirements-freqai-rl.txt
@@ -2,7 +2,7 @@
 -r requirements-freqai.txt
 
 # Required for freqai-rl
-torch==2.0.0; python_version < '3.11'
+torch==2.0.0
 #until these branches will be released we can use this
 gymnasium==0.28.1
 stable_baselines3==2.0.0a5

From 36e1e58dad0c63a41cf97792cea248942d590ca4 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 6 May 2023 17:40:04 +0000
Subject: [PATCH 3/3] fix arch

---
 freqtrade/freqai/torch/PyTorchTransformerModel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/freqtrade/freqai/torch/PyTorchTransformerModel.py b/freqtrade/freqai/torch/PyTorchTransformerModel.py
index 2ab3ea434..702a7a08b 100644
--- a/freqtrade/freqai/torch/PyTorchTransformerModel.py
+++ b/freqtrade/freqai/torch/PyTorchTransformerModel.py
@@ -39,7 +39,7 @@ class PyTorchTransformerModel(nn.Module):
         # the pseudo decoding FC
         self.output_net = nn.Sequential(
-            nn.Linear(hidden_dim * time_window, int(hidden_dim)),
+            nn.Linear(self.dim_val * time_window, int(hidden_dim)),
             nn.ReLU(),
             nn.Dropout(dropout_percent),
             nn.Linear(int(hidden_dim), int(hidden_dim / 2)),
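For context on PATCH 3/3: after PATCH 2, the encoder operates at `dim_val` (`input_dim` rounded down to a multiple of `nhead`) rather than `hidden_dim`, so the flattened input to the first pseudo-decoder layer must be `dim_val * time_window` wide. A standalone shape check under assumed sizes (a sketch, not part of the patch):

```python
import torch
import torch.nn as nn

# Stand-in sizes: 70 input features and 8 attention heads give dim_val = 64.
input_dim, nhead, time_window, batch = 70, 8, 10, 4
dim_val = input_dim - (input_dim % nhead)

encoder_out = torch.randn(batch, time_window, dim_val)  # [Batch, SeqLen, dim_val]
flat = encoder_out.reshape(-1, 1, time_window * encoder_out.shape[-1])

# The first pseudo-decoder Linear must consume dim_val * time_window features,
# not hidden_dim * time_window, which is exactly what this patch fixes.
first_fc = nn.Linear(dim_val * time_window, 1024)
out = first_fc(flat)
assert out.shape == (batch, 1, 1024)
```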