freqtrade_origin/freqtrade/freqai/torch/PyTorchTransformerModel.py

import math

import torch
from torch import nn


"""
The architecture is based on the paper “Attention Is All You Need”.
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
Lukasz Kaiser, and Illia Polosukhin. 2017.
"""


class PyTorchTransformerModel(nn.Module):
    """
    A transformer approach to time series modeling using positional encoding.
    The architecture is based on the paper “Attention Is All You Need”.
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
    Lukasz Kaiser, and Illia Polosukhin. 2017.

    The model projects the input features to ``dim_val`` channels, optionally adds a
    positional encoding, runs a stack of transformer encoder layers and finally feeds the
    flattened ``time_window * dim_val`` activations through a fully connected head.
    """

    def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
                 n_layer=2, dropout_percent=0.1, time_window=10, nhead=8):
        """
        Args:
            input_dim: Number of features per time step of the input.
            output_dim: Size of the last dimension of the model output.
            hidden_dim: Width of the first fully connected layer of the head; the two
                        following layers use a half and a quarter of it.
            n_layer: Number of stacked transformer encoder layers.
            dropout_percent: Dropout probability used in the input projection, the
                             encoder layers and the fully connected head.
            time_window: Number of time steps that are flattened together before the
                         fully connected head.
            nhead: Number of attention heads of each encoder layer.
        """
        super().__init__()
        self.time_window = time_window
        # Ensure the input dimension to the transformer is divisible by nhead: round
        # input_dim down to a multiple of nhead, but never below nhead itself. Without the
        # lower bound, input_dim < nhead (e.g. the defaults 7 and 8) gives a width of 0.
        self.dim_val = max(nhead, input_dim - (input_dim % nhead))
        self.input_net = nn.Sequential(
            nn.Dropout(dropout_percent), nn.Linear(input_dim, self.dim_val)
        )

        # Encode the timeseries with Positional encoding.
        # max_len bounds the sequence length, so it has to cover time_window; dim_val is
        # kept as a lower bound so that the table is never smaller than it used to be.
        self.positional_encoding = PositionalEncoding(
            d_model=self.dim_val, max_len=max(time_window, self.dim_val))

        # Define the encoder block of the Transformer
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)

        # the pseudo decoding FC
        self.output_net = nn.Sequential(
            nn.Linear(self.dim_val * time_window, int(hidden_dim)),
            nn.ReLU(),
            nn.Dropout(dropout_percent),
            nn.Linear(int(hidden_dim), int(hidden_dim / 2)),
            nn.ReLU(),
            nn.Dropout(dropout_percent),
            nn.Linear(int(hidden_dim / 2), int(hidden_dim / 4)),
            nn.ReLU(),
            nn.Dropout(dropout_percent),
            nn.Linear(int(hidden_dim / 4), output_dim)
        )

    def forward(self, x, mask=None, add_positional_encoding=True):
        """
        Args:
            x: Input features of shape [Batch, SeqLen, input_dim]
            mask: Optional mask, forwarded as the ``mask`` argument of the transformer
                  encoder.
            add_positional_encoding: If True, we add the positional encoding to the input.
                                      Might not be desired for some tasks.

        Returns:
            Tensor of shape [N, 1, output_dim]. Each row is computed from ``time_window``
            consecutive encoded steps, so N equals Batch when SeqLen equals ``time_window``.
        """
        x = self.input_net(x)
        if add_positional_encoding:
            x = self.positional_encoding(x)
        x = self.transformer(x, mask=mask)
        # Flatten the time axis into the feature axis for the fully connected head.
        x = x.reshape(-1, 1, self.time_window * x.shape[-1])
        x = self.output_net(x)
        return x


class PositionalEncoding(nn.Module):
    """
    Sinusoidal positional encoding that is added to the input.

    A table of shape [1, max_len, d_model] is computed once at construction time: even
    feature columns hold sines and odd feature columns hold cosines of the position scaled
    by geometrically decreasing frequencies. The table is registered as a non-persistent
    buffer, so it is not part of the module's ``state_dict``.
    """

    def __init__(self, d_model, max_len=5000):
        """
        Args
            d_model: Hidden dimensionality of the input.
            max_len: Maximum length of a sequence to expect.
        """
        super().__init__()

        # Create matrix of [SeqLen, HiddenDim] representing the positional encoding
        # for max_len inputs
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For an odd d_model there is one odd (cosine) column fewer than even (sine)
        # columns, so drop the surplus frequency instead of failing on a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        pe = pe.unsqueeze(0)

        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x):
        """
        Add the encoding of the first ``x.size(1)`` positions to ``x``.

        Args:
            x: Tensor whose dimension 1 is the sequence axis and whose last dimension has
               size d_model.

        Returns:
            A new tensor holding ``x`` plus the positional encoding.
        """
        x = x + self.pe[:, : x.size(1)]
        return x