import math

import torch
import torch.nn as nn

"""
The architecture is based on the paper “Attention Is All You Need”.
Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
Lukasz Kaiser, and Illia Polosukhin. 2017.
"""


class PyTorchTransformerModel(nn.Module):
    """
    A transformer approach to time series modeling using positional encoding.
    The architecture is based on the paper “Attention Is All You Need”.
    Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,
    Lukasz Kaiser, and Illia Polosukhin. 2017.
    """

    def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
                 n_layer=2, dropout_percent=0.1, time_window=10):
        super().__init__()
        self.time_window = time_window
        self.input_net = nn.Sequential(
            nn.Dropout(dropout_percent), nn.Linear(input_dim, hidden_dim)
        )

        # Encode the timeseries with Positional encoding
        self.positional_encoding = PositionalEncoding(d_model=hidden_dim, max_len=hidden_dim)

        # Define the encoder block of the Transformer
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=hidden_dim, nhead=8, dropout=dropout_percent, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)

        # Pseudo decoder
        self.output_net = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.LayerNorm(hidden_dim),
            nn.Tanh(),
            nn.Dropout(dropout_percent),
        )

        self.output_layer = nn.Sequential(
            nn.Linear(hidden_dim * time_window, output_dim),
            nn.Tanh()
        )

    def forward(self, x, mask=None, add_positional_encoding=True):
        """
        Args:
            x: Input features of shape [Batch, SeqLen, input_dim]
            mask: Mask to apply on the attention outputs (optional)
            add_positional_encoding: If True, we add the positional encoding to the input.
                                     Might not be desired for some tasks.
        """
        x = self.input_net(x)
        if add_positional_encoding:
            x = self.positional_encoding(x)
        x = self.transformer(x, mask=mask)
        x = self.output_net(x)
        x = x.reshape(-1, 1, self.time_window * x.shape[-1])
        x = self.output_layer(x)
        return x


class PositionalEncoding(torch.nn.Module):
    def __init__(self, d_model, max_len=5000):
        """
        Args:
            d_model: Hidden dimensionality of the input.
            max_len: Maximum length of a sequence to expect.
        """
        super().__init__()

        # Create matrix of [SeqLen, HiddenDim] representing the positional encoding
        # for max_len inputs
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)

        self.register_buffer("pe", pe, persistent=False)

    def forward(self, x):
        x = x + self.pe[:, : x.size(1)]
        return x
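

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the upstream module): it instantiates
# the model with its default dimensions, pushes a dummy batch through it, and
# prints the resulting tensor shapes. The batch size and the random input
# below are assumptions made purely for demonstration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    batch_size, time_window, n_features = 4, 10, 7
    model = PyTorchTransformerModel(
        input_dim=n_features, output_dim=n_features, hidden_dim=1024,
        n_layer=2, dropout_percent=0.1, time_window=time_window,
    )
    model.eval()  # disable dropout so the shape check is deterministic

    dummy = torch.randn(batch_size, time_window, n_features)
    with torch.no_grad():
        prediction = model(dummy)

    # The sinusoidal table is registered as a non-persistent buffer of shape
    # [1, max_len, d_model]; here max_len == hidden_dim == 1024.
    print(model.positional_encoding.pe.shape)  # torch.Size([1, 1024, 1024])
    # The pseudo decoder flattens the time dimension before the final linear
    # layer, so the prediction is [Batch, 1, output_dim].
    print(prediction.shape)  # torch.Size([4, 1, 7])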