Compare commits

...

10 Commits

| Author | SHA1 | Message | Date |
|--------|------|---------|------|
| Robert Caulk | 338142894b | Merge bb62b0fc5a into f50a633f87 | 2024-09-18 22:26:36 +02:00 |
| Matthias | f50a633f87 | docs: order table formatting | 2024-09-18 07:11:12 +02:00 |
| Matthias | ad295946c0 | fix: use precise calculation for decrease adjustment calculations | 2024-09-17 20:19:22 +02:00 |
| Shane | bb62b0fc5a | Update ReinforcementLearner_DDPG_TD3.py (Clean up set policy code.) | 2024-05-26 20:21:16 +10:00 |
| Shane | 3436e8aa1d | Update Base5ActionRLEnv.py (Fix init) | 2024-05-24 22:15:34 +10:00 |
| Shane | 1d5abe5b75 | Update Base5ActionRLEnv.py (Fix init) | 2024-05-24 22:05:28 +10:00 |
| Shane | ffd828b6ad | Create ReinforcementLearner_DDPG_TD3.py (Reinforcement Learning Model to support DDPG and TD3.) | 2024-05-24 21:45:09 +10:00 |
| Shane | c83dd2d806 | Update BaseReinforcementLearningModel.py (Add support for DDPG and TD3.) | 2024-05-24 21:29:38 +10:00 |
| Shane | dc5766fb10 | Update Base5ActionRLEnv.py (Addition of action_space_type to support Discrete and Box action spaces.) | 2024-05-24 21:12:56 +10:00 |
| Shane | 07fba3abb0 | Update BaseEnvironment.py (Addition of action_space_type.) | 2024-05-24 21:10:06 +10:00 |
6 changed files with 594 additions and 23 deletions

View File

@@ -130,20 +130,20 @@ Most properties here can be None as they are dependent on the exchange response.
 | Attribute | DataType | Description |
 |------------|-------------|-------------|
-`trade` | Trade | Trade object this order is attached to
-`ft_pair` | string | Pair this order is for
-`ft_is_open` | boolean | is the order filled?
-`order_type` | string | Order type as defined on the exchange - usually market, limit or stoploss
-`status` | string | Status as defined by ccxt. Usually open, closed, expired or canceled
-`side` | string | Buy or Sell
-`price` | float | Price the order was placed at
-`average` | float | Average price the order filled at
-`amount` | float | Amount in base currency
-`filled` | float | Filled amount (in base currency)
-`remaining` | float | Remaining amount
-`cost` | float | Cost of the order - usually average * filled (*Exchange dependent on futures, may contain the cost with or without leverage and may be in contracts.*)
-`stake_amount` | float | Stake amount used for this order. *Added in 2023.7.*
-`order_date` | datetime | Order creation date **use `order_date_utc` instead**
-`order_date_utc` | datetime | Order creation date (in UTC)
-`order_fill_date` | datetime | Order fill date **use `order_fill_utc` instead**
-`order_fill_date_utc` | datetime | Order fill date
+| `trade` | Trade | Trade object this order is attached to |
+| `ft_pair` | string | Pair this order is for |
+| `ft_is_open` | boolean | is the order filled? |
+| `order_type` | string | Order type as defined on the exchange - usually market, limit or stoploss |
+| `status` | string | Status as defined by ccxt. Usually open, closed, expired or canceled |
+| `side` | string | Buy or Sell |
+| `price` | float | Price the order was placed at |
+| `average` | float | Average price the order filled at |
+| `amount` | float | Amount in base currency |
+| `filled` | float | Filled amount (in base currency) |
+| `remaining` | float | Remaining amount |
+| `cost` | float | Cost of the order - usually average * filled (*Exchange dependent on futures, may contain the cost with or without leverage and may be in contracts.*) |
+| `stake_amount` | float | Stake amount used for this order. *Added in 2023.7.* |
+| `order_date` | datetime | Order creation date **use `order_date_utc` instead** |
+| `order_date_utc` | datetime | Order creation date (in UTC) |
+| `order_fill_date` | datetime | Order fill date **use `order_fill_utc` instead** |
+| `order_fill_date_utc` | datetime | Order fill date |

View File

@@ -22,12 +22,18 @@ class Base5ActionRLEnv(BaseEnvironment):
     Base class for a 5 action environment
     """

-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(self, *args, action_space_type: str = "Discrete", **kwargs):
+        super().__init__(*args, **kwargs)
+        self.action_space_type = action_space_type
         self.actions = Actions

     def set_action_space(self):
-        self.action_space = spaces.Discrete(len(Actions))
+        if self.action_space_type == "Discrete":
+            self.action_space = spaces.Discrete(len(Actions))
+        elif self.action_space_type == "Box":
+            self.action_space = spaces.Box(low=-1, high=1, shape=(1,))
+        else:
+            raise ValueError(f"Unknown action space type: {self.action_space_type}")

     def step(self, action: int):
         """

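For reference, a minimal standalone sketch (not part of the diff) of the two spaces the new `action_space_type` switch selects, using gymnasium directly; the helper name below is made up:

```python
# Illustrative only: mirrors the branching added to set_action_space() above.
from gymnasium import spaces


def make_action_space(action_space_type: str = "Discrete"):
    """Return the action space the patched environment would build."""
    if action_space_type == "Discrete":
        # Five discrete actions: Neutral, Long_enter, Long_exit, Short_enter, Short_exit
        return spaces.Discrete(5)
    if action_space_type == "Box":
        # One continuous action in [-1, 1], as DDPG/TD3 require
        return spaces.Box(low=-1, high=1, shape=(1,))
    raise ValueError(f"Unknown action space type: {action_space_type}")


print(make_action_space("Discrete"))  # Discrete(5)
print(make_action_space("Box"))       # Box(-1.0, 1.0, (1,), float32)
```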
View File

@@ -60,6 +60,7 @@ class BaseEnvironment(gym.Env):
         can_short: bool = False,
         pair: str = "",
         df_raw: DataFrame = DataFrame(),
+        action_space_type: str = "Discrete"
     ):
         """
         Initializes the training/eval environment.
@@ -93,6 +94,7 @@
         self.tensorboard_metrics: dict = {}
         self.can_short: bool = can_short
         self.live: bool = live
+        self.action_space_type: str = action_space_type
         if not self.live and self.add_state_info:
             raise OperationalException(
                 "`add_state_info` is not available in backtesting. Change "

View File

@@ -32,7 +32,7 @@ logger = logging.getLogger(__name__)
 torch.multiprocessing.set_sharing_strategy("file_system")

-SB3_MODELS = ["PPO", "A2C", "DQN"]
+SB3_MODELS = ["PPO", "A2C", "DQN", "DDPG", "TD3"]
 SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]
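With DDPG and TD3 added to `SB3_MODELS`, the algorithm is presumably selected through the usual `rl_config` settings. A hedged sketch of that config fragment as a Python dict; the key names follow freqtrade's documented RL config (`model_type`, `policy_type`), and the values are placeholders rather than recommendations:

```python
# Sketch of the "freqai" -> "rl_config" fragment that would pick one of the
# newly allowed SB3 algorithms. Values are placeholders.
rl_config = {
    "model_type": "TD3",  # "DDPG" would be accepted the same way after this change
    "policy_type": "MlpPolicy",
    "train_cycles": 25,
    "max_trade_duration_candles": 300,
    "model_reward_parameters": {"rr": 1, "profit_aim": 0.025},
}

SB3_MODELS = ["PPO", "A2C", "DQN", "DDPG", "TD3"]  # the updated constant
assert rl_config["model_type"] in SB3_MODELS
```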

View File

@@ -0,0 +1,556 @@
import copy
import logging
import gc
from pathlib import Path
from typing import Any, Dict, Type, Callable, List, Optional, Union
import numpy as np
import torch as th
import pandas as pd
from pandas import DataFrame
from gymnasium import spaces
import matplotlib
import matplotlib.transforms as mtransforms
import matplotlib.pyplot as plt
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.logger import HParam, Figure
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, BaseActions
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
from freqtrade.freqai.tensorboard.TensorboardCallback import TensorboardCallback
logger = logging.getLogger(__name__)
class ReinforcementLearner_DDPG_TD3(BaseReinforcementLearningModel):
"""
Reinforcement Learning Model prediction model for DDPG and TD3.
Users can inherit from this class to make their own RL model with custom
environment/training controls. Define the file as follows:
```
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
class MyCoolRLModel(ReinforcementLearner):
```
Save the file to `user_data/freqaimodels`, then run it with:
freqtrade trade --freqaimodel MyCoolRLModel --config config.json --strategy SomeCoolStrat
Here the users can override any of the functions
available in the `IFreqaiModel` inheritance tree. Most importantly for RL, this
is where the user overrides `MyRLEnv` (see below), to define custom
`calculate_reward()` function, or to override any other parts of the environment.
This class also allows users to override any other part of the IFreqaiModel tree.
For example, the user can override `def fit()` or `def train()` or `def predict()`
to take fine-tuned control over these processes.
Another common override may be `def data_cleaning_predict()` where the user can
take fine-tuned control over the data handling pipeline.
"""
def __init__(self, **kwargs) -> None:
"""
Model specific config
"""
super().__init__(**kwargs)
# Enable learning rate linear schedule
self.lr_schedule: bool = self.rl_config.get("lr_schedule", False)
# Enable tensorboard logging
self.activate_tensorboard: bool = self.rl_config.get("activate_tensorboard", True)
# NOTE: the tensorboard callback is not recommended with multiple envs;
# it can report incorrect values and is not thread safe with SB3.
# Enable tensorboard rollout plot
self.tensorboard_plot: bool = self.rl_config.get("tensorboard_plot", False)
def get_model_params(self):
"""
Get the model specific parameters
"""
model_params = copy.deepcopy(self.freqai_info["model_training_parameters"])
if self.lr_schedule:
_lr = model_params.get('learning_rate', 0.0003)
model_params["learning_rate"] = linear_schedule(_lr)
logger.info(f"Learning rate linear schedule enabled, initial value: {_lr}")
model_params["policy_kwargs"] = dict(
net_arch=dict(vf=self.net_arch, pi=self.net_arch),
activation_fn=th.nn.ReLU,
optimizer_class=th.optim.Adam,
)
return model_params
def get_callbacks(self, eval_freq, data_path) -> list:
"""
Get the model specific callbacks
"""
callbacks = []
callbacks.append(self.eval_callback)
if self.activate_tensorboard:
callbacks.append(CustomTensorboardCallback())
if self.tensorboard_plot:
callbacks.append(FigureRecorderCallback())
return callbacks
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs):
"""
User customizable fit method
:param data_dictionary: dict = common data dictionary containing all train/test
features/labels/weights.
:param dk: FreqaiDataKitchen = data kitchen for the current pair.
:return:
model Any = trained model to be used for inference in dry/live/backtesting
"""
train_df = data_dictionary["train_features"]
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=self.net_arch)
if self.activate_tensorboard:
tb_path = Path(dk.full_path / "tensorboard" / dk.pair.split('/')[0])
else:
tb_path = None
model_params = self.get_model_params()
logger.info(f"Params: {model_params}")
if dk.pair not in self.dd.model_dictionary or not self.continual_learning:
model = self.MODELCLASS(self.policy_type, self.train_env,
tensorboard_log=tb_path,
**model_params)
else:
logger.info("Continual training activated - starting training from previously "
"trained agent.")
model = self.dd.model_dictionary[dk.pair]
model.set_env(self.train_env)
model.learn(
total_timesteps=int(total_timesteps),
#callback=[self.eval_callback, self.tensorboard_callback],
callback=self.get_callbacks(len(train_df), str(dk.data_path)),
progress_bar=self.rl_config.get("progress_bar", False)
)
if Path(dk.data_path / "best_model.zip").is_file():
logger.info("Callback found a best model.")
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
return best_model
logger.info("Couldn't find best model, using final model instead.")
return model
MyRLEnv: Type[BaseEnvironment]
class MyRLEnv(Base5ActionRLEnv): # type: ignore[no-redef]
"""
User can override any function in BaseRLEnv and gym.Env. Here the user
sets a custom reward based on profit and trade duration.
"""
def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, id="boxenv-1", seed=1, config={}, live=False, fee=0.0015, can_short=False, pair="", df_raw=None, action_space_type="Box"):
super().__init__(df, prices, reward_kwargs, window_size, starting_point, id, seed, config, live, fee, can_short, pair, df_raw)
# Define the action space as a continuous space between -1 and 1 for a single action dimension
self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
# Define the observation space as before
self.observation_space = spaces.Box(
low=-np.inf,
high=np.inf,
shape=(window_size, self.total_features),
dtype=np.float32
)
def calculate_reward(self, action: int) -> float:
"""
An example reward function. This is the one function that users will likely
wish to inject their own creativity into.
Warning!
This function is a showcase, designed to exercise as many environment control
features as possible. It is also designed to run quickly on small computers.
This is a benchmark; it is *not* for live production.
:param action: int = The action made by the agent for the current candle.
:return:
float = the reward to give to the agent for current step (used for optimization
of weights in NN)
"""
# first, penalize if the action is not valid
if not self._is_valid(action):
self.tensorboard_log("invalid", category="actions")
return -2
pnl = self.get_unrealized_profit()
factor = 100.
# reward agent for entering trades
if (action == Actions.Long_enter.value
and self._position == Positions.Neutral):
return 25
if (action == Actions.Short_enter.value
and self._position == Positions.Neutral):
return 25
# discourage agent from not entering trades
if action == Actions.Neutral.value and self._position == Positions.Neutral:
return -1
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
trade_duration = self._current_tick - self._last_trade_tick # type: ignore
if trade_duration <= max_trade_duration:
factor *= 1.5
elif trade_duration > max_trade_duration:
factor *= 0.5
# discourage sitting in position
if (self._position in (Positions.Short, Positions.Long) and
action == Actions.Neutral.value):
return -1 * trade_duration / max_trade_duration
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor)
return 0.
def step(self, action):
"""
Logic for a single step (incrementing one candle in time)
by the agent
:param: action: int = the action type that the agent plans
to take for the current step.
:returns:
observation = current state of environment
step_reward = the reward from `calculate_reward()`
_done = if the agent "died" or if the candles finished
info = dict passed back to openai gym lib
"""
# Ensure action is within the range [-1, 1]
action = np.clip(action, -1, 1)
# Apply noise for exploration
self.noise_std = 0.3 # Standard deviation for exploration noise
noise = np.random.normal(0, self.noise_std, size=action.shape)
action = np.tanh(action + noise) # Ensure action is within -1 to 1
# Map the continuous action to one of the five discrete actions
discrete_action = self._map_continuous_to_discrete(action)
#print(f"{self._current_tick} Action!!!: {action}")
#print(f"{self._current_tick} Discrete Action!!!: {discrete_action}")
self._done = False
self._current_tick += 1
if self._current_tick == self._end_tick:
self._done = True
self._update_unrealized_total_profit()
step_reward = self.calculate_reward(discrete_action)
self.total_reward += step_reward
self.tensorboard_log(self.actions._member_names_[discrete_action], category="actions")
trade_type = None
if self.is_tradesignal(discrete_action):
if discrete_action == Actions.Neutral.value:
self._position = Positions.Neutral
trade_type = "neutral"
self._last_trade_tick = None
elif discrete_action == Actions.Long_enter.value:
self._position = Positions.Long
trade_type = "enter_long"
self._last_trade_tick = self._current_tick
elif discrete_action == Actions.Short_enter.value:
self._position = Positions.Short
trade_type = "enter_short"
self._last_trade_tick = self._current_tick
elif discrete_action == Actions.Long_exit.value:
self._update_total_profit()
self._position = Positions.Neutral
trade_type = "exit_long"
self._last_trade_tick = None
elif discrete_action == Actions.Short_exit.value:
self._update_total_profit()
self._position = Positions.Neutral
trade_type = "exit_short"
self._last_trade_tick = None
else:
print("case not defined")
if trade_type is not None:
self.trade_history.append(
{"price": self.current_price(), "index": self._current_tick,
"type": trade_type, "profit": self.get_unrealized_profit()})
if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
self._done = True
self._position_history.append(self._position)
info = dict(
tick=self._current_tick,
action=discrete_action,
total_reward=self.total_reward,
total_profit=self._total_profit,
position=self._position.value,
trade_duration=self.get_trade_duration(),
current_profit_pct=self.get_unrealized_profit()
)
observation = self._get_observation()
# user can play with time if they want
truncated = False
self._update_history(info)
return observation, step_reward, self._done, truncated, info
def _map_continuous_to_discrete(self, action):
"""
Map the continuous action (a value between -1 and 1) to one of the discrete actions.
"""
action_value = action[0] # Extract the single continuous action value
# Define the number of discrete actions
num_discrete_actions = 5
# Calculate the step size for each interval
step_size = 2 / num_discrete_actions # (2 because range is from -1 to 1)
# Generate the boundaries dynamically
boundaries = th.linspace(-1 + step_size, 1 - step_size, steps=num_discrete_actions - 1)
# Find the bucket index for the action value
bucket_index = th.bucketize(th.tensor(action_value), boundaries, right=True)
# Map the bucket index to discrete actions
discrete_actions = [
BaseActions.Neutral,
BaseActions.Long_enter,
BaseActions.Long_exit,
BaseActions.Short_enter,
BaseActions.Short_exit
]
return discrete_actions[bucket_index].value
def get_rollout_history(self) -> DataFrame:
"""
Get environment data from the first to the last trade
"""
_history_df = pd.DataFrame.from_dict(self.history)
_trade_history_df = pd.DataFrame.from_dict(self.trade_history)
_rollout_history = _history_df.merge(_trade_history_df, left_on="tick", right_on="index", how="left")
_price_history = self.prices.iloc[_rollout_history.tick].copy().reset_index()
history = pd.merge(
_rollout_history,
_price_history,
left_index=True, right_index=True
)
return history
def get_rollout_plot(self):
"""
Plot trades and environment data
"""
def transform_y_offset(ax, offset):
return mtransforms.offset_copy(ax.transData, fig=fig, x=0, y=offset, units="inches")
def plot_markers(ax, ticks, marker, color, size, offset):
ax.plot(ticks, marker=marker, color=color, markersize=size, fillstyle="full",
transform=transform_y_offset(ax, offset), linestyle="none")
plt.style.use("dark_background")
fig, axs = plt.subplots(
nrows=5, ncols=1,
figsize=(16, 9),
height_ratios=[6, 1, 1, 1, 1],
sharex=True
)
# Return empty fig if no trades
if len(self.trade_history) == 0:
return fig
history = self.get_rollout_history()
enter_long_prices = history.loc[history["type"] == "enter_long"]["price"]
enter_short_prices = history.loc[history["type"] == "enter_short"]["price"]
exit_long_prices = history.loc[history["type"] == "exit_long"]["price"]
exit_short_prices = history.loc[history["type"] == "exit_short"]["price"]
axs[0].plot(history["open"], linewidth=1, color="#c28ce3")
plot_markers(axs[0], enter_long_prices, "^", "#4ae747", 5, -0.05)
plot_markers(axs[0], enter_short_prices, "v", "#f53580", 5, 0.05)
plot_markers(axs[0], exit_long_prices, "o", "#4ae747", 3, 0)
plot_markers(axs[0], exit_short_prices, "o", "#f53580", 3, 0)
axs[1].set_ylabel("pnl")
axs[1].plot(history["current_profit_pct"], linewidth=1, color="#a29db9")
axs[1].axhline(y=0, label='0', alpha=0.33)
axs[2].set_ylabel("duration")
axs[2].plot(history["trade_duration"], linewidth=1, color="#a29db9")
axs[3].set_ylabel("total_reward")
axs[3].plot(history["total_reward"], linewidth=1, color="#a29db9")
axs[3].axhline(y=0, label='0', alpha=0.33)
axs[4].set_ylabel("total_profit")
axs[4].set_xlabel("tick")
axs[4].plot(history["total_profit"], linewidth=1, color="#a29db9")
axs[4].axhline(y=1, label='1', alpha=0.33)
for _ax in axs:
for _border in ["top", "right", "bottom", "left"]:
_ax.spines[_border].set_color("#5b5e4b")
fig.suptitle(
"Total Reward: %.6f" % self.total_reward + " ~ " +
"Total Profit: %.6f" % self._total_profit
)
fig.tight_layout()
return fig
def close(self) -> None:
gc.collect()
th.cuda.empty_cache()
def linear_schedule(initial_value: float) -> Callable[[float], float]:
def func(progress_remaining: float) -> float:
return progress_remaining * initial_value
return func
class CustomTensorboardCallback(TensorboardCallback):
"""
Tensorboard callback
"""
def _on_training_start(self) -> None:
_lr = self.model.learning_rate
if self.model.__class__.__name__ == "DDPG":
hparam_dict = {
"algorithm": self.model.__class__.__name__,
"buffer_size": self.model.buffer_size,
"learning_rate": _lr if isinstance(_lr, float) else "lr_schedule",
"learning_starts": self.model.learning_starts,
"batch_size": self.model.batch_size,
"tau": self.model.tau,
"gamma": self.model.gamma,
"train_freq": self.model.train_freq,
"gradient_steps": self.model.gradient_steps,
}
elif self.model.__class__.__name__ == "TD3":
hparam_dict = {
"algorithm": self.model.__class__.__name__,
"learning_rate": _lr if isinstance(_lr, float) else "lr_schedule",
"buffer_size": self.model.buffer_size,
"learning_starts": self.model.learning_starts,
"batch_size": self.model.batch_size,
"tau": self.model.tau,
"gamma": self.model.gamma,
"train_freq": self.model.train_freq,
"gradient_steps": self.model.gradient_steps,
"policy_delay": self.model.policy_delay,
"target_policy_noise": self.model.target_policy_noise,
"target_noise_clip": self.model.target_noise_clip,
}
else:
hparam_dict = {
"algorithm": self.model.__class__.__name__,
"learning_rate": _lr if isinstance(_lr, float) else "lr_schedule",
"gamma": self.model.gamma,
"gae_lambda": self.model.gae_lambda,
"n_steps": self.model.n_steps,
"batch_size": self.model.batch_size,
}
# Convert hparam_dict values to str if they are not of type int, float, str, bool, or torch.Tensor
hparam_dict = {k: (str(v) if not isinstance(v, (int, float, str, bool, th.Tensor)) else v) for k, v in hparam_dict.items()}
metric_dict = {
"eval/mean_reward": 0,
"rollout/ep_rew_mean": 0,
"rollout/ep_len_mean": 0,
"info/total_profit": 1,
"info/trades_count": 0,
"info/trade_duration": 0,
}
self.logger.record(
"hparams",
HParam(hparam_dict, metric_dict),
exclude=("stdout", "log", "json", "csv"),
)
def _on_step(self) -> bool:
local_info = self.locals["infos"][0]
if self.training_env is None:
return True
tensorboard_metrics = self.training_env.env_method("get_wrapper_attr", "tensorboard_metrics")[0]
for metric in local_info:
if metric not in ["episode", "terminal_observation", "TimeLimit.truncated"]:
self.logger.record(f"info/{metric}", local_info[metric])
for category in tensorboard_metrics:
for metric in tensorboard_metrics[category]:
self.logger.record(f"{category}/{metric}", tensorboard_metrics[category][metric])
return True
class FigureRecorderCallback(BaseCallback):
"""
Tensorboard figures callback
"""
def __init__(self, verbose=0):
super().__init__(verbose)
def _on_step(self) -> bool:
return True
def _on_rollout_end(self):
try:
# Access the rollout plot directly from the base environment
figures = [env.unwrapped.get_rollout_plot() for env in self.training_env.envs]
except AttributeError:
# If the above fails, try getting it from the wrappers
figures = self.training_env.env_method("get_wrapper_attr", "get_rollout_plot")
for i, fig in enumerate(figures):
self.logger.record(
f"rollout/env_{i}",
Figure(fig, close=True),
exclude=("stdout", "log", "json", "csv")
)
plt.close(fig)
return True
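As a sanity check of the bucketing in `_map_continuous_to_discrete` above, a standalone sketch (torch only; the action names are hardcoded here for illustration) of which region of [-1, 1] maps to which discrete action:

```python
# Standalone check of the bucketing used in _map_continuous_to_discrete.
import torch as th

ACTIONS = ["Neutral", "Long_enter", "Long_exit", "Short_enter", "Short_exit"]

num_discrete_actions = 5
step_size = 2 / num_discrete_actions  # 0.4, since the range is [-1, 1]
# boundaries == tensor([-0.6, -0.2, 0.2, 0.6])
boundaries = th.linspace(-1 + step_size, 1 - step_size, steps=num_discrete_actions - 1)

for value in (-0.9, -0.4, 0.0, 0.4, 0.9):
    bucket = th.bucketize(th.tensor(value), boundaries, right=True)
    print(f"{value:+.1f} -> {ACTIONS[bucket]}")

# Expected:
# -0.9 -> Neutral
# -0.4 -> Long_enter
# +0.0 -> Long_exit
# +0.4 -> Short_enter
# +0.9 -> Short_exit
```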

View File

@@ -62,7 +62,7 @@ from freqtrade.rpc.rpc_types import (
 )
 from freqtrade.strategy.interface import IStrategy
 from freqtrade.strategy.strategy_wrapper import strategy_safe_wrapper
-from freqtrade.util import MeasureTime
+from freqtrade.util import FtPrecise, MeasureTime
 from freqtrade.util.migrations.binance_mig import migrate_binance_futures_names
 from freqtrade.wallets import Wallets
@@ -784,7 +784,14 @@ class FreqtradeBot(LoggingMixin):
         if stake_amount is not None and stake_amount < 0.0:
             # We should decrease our position
             amount = self.exchange.amount_to_contract_precision(
-                trade.pair, abs(float(stake_amount * trade.amount / trade.stake_amount))
+                trade.pair,
+                abs(
+                    float(
+                        FtPrecise(stake_amount)
+                        * FtPrecise(trade.amount)
+                        / FtPrecise(trade.stake_amount)
+                    )
+                ),
             )
             if amount == 0.0:
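The `FtPrecise` change keeps binary floating point drift out of the decrease-amount ratio before it is rounded to contract precision. A rough illustration (not freqtrade code) using `decimal.Decimal` as a stand-in for `FtPrecise`, with made-up trade numbers:

```python
# Illustration only: Decimal stands in for FtPrecise; the numbers are made up.
import math
from decimal import Decimal

stake_amount = -0.3  # negative stake => decrease the position
trade_amount = 1.0
trade_stake_amount = 0.1

naive = abs(float(stake_amount * trade_amount / trade_stake_amount))
precise = abs(
    float(
        Decimal(str(stake_amount))
        * Decimal(str(trade_amount))
        / Decimal(str(trade_stake_amount))
    )
)

print(naive)    # 2.9999999999999996  <- float drift
print(precise)  # 3.0

# If the exchange only accepts whole contracts and the amount is truncated,
# the drift changes the resulting order size:
print(math.floor(naive), math.floor(precise))  # 2 3
```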