mirror of
https://github.com/freqtrade/freqtrade.git
synced 2024-11-10 02:12:01 +00:00
Compare commits
32 Commits
4857d8c1ef
...
36e2e160f1
Author | SHA1 | Date | |
---|---|---|---|
|
36e2e160f1 | ||
|
2fe67edab3 | ||
|
167e43cbef | ||
|
d8cb407c25 | ||
|
9452afe3f7 | ||
|
8dc6d9ce7d | ||
|
bf4b8a318d | ||
|
20f6022050 | ||
|
65e6c737cd | ||
|
6d2572e347 | ||
|
52a35197c7 | ||
|
2b3a41db3e | ||
|
09c1459411 | ||
|
98f18b89da | ||
|
4249db4330 | ||
|
a7f46500ed | ||
|
c73fa2b0eb | ||
|
db4c4b971a | ||
|
e9ccc98ada | ||
|
11d6ec33b3 | ||
|
c3b6f4ca85 | ||
|
d37405a307 | ||
|
d7a9841328 | ||
|
cf3af42477 | ||
|
ad8e6e7d67 | ||
|
bb62b0fc5a | ||
|
3436e8aa1d | ||
|
1d5abe5b75 | ||
|
ffd828b6ad | ||
|
c83dd2d806 | ||
|
dc5766fb10 | ||
|
07fba3abb0 |
|
@ -16,7 +16,7 @@ repos:
|
|||
additional_dependencies:
|
||||
- types-cachetools==5.5.0.20240820
|
||||
- types-filelock==3.2.7
|
||||
- types-requests==2.32.0.20240907
|
||||
- types-requests==2.32.0.20240914
|
||||
- types-tabulate==0.9.0.20240106
|
||||
- types-python-dateutil==2.9.0.20240906
|
||||
- SQLAlchemy==2.0.34
|
||||
|
|
|
@ -3594,7 +3594,7 @@ class Exchange:
|
|||
Wherein, "+" or "-" depends on whether the contract goes long or short:
|
||||
"-" for long, and "+" for short.
|
||||
|
||||
okex: https://www.okex.com/support/hc/en-us/articles/
|
||||
okex: https://www.okx.com/support/hc/en-us/articles/
|
||||
360053909592-VI-Introduction-to-the-isolated-mode-of-Single-Multi-currency-Portfolio-margin
|
||||
|
||||
:param pair: Pair to calculate liquidation price for
|
||||
|
|
|
@ -22,12 +22,18 @@ class Base5ActionRLEnv(BaseEnvironment):
|
|||
Base class for a 5 action environment
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
super().__init__(**kwargs)
|
||||
def __init__(self, *args, action_space_type: str = "Discrete", **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.action_space_type = action_space_type
|
||||
self.actions = Actions
|
||||
|
||||
def set_action_space(self):
|
||||
if self.action_space_type == "Discrete":
|
||||
self.action_space = spaces.Discrete(len(Actions))
|
||||
elif self.action_space_type == "Box":
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(1,))
|
||||
else:
|
||||
raise ValueError(f"Unknown action space type: {self.action_space_type}")
|
||||
|
||||
def step(self, action: int):
|
||||
"""
|
||||
|
|
|
@ -60,6 +60,7 @@ class BaseEnvironment(gym.Env):
|
|||
can_short: bool = False,
|
||||
pair: str = "",
|
||||
df_raw: DataFrame = DataFrame(),
|
||||
action_space_type: str = "Discrete"
|
||||
):
|
||||
"""
|
||||
Initializes the training/eval environment.
|
||||
|
@ -93,6 +94,7 @@ class BaseEnvironment(gym.Env):
|
|||
self.tensorboard_metrics: dict = {}
|
||||
self.can_short: bool = can_short
|
||||
self.live: bool = live
|
||||
self.action_space_type: str = action_space_type
|
||||
if not self.live and self.add_state_info:
|
||||
raise OperationalException(
|
||||
"`add_state_info` is not available in backtesting. Change "
|
||||
|
|
|
@ -32,7 +32,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
torch.multiprocessing.set_sharing_strategy("file_system")
|
||||
|
||||
SB3_MODELS = ["PPO", "A2C", "DQN"]
|
||||
SB3_MODELS = ["PPO", "A2C", "DQN", "DDPG", "TD3"]
|
||||
SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,556 @@
|
|||
import copy
|
||||
import logging
|
||||
import gc
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Type, Callable, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import torch as th
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from gymnasium import spaces
|
||||
import matplotlib
|
||||
import matplotlib.transforms as mtransforms
|
||||
import matplotlib.pyplot as plt
|
||||
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
|
||||
from stable_baselines3.common.logger import HParam, Figure
|
||||
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions
|
||||
from freqtrade.freqai.RL.BaseEnvironment import BaseEnvironment, BaseActions
|
||||
from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
|
||||
from freqtrade.freqai.tensorboard.TensorboardCallback import TensorboardCallback
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ReinforcementLearner_DDPG_TD3(BaseReinforcementLearningModel):
|
||||
"""
|
||||
Reinforcement Learning Model prediction model for DDPG and TD3.
|
||||
|
||||
Users can inherit from this class to make their own RL model with custom
|
||||
environment/training controls. Define the file as follows:
|
||||
|
||||
```
|
||||
from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
|
||||
|
||||
class MyCoolRLModel(ReinforcementLearner):
|
||||
```
|
||||
|
||||
Save the file to `user_data/freqaimodels`, then run it with:
|
||||
|
||||
freqtrade trade --freqaimodel MyCoolRLModel --config config.json --strategy SomeCoolStrat
|
||||
|
||||
Here the users can override any of the functions
|
||||
available in the `IFreqaiModel` inheritance tree. Most importantly for RL, this
|
||||
is where the user overrides `MyRLEnv` (see below), to define custom
|
||||
`calculate_reward()` function, or to override any other parts of the environment.
|
||||
|
||||
This class also allows users to override any other part of the IFreqaiModel tree.
|
||||
For example, the user can override `def fit()` or `def train()` or `def predict()`
|
||||
to take fine-tuned control over these processes.
|
||||
|
||||
Another common override may be `def data_cleaning_predict()` where the user can
|
||||
take fine-tuned control over the data handling pipeline.
|
||||
"""
|
||||
|
||||
def __init__(self, **kwargs) -> None:
|
||||
"""
|
||||
Model specific config
|
||||
"""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
# Enable learning rate linear schedule
|
||||
self.lr_schedule: bool = self.rl_config.get("lr_schedule", False)
|
||||
|
||||
# Enable tensorboard logging
|
||||
self.activate_tensorboard: bool = self.rl_config.get("activate_tensorboard", True)
|
||||
# TENSORBOARD CALLBACK DOES NOT RECOMMENDED TO USE WITH MULTIPLE ENVS,
|
||||
# IT WILL RETURN FALSE INFORMATIONS, NEVERTHLESS NOT THREAD SAFE WITH SB3!!!
|
||||
|
||||
# Enable tensorboard rollout plot
|
||||
self.tensorboard_plot: bool = self.rl_config.get("tensorboard_plot", False)
|
||||
|
||||
def get_model_params(self):
|
||||
"""
|
||||
Get the model specific parameters
|
||||
"""
|
||||
model_params = copy.deepcopy(self.freqai_info["model_training_parameters"])
|
||||
|
||||
if self.lr_schedule:
|
||||
_lr = model_params.get('learning_rate', 0.0003)
|
||||
model_params["learning_rate"] = linear_schedule(_lr)
|
||||
logger.info(f"Learning rate linear schedule enabled, initial value: {_lr}")
|
||||
|
||||
model_params["policy_kwargs"] = dict(
|
||||
net_arch=dict(vf=self.net_arch, pi=self.net_arch),
|
||||
activation_fn=th.nn.ReLU,
|
||||
optimizer_class=th.optim.Adam
|
||||
|
||||
return model_params
|
||||
|
||||
def get_callbacks(self, eval_freq, data_path) -> list:
|
||||
"""
|
||||
Get the model specific callbacks
|
||||
"""
|
||||
callbacks = []
|
||||
callbacks.append(self.eval_callback)
|
||||
if self.activate_tensorboard:
|
||||
callbacks.append(CustomTensorboardCallback())
|
||||
if self.tensorboard_plot:
|
||||
callbacks.append(FigureRecorderCallback())
|
||||
return callbacks
|
||||
|
||||
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs):
|
||||
"""
|
||||
User customizable fit method
|
||||
:param data_dictionary: dict = common data dictionary containing all train/test
|
||||
features/labels/weights.
|
||||
:param dk: FreqaiDatakitchen = data kitchen for current pair.
|
||||
:return:
|
||||
model Any = trained model to be used for inference in dry/live/backtesting
|
||||
"""
|
||||
train_df = data_dictionary["train_features"]
|
||||
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
|
||||
|
||||
policy_kwargs = dict(activation_fn=th.nn.ReLU,
|
||||
net_arch=self.net_arch)
|
||||
|
||||
if self.activate_tensorboard:
|
||||
tb_path = Path(dk.full_path / "tensorboard" / dk.pair.split('/')[0])
|
||||
else:
|
||||
tb_path = None
|
||||
|
||||
model_params = self.get_model_params()
|
||||
logger.info(f"Params: {model_params}")
|
||||
|
||||
if dk.pair not in self.dd.model_dictionary or not self.continual_learning:
|
||||
model = self.MODELCLASS(self.policy_type, self.train_env,
|
||||
tensorboard_log=tb_path,
|
||||
**model_params)
|
||||
else:
|
||||
logger.info("Continual training activated - starting training from previously "
|
||||
"trained agent.")
|
||||
model = self.dd.model_dictionary[dk.pair]
|
||||
model.set_env(self.train_env)
|
||||
|
||||
model.learn(
|
||||
total_timesteps=int(total_timesteps),
|
||||
#callback=[self.eval_callback, self.tensorboard_callback],
|
||||
callback=self.get_callbacks(len(train_df), str(dk.data_path)),
|
||||
progress_bar=self.rl_config.get("progress_bar", False)
|
||||
)
|
||||
|
||||
if Path(dk.data_path / "best_model.zip").is_file():
|
||||
logger.info("Callback found a best model.")
|
||||
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
|
||||
return best_model
|
||||
|
||||
logger.info("Couldnt find best model, using final model instead.")
|
||||
|
||||
return model
|
||||
|
||||
MyRLEnv: Type[BaseEnvironment]
|
||||
|
||||
class MyRLEnv(Base5ActionRLEnv): # type: ignore[no-redef]
|
||||
"""
|
||||
User can override any function in BaseRLEnv and gym.Env. Here the user
|
||||
sets a custom reward based on profit and trade duration.
|
||||
"""
|
||||
def __init__(self, df, prices, reward_kwargs, window_size=10, starting_point=True, id="boxenv-1", seed=1, config={}, live=False, fee=0.0015, can_short=False, pair="", df_raw=None, action_space_type="Box"):
|
||||
super().__init__(df, prices, reward_kwargs, window_size, starting_point, id, seed, config, live, fee, can_short, pair, df_raw)
|
||||
|
||||
# Define the action space as a continuous space between -1 and 1 for a single action dimension
|
||||
self.action_space = spaces.Box(low=-1, high=1, shape=(1,), dtype=np.float32)
|
||||
|
||||
# Define the observation space as before
|
||||
self.observation_space = spaces.Box(
|
||||
low=-np.inf,
|
||||
high=np.inf,
|
||||
shape=(window_size, self.total_features),
|
||||
dtype=np.float32
|
||||
)
|
||||
|
||||
def calculate_reward(self, action: int) -> float:
|
||||
"""
|
||||
An example reward function. This is the one function that users will likely
|
||||
wish to inject their own creativity into.
|
||||
|
||||
Warning!
|
||||
This is function is a showcase of functionality designed to show as many possible
|
||||
environment control features as possible. It is also designed to run quickly
|
||||
on small computers. This is a benchmark, it is *not* for live production.
|
||||
|
||||
:param action: int = The action made by the agent for the current candle.
|
||||
:return:
|
||||
float = the reward to give to the agent for current step (used for optimization
|
||||
of weights in NN)
|
||||
"""
|
||||
# first, penalize if the action is not valid
|
||||
if not self._is_valid(action):
|
||||
self.tensorboard_log("invalid", category="actions")
|
||||
return -2
|
||||
|
||||
pnl = self.get_unrealized_profit()
|
||||
factor = 100.
|
||||
|
||||
# reward agent for entering trades
|
||||
if (action == Actions.Long_enter.value
|
||||
and self._position == Positions.Neutral):
|
||||
return 25
|
||||
if (action == Actions.Short_enter.value
|
||||
and self._position == Positions.Neutral):
|
||||
return 25
|
||||
# discourage agent from not entering trades
|
||||
if action == Actions.Neutral.value and self._position == Positions.Neutral:
|
||||
return -1
|
||||
|
||||
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
|
||||
trade_duration = self._current_tick - self._last_trade_tick # type: ignore
|
||||
|
||||
if trade_duration <= max_trade_duration:
|
||||
factor *= 1.5
|
||||
elif trade_duration > max_trade_duration:
|
||||
factor *= 0.5
|
||||
|
||||
# discourage sitting in position
|
||||
if (self._position in (Positions.Short, Positions.Long) and
|
||||
action == Actions.Neutral.value):
|
||||
return -1 * trade_duration / max_trade_duration
|
||||
|
||||
# close long
|
||||
if action == Actions.Long_exit.value and self._position == Positions.Long:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
# close short
|
||||
if action == Actions.Short_exit.value and self._position == Positions.Short:
|
||||
if pnl > self.profit_aim * self.rr:
|
||||
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
|
||||
return float(pnl * factor)
|
||||
|
||||
return 0.
|
||||
|
||||
def step(self, action):
|
||||
"""
|
||||
Logic for a single step (incrementing one candle in time)
|
||||
by the agent
|
||||
:param: action: int = the action type that the agent plans
|
||||
to take for the current step.
|
||||
:returns:
|
||||
observation = current state of environment
|
||||
step_reward = the reward from `calculate_reward()`
|
||||
_done = if the agent "died" or if the candles finished
|
||||
info = dict passed back to openai gym lib
|
||||
"""
|
||||
|
||||
# Ensure action is within the range [-1, 1]
|
||||
action = np.clip(action, -1, 1)
|
||||
|
||||
# Apply noise for exploration
|
||||
self.noise_std = 0.3 # Standard deviation for exploration noise
|
||||
noise = np.random.normal(0, self.noise_std, size=action.shape)
|
||||
action = np.tanh(action + noise) # Ensure action is within -1 to 1
|
||||
|
||||
# Map the continuous action to one of the five discrete actions
|
||||
discrete_action = self._map_continuous_to_discrete(action)
|
||||
|
||||
#print(f"{self._current_tick} Action!!!: {action}")
|
||||
#print(f"{self._current_tick} Discrete Action!!!: {discrete_action}")
|
||||
|
||||
self._done = False
|
||||
self._current_tick += 1
|
||||
|
||||
if self._current_tick == self._end_tick:
|
||||
self._done = True
|
||||
|
||||
self._update_unrealized_total_profit()
|
||||
step_reward = self.calculate_reward(discrete_action)
|
||||
self.total_reward += step_reward
|
||||
|
||||
self.tensorboard_log(self.actions._member_names_[discrete_action], category="actions")
|
||||
|
||||
trade_type = None
|
||||
if self.is_tradesignal(discrete_action):
|
||||
|
||||
if discrete_action == Actions.Neutral.value:
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "neutral"
|
||||
self._last_trade_tick = None
|
||||
elif discrete_action == Actions.Long_enter.value:
|
||||
self._position = Positions.Long
|
||||
trade_type = "enter_long"
|
||||
self._last_trade_tick = self._current_tick
|
||||
elif discrete_action == Actions.Short_enter.value:
|
||||
self._position = Positions.Short
|
||||
trade_type = "enter_short"
|
||||
self._last_trade_tick = self._current_tick
|
||||
elif discrete_action == Actions.Long_exit.value:
|
||||
self._update_total_profit()
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "exit_long"
|
||||
self._last_trade_tick = None
|
||||
elif discrete_action == Actions.Short_exit.value:
|
||||
self._update_total_profit()
|
||||
self._position = Positions.Neutral
|
||||
trade_type = "exit_short"
|
||||
self._last_trade_tick = None
|
||||
else:
|
||||
print("case not defined")
|
||||
|
||||
if trade_type is not None:
|
||||
self.trade_history.append(
|
||||
{"price": self.current_price(), "index": self._current_tick,
|
||||
"type": trade_type, "profit": self.get_unrealized_profit()})
|
||||
|
||||
if (self._total_profit < self.max_drawdown or
|
||||
self._total_unrealized_profit < self.max_drawdown):
|
||||
self._done = True
|
||||
|
||||
self._position_history.append(self._position)
|
||||
|
||||
info = dict(
|
||||
tick=self._current_tick,
|
||||
action=discrete_action,
|
||||
total_reward=self.total_reward,
|
||||
total_profit=self._total_profit,
|
||||
position=self._position.value,
|
||||
trade_duration=self.get_trade_duration(),
|
||||
current_profit_pct=self.get_unrealized_profit()
|
||||
)
|
||||
|
||||
observation = self._get_observation()
|
||||
# user can play with time if they want
|
||||
truncated = False
|
||||
|
||||
self._update_history(info)
|
||||
|
||||
return observation, step_reward, self._done, truncated, info
|
||||
|
||||
def _map_continuous_to_discrete(self, action):
|
||||
"""
|
||||
Map the continuous action (a value between -1 and 1) to one of the discrete actions.
|
||||
"""
|
||||
action_value = action[0] # Extract the single continuous action value
|
||||
|
||||
# Define the number of discrete actions
|
||||
num_discrete_actions = 5
|
||||
|
||||
# Calculate the step size for each interval
|
||||
step_size = 2 / num_discrete_actions # (2 because range is from -1 to 1)
|
||||
|
||||
# Generate the boundaries dynamically
|
||||
boundaries = th.linspace(-1 + step_size, 1 - step_size, steps=num_discrete_actions - 1)
|
||||
|
||||
# Find the bucket index for the action value
|
||||
bucket_index = th.bucketize(th.tensor(action_value), boundaries, right=True)
|
||||
|
||||
# Map the bucket index to discrete actions
|
||||
discrete_actions = [
|
||||
BaseActions.Neutral,
|
||||
BaseActions.Long_enter,
|
||||
BaseActions.Long_exit,
|
||||
BaseActions.Short_enter,
|
||||
BaseActions.Short_exit
|
||||
]
|
||||
|
||||
return discrete_actions[bucket_index].value
|
||||
|
||||
def get_rollout_history(self) -> DataFrame:
|
||||
"""
|
||||
Get environment data from the first to the last trade
|
||||
"""
|
||||
_history_df = pd.DataFrame.from_dict(self.history)
|
||||
_trade_history_df = pd.DataFrame.from_dict(self.trade_history)
|
||||
_rollout_history = _history_df.merge(_trade_history_df, left_on="tick", right_on="index", how="left")
|
||||
|
||||
_price_history = self.prices.iloc[_rollout_history.tick].copy().reset_index()
|
||||
|
||||
history = pd.merge(
|
||||
_rollout_history,
|
||||
_price_history,
|
||||
left_index=True, right_index=True
|
||||
)
|
||||
return history
|
||||
|
||||
def get_rollout_plot(self):
|
||||
"""
|
||||
Plot trades and environment data
|
||||
"""
|
||||
def transform_y_offset(ax, offset):
|
||||
return mtransforms.offset_copy(ax.transData, fig=fig, x=0, y=offset, units="inches")
|
||||
|
||||
def plot_markers(ax, ticks, marker, color, size, offset):
|
||||
ax.plot(ticks, marker=marker, color=color, markersize=size, fillstyle="full",
|
||||
transform=transform_y_offset(ax, offset), linestyle="none")
|
||||
|
||||
plt.style.use("dark_background")
|
||||
fig, axs = plt.subplots(
|
||||
nrows=5, ncols=1,
|
||||
figsize=(16, 9),
|
||||
height_ratios=[6, 1, 1, 1, 1],
|
||||
sharex=True
|
||||
)
|
||||
|
||||
# Return empty fig if no trades
|
||||
if len(self.trade_history) == 0:
|
||||
return fig
|
||||
|
||||
history = self.get_rollout_history()
|
||||
enter_long_prices = history.loc[history["type"] == "enter_long"]["price"]
|
||||
enter_short_prices = history.loc[history["type"] == "enter_short"]["price"]
|
||||
exit_long_prices = history.loc[history["type"] == "exit_long"]["price"]
|
||||
exit_short_prices = history.loc[history["type"] == "exit_short"]["price"]
|
||||
|
||||
axs[0].plot(history["open"], linewidth=1, color="#c28ce3")
|
||||
plot_markers(axs[0], enter_long_prices, "^", "#4ae747", 5, -0.05)
|
||||
plot_markers(axs[0], enter_short_prices, "v", "#f53580", 5, 0.05)
|
||||
plot_markers(axs[0], exit_long_prices, "o", "#4ae747", 3, 0)
|
||||
plot_markers(axs[0], exit_short_prices, "o", "#f53580", 3, 0)
|
||||
|
||||
axs[1].set_ylabel("pnl")
|
||||
axs[1].plot(history["current_profit_pct"], linewidth=1, color="#a29db9")
|
||||
axs[1].axhline(y=0, label='0', alpha=0.33)
|
||||
axs[2].set_ylabel("duration")
|
||||
axs[2].plot(history["trade_duration"], linewidth=1, color="#a29db9")
|
||||
axs[3].set_ylabel("total_reward")
|
||||
axs[3].plot(history["total_reward"], linewidth=1, color="#a29db9")
|
||||
axs[3].axhline(y=0, label='0', alpha=0.33)
|
||||
axs[4].set_ylabel("total_profit")
|
||||
axs[4].set_xlabel("tick")
|
||||
axs[4].plot(history["total_profit"], linewidth=1, color="#a29db9")
|
||||
axs[4].axhline(y=1, label='1', alpha=0.33)
|
||||
|
||||
for _ax in axs:
|
||||
for _border in ["top", "right", "bottom", "left"]:
|
||||
_ax.spines[_border].set_color("#5b5e4b")
|
||||
|
||||
fig.suptitle(
|
||||
"Total Reward: %.6f" % self.total_reward + " ~ " +
|
||||
"Total Profit: %.6f" % self._total_profit
|
||||
)
|
||||
fig.tight_layout()
|
||||
|
||||
return fig
|
||||
|
||||
def close(self) -> None:
|
||||
gc.collect()
|
||||
th.cuda.empty_cache()
|
||||
|
||||
def linear_schedule(initial_value: float) -> Callable[[float], float]:
|
||||
def func(progress_remaining: float) -> float:
|
||||
return progress_remaining * initial_value
|
||||
return func
|
||||
|
||||
class CustomTensorboardCallback(TensorboardCallback):
|
||||
"""
|
||||
Tensorboard callback
|
||||
"""
|
||||
|
||||
def _on_training_start(self) -> None:
|
||||
_lr = self.model.learning_rate
|
||||
|
||||
if self.model.__class__.__name__ == "DDPG":
|
||||
hparam_dict = {
|
||||
"algorithm": self.model.__class__.__name__,
|
||||
"buffer_size": self.model.buffer_size,
|
||||
"learning_rate": _lr if isinstance(_lr, float) else "lr_schedule",
|
||||
"learning_starts": self.model.learning_starts,
|
||||
"batch_size": self.model.batch_size,
|
||||
"tau": self.model.tau,
|
||||
"gamma": self.model.gamma,
|
||||
"train_freq": self.model.train_freq,
|
||||
"gradient_steps": self.model.gradient_steps,
|
||||
}
|
||||
|
||||
elif self.model.__class__.__name__ == "TD3":
|
||||
hparam_dict = {
|
||||
"algorithm": self.model.__class__.__name__,
|
||||
"learning_rate": _lr if isinstance(_lr, float) else "lr_schedule",
|
||||
"buffer_size": self.model.buffer_size,
|
||||
"learning_starts": self.model.learning_starts,
|
||||
"batch_size": self.model.batch_size,
|
||||
"tau": self.model.tau,
|
||||
"gamma": self.model.gamma,
|
||||
"train_freq": self.model.train_freq,
|
||||
"gradient_steps": self.model.gradient_steps,
|
||||
"policy_delay": self.model.policy_delay,
|
||||
"target_policy_noise": self.model.target_policy_noise,
|
||||
"target_noise_clip": self.model.target_noise_clip,
|
||||
}
|
||||
|
||||
else:
|
||||
hparam_dict = {
|
||||
"algorithm": self.model.__class__.__name__,
|
||||
"learning_rate": _lr if isinstance(_lr, float) else "lr_schedule",
|
||||
"gamma": self.model.gamma,
|
||||
"gae_lambda": self.model.gae_lambda,
|
||||
"n_steps": self.model.n_steps,
|
||||
"batch_size": self.model.batch_size,
|
||||
}
|
||||
|
||||
# Convert hparam_dict values to str if they are not of type int, float, str, bool, or torch.Tensor
|
||||
hparam_dict = {k: (str(v) if not isinstance(v, (int, float, str, bool, th.Tensor)) else v) for k, v in hparam_dict.items()}
|
||||
|
||||
metric_dict = {
|
||||
"eval/mean_reward": 0,
|
||||
"rollout/ep_rew_mean": 0,
|
||||
"rollout/ep_len_mean": 0,
|
||||
"info/total_profit": 1,
|
||||
"info/trades_count": 0,
|
||||
"info/trade_duration": 0,
|
||||
}
|
||||
|
||||
self.logger.record(
|
||||
"hparams",
|
||||
HParam(hparam_dict, metric_dict),
|
||||
exclude=("stdout", "log", "json", "csv"),
|
||||
)
|
||||
|
||||
def _on_step(self) -> bool:
|
||||
|
||||
local_info = self.locals["infos"][0]
|
||||
if self.training_env is None:
|
||||
return True
|
||||
|
||||
tensorboard_metrics = self.training_env.env_method("get_wrapper_attr", "tensorboard_metrics")[0]
|
||||
|
||||
for metric in local_info:
|
||||
if metric not in ["episode", "terminal_observation", "TimeLimit.truncated"]:
|
||||
self.logger.record(f"info/{metric}", local_info[metric])
|
||||
|
||||
for category in tensorboard_metrics:
|
||||
for metric in tensorboard_metrics[category]:
|
||||
self.logger.record(f"{category}/{metric}", tensorboard_metrics[category][metric])
|
||||
|
||||
return True
|
||||
|
||||
class FigureRecorderCallback(BaseCallback):
|
||||
"""
|
||||
Tensorboard figures callback
|
||||
"""
|
||||
|
||||
def __init__(self, verbose=0):
|
||||
super().__init__(verbose)
|
||||
|
||||
def _on_step(self) -> bool:
|
||||
return True
|
||||
|
||||
def _on_rollout_end(self):
|
||||
try:
|
||||
# Access the rollout plot directly from the base environment
|
||||
figures = [env.unwrapped.get_rollout_plot() for env in self.training_env.envs]
|
||||
except AttributeError:
|
||||
# If the above fails, try getting it from the wrappers
|
||||
figures = self.training_env.env_method("get_wrapper_attr", "get_rollout_plot")
|
||||
|
||||
for i, fig in enumerate(figures):
|
||||
self.logger.record(
|
||||
f"rollout/env_{i}",
|
||||
Figure(fig, close=True),
|
||||
exclude=("stdout", "log", "json", "csv")
|
||||
)
|
||||
plt.close(fig)
|
||||
return True
|
|
@ -7,10 +7,10 @@
|
|||
-r docs/requirements-docs.txt
|
||||
|
||||
coveralls==4.0.1
|
||||
ruff==0.6.4
|
||||
ruff==0.6.5
|
||||
mypy==1.11.2
|
||||
pre-commit==3.8.0
|
||||
pytest==8.3.2
|
||||
pytest==8.3.3
|
||||
pytest-asyncio==0.24.0
|
||||
pytest-cov==5.0.0
|
||||
pytest-mock==3.14.0
|
||||
|
@ -27,6 +27,6 @@ nbconvert==7.16.4
|
|||
# mypy types
|
||||
types-cachetools==5.5.0.20240820
|
||||
types-filelock==3.2.7
|
||||
types-requests==2.32.0.20240907
|
||||
types-requests==2.32.0.20240914
|
||||
types-tabulate==0.9.0.20240106
|
||||
types-python-dateutil==2.9.0.20240906
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
-r requirements-plot.txt
|
||||
|
||||
# Required for freqai
|
||||
scikit-learn==1.5.1
|
||||
scikit-learn==1.5.2
|
||||
joblib==1.4.2
|
||||
catboost==1.2.7; 'arm' not in platform_machine
|
||||
# Pin Matplotlib - it's depended on by catboost
|
||||
|
|
|
@ -4,6 +4,6 @@
|
|||
# Required for hyperopt
|
||||
scipy==1.14.1; python_version >= "3.10"
|
||||
scipy==1.13.1; python_version < "3.10"
|
||||
scikit-learn==1.5.1
|
||||
scikit-learn==1.5.2
|
||||
ft-scikit-optimize==0.9.2
|
||||
filelock==3.16.0
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Include all requirements to run the bot.
|
||||
-r requirements.txt
|
||||
|
||||
plotly==5.24.0
|
||||
plotly==5.24.1
|
||||
|
|
|
@ -4,7 +4,7 @@ bottleneck==1.4.0
|
|||
numexpr==2.10.1
|
||||
pandas-ta==0.3.14b
|
||||
|
||||
ccxt==4.3.98
|
||||
ccxt==4.4.3
|
||||
cryptography==42.0.8; platform_machine == 'armv7l'
|
||||
cryptography==43.0.1; platform_machine != 'armv7l'
|
||||
aiohttp==3.10.5
|
||||
|
@ -15,7 +15,7 @@ httpx>=0.24.1
|
|||
humanize==4.10.0
|
||||
cachetools==5.5.0
|
||||
requests==2.32.3
|
||||
urllib3==2.2.2
|
||||
urllib3==2.2.3
|
||||
jsonschema==4.23.0
|
||||
TA-Lib==0.4.32
|
||||
technical==1.4.4
|
||||
|
@ -26,7 +26,7 @@ jinja2==3.1.4
|
|||
tables==3.9.1; python_version < "3.10"
|
||||
tables==3.10.1; python_version >= "3.10"
|
||||
joblib==1.4.2
|
||||
rich==13.8.0
|
||||
rich==13.8.1
|
||||
pyarrow==17.0.0; platform_machine != 'armv7l'
|
||||
|
||||
# find first, C search in arrays
|
||||
|
@ -41,8 +41,8 @@ orjson==3.10.7
|
|||
sdnotify==0.3.2
|
||||
|
||||
# API Server
|
||||
fastapi==0.114.0
|
||||
pydantic==2.9.0
|
||||
fastapi==0.114.2
|
||||
pydantic==2.9.1
|
||||
uvicorn==0.30.6
|
||||
pyjwt==2.9.0
|
||||
aiofiles==24.1.0
|
||||
|
@ -53,7 +53,7 @@ questionary==2.0.1
|
|||
prompt-toolkit==3.0.36
|
||||
# Extensions to datetime library
|
||||
python-dateutil==2.9.0.post0
|
||||
pytz==2024.1
|
||||
pytz==2024.2
|
||||
|
||||
#Futures
|
||||
schedule==1.2.2
|
||||
|
|
Loading…
Reference in New Issue
Block a user