From 29f0e01c4a50e7b955a100ef49b47049eff3737a Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sun, 21 Aug 2022 20:33:09 +0200
Subject: [PATCH] expose environment reward parameters to the user config

---
 config_examples/config_freqai-rl.example.json |  3 +-
 freqtrade/freqai/RL/Base5ActionRLEnv.py       |  7 +++--
 .../RL/BaseReinforcementLearningModel.py      | 16 +++++-----
 .../prediction_models/ReinforcementLearner.py | 30 ++++++++-----------
 .../ReinforcementLearner_multiproc.py         |  4 +--
 5 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json
index fa08cdd60..07ddb04d3 100644
--- a/config_examples/config_freqai-rl.example.json
+++ b/config_examples/config_freqai-rl.example.json
@@ -92,7 +92,8 @@
             "policy_type": "MlpPolicy",
             "model_reward_parameters": {
                 "rr": 1,
-                "profit_aim": 0.02
+                "profit_aim": 0.02,
+                "win_reward_factor": 2
             }
         }
     },
diff --git a/freqtrade/freqai/RL/Base5ActionRLEnv.py b/freqtrade/freqai/RL/Base5ActionRLEnv.py
index 94de259a9..84a82c5de 100644
--- a/freqtrade/freqai/RL/Base5ActionRLEnv.py
+++ b/freqtrade/freqai/RL/Base5ActionRLEnv.py
@@ -42,9 +42,10 @@ class Base5ActionRLEnv(gym.Env):
 
     def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
                  reward_kwargs: dict = {}, window_size=10, starting_point=True,
-                 id: str = 'baseenv-1', seed: int = 1):
+                 id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
         assert df.ndim == 2
 
+        self.rl_config = config['freqai']['rl_config']
         self.id = id
         self.seed(seed)
         self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
@@ -268,7 +269,7 @@ class Base5ActionRLEnv(gym.Env):
             current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
             factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = 2
+                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
             return float((np.log(current_price) - np.log(last_trade_price)) * factor)
 
         # close short
@@ -277,7 +278,7 @@ class Base5ActionRLEnv(gym.Env):
             current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
             factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = 2
+                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
             return float(np.log(last_trade_price) - np.log(current_price) * factor)
 
         return 0.
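
The snippet below is a minimal, self-contained sketch of how the newly exposed win_reward_factor entry is meant to be consumed: the nested dict mirrors the example JSON above, and the .get('win_reward_factor', 2) lookup falls back to the previously hard-coded factor of 2 when the key is absent. The dict literal and printed values are illustrative only, not part of the patch.

# Illustrative sketch only: mirrors the config shape from the example JSON above.
config = {
    "freqai": {
        "rl_config": {
            "model_reward_parameters": {
                "rr": 1,
                "profit_aim": 0.02,
                "win_reward_factor": 2,
            }
        }
    }
}

rl_config = config["freqai"]["rl_config"]
reward_params = rl_config["model_reward_parameters"]

# Same lookup the environment now performs; the default keeps the old behaviour.
print(reward_params.get("win_reward_factor", 2))   # -> 2 (taken from the user config)
print({}.get("win_reward_factor", 2))              # -> 2 (fallback when the key is missing)
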
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index bb858f3cf..0618a91ed 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -110,10 +110,10 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         # environments
         if not self.train_env:
             self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
-                                     reward_kwargs=self.reward_params)
+                                     reward_kwargs=self.reward_params, config=self.config)
             self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
                                             window_size=self.CONV_WIDTH,
-                                            reward_kwargs=self.reward_params), ".")
+                                            reward_kwargs=self.reward_params, config=self.config), ".")
             self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
                                               render=False, eval_freq=eval_freq,
                                               best_model_save_path=dk.data_path)
@@ -239,7 +239,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
 
 
 def make_env(env_id: str, rank: int, seed: int, train_df, price,
-             reward_params, window_size, monitor=False) -> Callable:
+             reward_params, window_size, monitor=False, config={}) -> Callable:
     """
     Utility function for multiprocessed env.
 
@@ -252,7 +252,7 @@ def make_env(env_id: str, rank: int, seed: int, train_df, price,
     def _init() -> gym.Env:
 
         env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
-                      reward_kwargs=reward_params, id=env_id, seed=seed + rank)
+                      reward_kwargs=reward_params, id=env_id, seed=seed + rank, config=config)
         if monitor:
             env = Monitor(env, ".")
         return env
@@ -277,16 +277,16 @@ class MyRLEnv(Base5ActionRLEnv):
             current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
             factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = 2
+                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
             return float((np.log(current_price) - np.log(last_trade_price)) * factor)
 
         # close short
         if action == Actions.Short_exit.value and self._position == Positions.Short:
-            last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
+            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
             factor = 1
             if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                factor = 2
+                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
             return float(np.log(last_trade_price) - np.log(current_price) * factor)
 
         return 0.
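
The make_env factory above returns one closure per worker, and each closure now captures the shared user config so every environment can read model_reward_parameters. The following self-contained sketch shows that factory pattern without freqtrade, gym, or stable-baselines3 installed; DummyEnv, the dict shapes, and the printed values are assumptions made for illustration, not code from the patch.

from typing import Callable

class DummyEnv:
    """Stand-in for MyRLEnv: records the arguments a worker environment would receive."""
    def __init__(self, window_size: int, reward_kwargs: dict, id: str, seed: int, config: dict):
        self.id = id
        self.seed = seed
        self.window_size = window_size
        self.reward_kwargs = reward_kwargs
        # Same attribute the patched Base5ActionRLEnv derives from the user config.
        self.rl_config = config["freqai"]["rl_config"]

def make_env(env_id: str, rank: int, seed: int, reward_params: dict,
             window_size: int, config: dict) -> Callable[[], DummyEnv]:
    """Return a zero-argument factory, one per worker, as a vectorized env expects."""
    def _init() -> DummyEnv:
        return DummyEnv(window_size=window_size, reward_kwargs=reward_params,
                        id=env_id, seed=seed + rank, config=config)
    return _init

config = {"freqai": {"rl_config": {"model_reward_parameters": {"rr": 1, "profit_aim": 0.02,
                                                               "win_reward_factor": 3}}}}
reward_params = config["freqai"]["rl_config"]["model_reward_parameters"]
env_fns = [make_env("train_env", i, 1, reward_params, 10, config) for i in range(4)]
first_env = env_fns[0]()
print(first_env.seed, first_env.rl_config["model_reward_parameters"]["win_reward_factor"])  # 1 3
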
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index 2faa6eb3a..5f22971e1 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -57,26 +57,20 @@ class MyRLEnv(Base5ActionRLEnv):
 
         # close long
         if action == Actions.Long_exit.value and self._position == Positions.Long:
-            last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-            return float(np.log(current_price) - np.log(last_trade_price))
-
-        if action == Actions.Long_exit.value and self._position == Positions.Long:
-            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
-                current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
-                return float((np.log(current_price) - np.log(last_trade_price)) * 2)
+            last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
+            factor = 1
+            if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+            return float((np.log(current_price) - np.log(last_trade_price)) * factor)
 
         # close short
         if action == Actions.Short_exit.value and self._position == Positions.Short:
-            last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-            current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-            return float(np.log(last_trade_price) - np.log(current_price))
-
-        if action == Actions.Short_exit.value and self._position == Positions.Short:
-            if self.close_trade_profit[-1] > self.profit_aim * self.rr:
-                last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
-                current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
-                return float((np.log(last_trade_price) - np.log(current_price)) * 2)
+            last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
+            current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
+            factor = 1
+            if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
+                factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+            return float(np.log(last_trade_price) - np.log(current_price) * factor)
 
         return 0.
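
To make the consolidated reward logic above easy to try in isolation, here is a hedged sketch of the same idea as a free function: a log-return reward whose sign depends on the trade direction, scaled by win_reward_factor once the closed profit beats profit_aim * rr. Fee handling is omitted, the function name and sample prices are invented for the example, and in this sketch the factor multiplies the full log-return for both directions.

import numpy as np

def close_trade_reward(is_long: bool, last_trade_price: float, current_price: float,
                       last_profit: float, reward_params: dict) -> float:
    """Log-return reward for closing a position, boosted for trades that beat the profit aim."""
    factor = 1.0
    if last_profit > reward_params["profit_aim"] * reward_params["rr"]:
        factor = reward_params.get("win_reward_factor", 2)
    log_return = np.log(current_price) - np.log(last_trade_price)
    return float(log_return * factor) if is_long else float(-log_return * factor)

params = {"rr": 1, "profit_aim": 0.02, "win_reward_factor": 2}
print(close_trade_reward(True, 100.0, 104.0, 0.04, params))   # long exit above profit_aim, scaled
print(close_trade_reward(True, 100.0, 101.0, 0.01, params))   # long exit below profit_aim, factor 1
print(close_trade_reward(False, 100.0, 97.0, 0.03, params))   # short exit above profit_aim, scaled
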
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
index 1854bb1a5..ee9a407c9 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
@@ -62,12 +62,12 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
         env_id = "train_env"
         num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
         self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
-                                                 self.reward_params, self.CONV_WIDTH) for i
+                                                 self.reward_params, self.CONV_WIDTH, config=self.config) for i
                                         in range(num_cpu)])
 
         eval_env_id = 'eval_env'
         self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
-                                                self.reward_params, self.CONV_WIDTH, monitor=True) for i
+                                                self.reward_params, self.CONV_WIDTH, monitor=True, config=self.config) for i
                                        in range(num_cpu)])
         self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
                                           render=False, eval_freq=eval_freq,
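
The hunk above now hands the same user config to one environment per worker process. The short sketch below lays out that worker arrangement, half of data_kitchen_thread_count processes, each factory seeded as seed + rank, with the evaluation set additionally monitored, without pulling in stable-baselines3; the thread count and printed layout are illustrative values, not output from the patch.

# Illustrative values; in the patch these come from freqai_info and self.config.
freqai_info = {"data_kitchen_thread_count": 8}
base_seed = 1

# Half of the data-kitchen threads become vectorized-env workers.
num_cpu = int(freqai_info["data_kitchen_thread_count"] / 2)

# One (env_id, rank, seed, monitor) tuple per worker, mirroring the make_env calls above.
train_workers = [("train_env", rank, base_seed + rank, False) for rank in range(num_cpu)]
eval_workers = [("eval_env", rank, base_seed + rank, True) for rank in range(num_cpu)]

for env_id, rank, seed, monitor in train_workers + eval_workers:
    print(f"{env_id}[{rank}] seed={seed} monitor={monitor}")
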