expose environment reward parameters to the user config

This commit is contained in:
robcaulk 2022-08-21 20:33:09 +02:00
parent d88a0dbf82
commit 29f0e01c4a
5 changed files with 28 additions and 32 deletions

View File

@ -92,7 +92,8 @@
"policy_type": "MlpPolicy",
"model_reward_parameters": {
"rr": 1,
"profit_aim": 0.02
"profit_aim": 0.02,
"win_reward_factor": 2
}
}
},

View File

@ -42,9 +42,10 @@ class Base5ActionRLEnv(gym.Env):
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
reward_kwargs: dict = {}, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1):
id: str = 'baseenv-1', seed: int = 1, config: dict = {}):
assert df.ndim == 2
self.rl_config = config['freqai']['rl_config']
self.id = id
self.seed(seed)
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
@ -268,7 +269,7 @@ class Base5ActionRLEnv(gym.Env):
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
factor = 1
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
factor = 2
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
# close short
@ -277,7 +278,7 @@ class Base5ActionRLEnv(gym.Env):
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
factor = 1
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
factor = 2
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
return float((np.log(last_trade_price) - np.log(current_price)) * factor)
return 0.

View File

@ -110,10 +110,10 @@ class BaseReinforcementLearningModel(IFreqaiModel):
# environments
if not self.train_env:
self.train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
reward_kwargs=self.reward_params)
reward_kwargs=self.reward_params, config=self.config)
self.eval_env = Monitor(MyRLEnv(df=test_df, prices=prices_test,
window_size=self.CONV_WIDTH,
reward_kwargs=self.reward_params), ".")
reward_kwargs=self.reward_params, config=self.config), ".")
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
render=False, eval_freq=eval_freq,
best_model_save_path=dk.data_path)
@ -239,7 +239,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
def make_env(env_id: str, rank: int, seed: int, train_df, price,
reward_params, window_size, monitor=False) -> Callable:
reward_params, window_size, monitor=False, config={}) -> Callable:
"""
Utility function for multiprocessed env.
@ -252,7 +252,7 @@ def make_env(env_id: str, rank: int, seed: int, train_df, price,
def _init() -> gym.Env:
env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
reward_kwargs=reward_params, id=env_id, seed=seed + rank)
reward_kwargs=reward_params, id=env_id, seed=seed + rank, config=config)
if monitor:
env = Monitor(env, ".")
return env
@ -277,16 +277,16 @@ class MyRLEnv(Base5ActionRLEnv):
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
factor = 1
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
factor = 2
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
factor = 1
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
factor = 2
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
return float((np.log(last_trade_price) - np.log(current_price)) * factor)
return 0.

View File

@ -57,26 +57,20 @@ class MyRLEnv(Base5ActionRLEnv):
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(current_price) - np.log(last_trade_price))
if action == Actions.Long_exit.value and self._position == Positions.Long:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_buy_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_sell_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(current_price) - np.log(last_trade_price)) * 2)
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_exit_fee(self.prices.iloc[self._current_tick].open)
factor = 1
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
return float((np.log(current_price) - np.log(last_trade_price)) * factor)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float(np.log(last_trade_price) - np.log(current_price))
if action == Actions.Short_exit.value and self._position == Positions.Short:
if self.close_trade_profit[-1] > self.profit_aim * self.rr:
last_trade_price = self.add_sell_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_buy_fee(self.prices.iloc[self._current_tick].open)
return float((np.log(last_trade_price) - np.log(current_price)) * 2)
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
factor = 1
if self.close_trade_profit and self.close_trade_profit[-1] > self.profit_aim * self.rr:
factor = self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
return float((np.log(last_trade_price) - np.log(current_price)) * factor)
return 0.

View File

@ -62,12 +62,12 @@ class ReinforcementLearner_multiproc(BaseReinforcementLearningModel):
env_id = "train_env"
num_cpu = int(self.freqai_info["data_kitchen_thread_count"] / 2)
self.train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train,
self.reward_params, self.CONV_WIDTH) for i
self.reward_params, self.CONV_WIDTH, config=self.config) for i
in range(num_cpu)])
eval_env_id = 'eval_env'
self.eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test,
self.reward_params, self.CONV_WIDTH, monitor=True) for i
self.reward_params, self.CONV_WIDTH, monitor=True, config=self.config) for i
in range(num_cpu)])
self.eval_callback = EvalCallback(self.eval_env, deterministic=True,
render=False, eval_freq=eval_freq,