add pair to environment for access inside calculate_reward

2024-11-10 02:12:01 +00:00 · 2023-02-10 14:45:50 +01:00 · 2023-02-10 14:45:50 +01:00 · 4fc0edb8b7
commit 4fc0edb8b7
parent d47d8c135b
5 changed files with 48 additions and 25 deletions
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@ -175,10 +175,20 @@ As you begin to modify the strategy and the prediction model, you will quickly r
                pnl = self.get_unrealized_profit()

                factor = 100
-                # reward agent for entering trades
-                if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
-                        and self._position == Positions.Neutral:
-                    return 25
+
+            # you can use feature values from dataframe
+            rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_"
+                              f"{self.config['timeframe']}"].iloc[self._current_tick]
+
+            # reward agent for entering trades
+            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
+                    and self._position == Positions.Neutral):
+                if rsi_now < 40:
+                    factor = 40 / rsi_now
+                else:
+                    factor = 1
+                return 25 * factor
+
                # discourage agent from not entering trades
                if action == Actions.Neutral.value and self._position == Positions.Neutral:
                    return -1
--- a/freqtrade/freqai/RL/BaseEnvironment.py
+++ b/freqtrade/freqai/RL/BaseEnvironment.py
@ -45,7 +45,7 @@ class BaseEnvironment(gym.Env):
    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
                 id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False,
-                 fee: float = 0.0015, can_short: bool = False):
+                 fee: float = 0.0015, can_short: bool = False, pair: str = ""):
        """
        Initializes the training/eval environment.
        :param df: dataframe of features
@ -60,12 +60,13 @@ class BaseEnvironment(gym.Env):
        :param fee: The fee to use for environmental interactions.
        :param can_short: Whether or not the environment can short
        """
-        self.config = config
-        self.rl_config = config['freqai']['rl_config']
-        self.add_state_info = self.rl_config.get('add_state_info', False)
-        self.id = id
-        self.max_drawdown = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
-        self.compound_trades = config['stake_amount'] == 'unlimited'
+        self.config: dict = config
+        self.rl_config: dict = config['freqai']['rl_config']
+        self.add_state_info: bool = self.rl_config.get('add_state_info', False)
+        self.id: str = id
+        self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
+        self.compound_trades: bool = config['stake_amount'] == 'unlimited'
+        self.pair: str = pair
        if self.config.get('fee', None) is not None:
            self.fee = self.config['fee']
        else:
@ -74,8 +75,8 @@ class BaseEnvironment(gym.Env):
        # set here to default 5Ac, but all children envs can override this
        self.actions: Type[Enum] = BaseActions
        self.tensorboard_metrics: dict = {}
-        self.can_short = can_short
-        self.live = live
+        self.can_short: bool = can_short
+        self.live: bool = live
        if not self.live and self.add_state_info:
            self.add_state_info = False
            logger.warning("add_state_info is not available in backtesting. Deactivating.")
@ -93,13 +94,13 @@ class BaseEnvironment(gym.Env):
        :param reward_kwargs: extra config settings assigned by user in `rl_config`
        :param starting_point: start at edge of window or not
        """
-        self.df = df
-        self.signal_features = self.df
-        self.prices = prices
-        self.window_size = window_size
-        self.starting_point = starting_point
-        self.rr = reward_kwargs["rr"]
-        self.profit_aim = reward_kwargs["profit_aim"]
+        self.df: DataFrame = df
+        self.signal_features: DataFrame = self.df
+        self.prices: DataFrame = prices
+        self.window_size: int = window_size
+        self.starting_point: bool = starting_point
+        self.rr: float = reward_kwargs["rr"]
+        self.profit_aim: float = reward_kwargs["profit_aim"]

        # # spaces
        if self.add_state_info:
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@ -143,7 +143,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
        train_df = data_dictionary["train_features"]
        test_df = data_dictionary["test_features"]

-        env_info = self.pack_env_dict()
+        env_info = self.pack_env_dict(dk.pair)

        self.train_env = self.MyRLEnv(df=train_df,
                                      prices=prices_train,
@ -158,7 +158,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
        actions = self.train_env.get_actions()
        self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)

-    def pack_env_dict(self) -> Dict[str, Any]:
+    def pack_env_dict(self, pair: str) -> Dict[str, Any]:
        """
        Create dictionary of environment arguments
        """
@ -166,7 +166,8 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                    "reward_kwargs": self.reward_params,
                    "config": self.config,
                    "live": self.live,
-                    "can_short": self.can_short}
+                    "can_short": self.can_short,
+                    "pair": pair}
        if self.data_provider:
            env_info["fee"] = self.data_provider._exchange \
                .get_fee(symbol=self.data_provider.current_whitelist()[0])  # type: ignore
@ -363,10 +364,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
            pnl = self.get_unrealized_profit()
            factor = 100.

+            # you can use feature values from dataframe
+            rsi_now = self.df[f"%-rsi-period-10_shift-1_{self.pair}_"
+                              f"{self.config['timeframe']}"].iloc[self._current_tick]
+
            # reward agent for entering trades
            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
                    and self._position == Positions.Neutral):
-                return 25
+                if rsi_now < 40:
+                    factor = 40 / rsi_now
+                else:
+                    factor = 1
+                return 25 * factor
+
            # discourage agent from not entering trades
            if action == Actions.Neutral.value and self._position == Positions.Neutral:
                return -1
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner_multiproc.py
@ -34,7 +34,7 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
        train_df = data_dictionary["train_features"]
        test_df = data_dictionary["test_features"]

-        env_info = self.pack_env_dict()
+        env_info = self.pack_env_dict(dk.pair)

        env_id = "train_env"
        self.train_env = SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1,
--- a/setup.cfg
+++ b/setup.cfg
@ -49,3 +49,5 @@ exclude =
    __pycache__,
    .eggs,
    user_data,
+per-file-ignores =
+    freqtrade/freqai/RL/BaseReinforcementLearningModel.py:C901