From 936ca244821960ed0b7fc8ae92588f9819aaffd0 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Wed, 5 Oct 2022 15:58:54 +0200
Subject: [PATCH] separate RL install from general FAI install, update docs

---
 docs/freqai-reinforcement-learning.md                 | 11 +++++++----
 .../freqai/prediction_models/ReinforcementLearner.py  |  6 ++----
 requirements-freqai-rl.txt                            |  8 ++++++++
 requirements-freqai.txt                               | 10 ++++------
 setup.sh                                              |  9 ++++++++-
 tests/freqai/test_freqai_interface.py                 |  1 -
 6 files changed, 29 insertions(+), 16 deletions(-)
 create mode 100644 requirements-freqai-rl.txt

diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index 87a4a7646..8a390ac34 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -1,5 +1,8 @@
 # Reinforcement Learning
 
+!!! Note
+    Reinforcement learning dependencies include large packages such as `torch`, which should be explicitly requested during `./setup.sh -i` by answering "y" to the question "Do you also want dependencies for freqai-rl (~700mb additional space required) [y/N]?". Users who prefer Docker should ensure they use the Docker image appended with `_freqaiRL`.
+
 Setting up and running a Reinforcement Learning model is the same as running a Regressor or Classifier. The same two flags, `--freqaimodel` and `--strategy`, must be defined on the command line:
 
 ```bash
@@ -143,7 +146,7 @@ As users begin to modify the strategy and the prediction model, they will quickly
             if not self._is_valid(action):
                 return -2
             pnl = self.get_unrealized_profit()
-            rew = np.sign(pnl) * (pnl + 1)
+            factor = 100
 
             # reward agent for entering trades
             if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
@@ -166,12 +169,12 @@ As users begin to modify the strategy and the prediction model, they will quickly
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
 
             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
             return 0.
 ```
@@ -194,6 +197,6 @@
 cd freqtrade
 tensorboard --logdir user_data/models/unique-id
 ```
 
-where `unique-id` is the `identifier` set in the `freqai` configuration file.
+where `unique-id` is the `identifier` set in the `freqai` configuration file. This command must be run in a separate shell if the user wishes to view the output in their browser at 127.0.0.1:6006 (6006 is the default port used by Tensorboard).
 
 ![tensorboard](assets/tensorboard.png)
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index 00afd61d4..48519c34c 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -2,7 +2,6 @@ import logging
 from pathlib import Path
 from typing import Any, Dict
 
-import numpy as np
 import torch as th
 
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
@@ -81,7 +80,6 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
                 return -2
             pnl = self.get_unrealized_profit()
-            rew = np.sign(pnl) * (pnl + 1)
             factor = 100
 
             # reward agent for entering trades
@@ -109,12 +107,12 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
 
             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
 
             return 0.
diff --git a/requirements-freqai-rl.txt b/requirements-freqai-rl.txt
new file mode 100644
index 000000000..e29df34ac
--- /dev/null
+++ b/requirements-freqai-rl.txt
@@ -0,0 +1,8 @@
+# Include all requirements to run the bot.
+-r requirements-freqai.txt
+
+# Required for freqai-rl
+torch==1.12.1
+stable-baselines3==1.6.1
+gym==0.26.2
+sb3-contrib==1.6.1
diff --git a/requirements-freqai.txt b/requirements-freqai.txt
index dae13ced0..d4a741c29 100644
--- a/requirements-freqai.txt
+++ b/requirements-freqai.txt
@@ -1,5 +1,5 @@
 # Include all requirements to run the bot.
--r requirements-hyperopt.txt
+-r requirements.txt
 
 # Required for freqai
 scikit-learn==1.1.2
@@ -8,8 +8,6 @@ catboost==1.1; platform_machine != 'aarch64'
 lightgbm==3.3.2
 xgboost==1.6.2
 torch==1.12.1
-stable-baselines3==1.6.0
-gym==0.21.0
-tensorboard==2.9.1
-optuna==2.10.1
-sb3-contrib==1.6.0
\ No newline at end of file
+stable-baselines3==1.6.1
+gym==0.26.2
+sb3-contrib==1.6.1
diff --git a/setup.sh b/setup.sh
index 1a4a285a3..f57e820af 100755
--- a/setup.sh
+++ b/setup.sh
@@ -78,14 +78,21 @@ function updateenv() {
     fi
     REQUIREMENTS_FREQAI=""
+    REQUIREMENTS_FREQAI_RL=""
     read -p "Do you want to install dependencies for freqai [y/N]? "
     dev=$REPLY
     if [[ $REPLY =~ ^[Yy]$ ]]
     then
         REQUIREMENTS_FREQAI="-r requirements-freqai.txt"
+        read -p "Do you also want dependencies for freqai-rl (~700mb additional space required) [y/N]? "
+        dev=$REPLY
+        if [[ $REPLY =~ ^[Yy]$ ]]
+        then
+            REQUIREMENTS_FREQAI="-r requirements-freqai-rl.txt"
+        fi
     fi
 
-    ${PYTHON} -m pip install --upgrade -r ${REQUIREMENTS} ${REQUIREMENTS_HYPEROPT} ${REQUIREMENTS_PLOT} ${REQUIREMENTS_FREQAI}
+    ${PYTHON} -m pip install --upgrade -r ${REQUIREMENTS} ${REQUIREMENTS_HYPEROPT} ${REQUIREMENTS_PLOT} ${REQUIREMENTS_FREQAI} ${REQUIREMENTS_FREQAI_RL}
     if [ $? -ne 0 ]; then
         echo "Failed installing dependencies"
         exit 1
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 1f05f881e..b3e61b590 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -8,7 +8,6 @@ import pytest
 from freqtrade.configuration import TimeRange
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.enums import RunMode
-from freqtrade.enums import RunMode
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.utils import download_all_data_for_training, get_required_data_timerange
 from freqtrade.optimize.backtesting import Backtesting
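
A note on the reward change in this patch: the shaping term `rew = np.sign(pnl) * (pnl + 1)` is removed, and a closed position is now rewarded with the raw unrealized profit scaled by `factor`. The sketch below is a standalone illustration (plain Python with made-up pnl values, not freqtrade code) of how the two formulas behave; it ignores the `win_reward_factor` multiplier that applies when `pnl > profit_aim * rr`.

```python
import numpy as np

FACTOR = 100  # same scaling constant as in the example reward function


def old_exit_reward(pnl: float) -> float:
    """Reward removed by this patch: sign-based shaping of the unrealized profit."""
    return float(np.sign(pnl) * (pnl + 1) * FACTOR)


def new_exit_reward(pnl: float) -> float:
    """Reward after this patch: unrealized profit scaled linearly by FACTOR."""
    return float(pnl * FACTOR)


if __name__ == "__main__":
    # pnl is a ratio, e.g. 0.01 == +1% unrealized profit on the open position.
    for pnl in (0.05, 0.01, -0.01, -0.05):
        print(f"pnl={pnl:+.2f}  old={old_exit_reward(pnl):+8.2f}  new={new_exit_reward(pnl):+8.2f}")
```

With the old expression the reward magnitude is dominated by the constant `+1` offset (roughly ±100 regardless of how large the profit or loss is), whereas the new expression scales linearly with the trade's outcome.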
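On the install side, the new layering means `requirements-freqai-rl.txt` pulls in `requirements-freqai.txt`, which now pulls in the base `requirements.txt`, so the installer only ever hands pip one extra `-r` file. The following is a minimal sketch of that selection logic in Python, mirroring the nested prompts added to `setup.sh`; it is an illustration of the layering, not part of freqtrade's tooling, and the function names are invented for the example.

```python
import subprocess
import sys


def extra_requirements(want_freqai: bool, want_freqai_rl: bool) -> list:
    """Pick the extra requirements file, mirroring the nested setup.sh prompts.

    The RL file supersedes the plain FreqAI file because it already contains
    '-r requirements-freqai.txt' at the top.
    """
    extra = []
    if want_freqai:
        extra = ["-r", "requirements-freqai.txt"]
        if want_freqai_rl:
            extra = ["-r", "requirements-freqai-rl.txt"]
    return extra


def install(want_freqai: bool = False, want_freqai_rl: bool = False) -> None:
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "-r", "requirements.txt"]
    cmd += extra_requirements(want_freqai, want_freqai_rl)
    # check_call raises on a non-zero exit code, like the `exit 1` branch in setup.sh
    subprocess.check_call(cmd)


if __name__ == "__main__":
    # Dry run: show which extra files would be passed to pip for an RL install.
    print(extra_requirements(want_freqai=True, want_freqai_rl=True))
```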
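Since the RL extra pins exact versions (`torch==1.12.1`, `stable-baselines3==1.6.1`, `gym==0.26.2`, `sb3-contrib==1.6.1`), one quick way a user might confirm the freqai-rl prompt was answered correctly is to compare installed package metadata against those pins. This is only a suggested check, not something shipped with freqtrade:

```python
from importlib.metadata import PackageNotFoundError, version

# Version pins copied from the new requirements-freqai-rl.txt.
EXPECTED = {
    "torch": "1.12.1",
    "stable-baselines3": "1.6.1",
    "gym": "0.26.2",
    "sb3-contrib": "1.6.1",
}

for package, pinned in EXPECTED.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: not installed (was the freqai-rl prompt answered with 'y'?)")
        continue
    status = "matches pin" if installed == pinned else f"differs from pinned {pinned}"
    print(f"{package}: {installed} ({status})")
```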