From 936ca244821960ed0b7fc8ae92588f9819aaffd0 Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Wed, 5 Oct 2022 15:58:54 +0200
Subject: [PATCH] separate RL install from general FAI install, update docs

---
 docs/freqai-reinforcement-learning.md                 | 11 +++++++----
 .../freqai/prediction_models/ReinforcementLearner.py  |  6 ++----
 requirements-freqai-rl.txt                            |  8 ++++++++
 requirements-freqai.txt                               | 10 ++++------
 setup.sh                                              |  9 ++++++++-
 tests/freqai/test_freqai_interface.py                 |  1 -
 6 files changed, 29 insertions(+), 16 deletions(-)
 create mode 100644 requirements-freqai-rl.txt

diff --git a/docs/freqai-reinforcement-learning.md b/docs/freqai-reinforcement-learning.md
index 87a4a7646..8a390ac34 100644
--- a/docs/freqai-reinforcement-learning.md
+++ b/docs/freqai-reinforcement-learning.md
@@ -1,5 +1,8 @@
 # Reinforcement Learning
 
+!!! Note
+    Reinforcement learning dependencies include large packages such as `torch`, which should be explicitly requested during `./setup.sh -i` by answering "y" to the question "Do you also want dependencies for freqai-rl (~700mb additional space required) [y/N]?". Users who prefer Docker should ensure they use the Docker image appended with `_freqaiRL`.
+
 Setting up and running a Reinforcement Learning model is the same as running a Regressor or Classifier. The same two flags, `--freqaimodel` and `--strategy`, must be defined on the command line:
 
 ```bash
@@ -143,7 +146,7 @@ As users begin to modify the strategy and the prediction model, they will quickly
             if not self._is_valid(action):
                 return -2
             pnl = self.get_unrealized_profit()
-            rew = np.sign(pnl) * (pnl + 1)
+            factor = 100
 
             # reward agent for entering trades
             if action in (Actions.Long_enter.value, Actions.Short_enter.value) \
@@ -166,12 +169,12 @@ As users begin to modify the strategy and the prediction model, they will quickly
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
 
             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
             return 0.
 ```
@@ -194,6 +197,6 @@
 cd freqtrade
 tensorboard --logdir user_data/models/unique-id
 ```
 
-where `unique-id` is the `identifier` set in the `freqai` configuration file.
+where `unique-id` is the `identifier` set in the `freqai` configuration file. This command must be run in a separate shell if the user wishes to view the output in their browser at 127.0.0.1:6006 (6006 is the default port used by Tensorboard).
 
 ![tensorboard](assets/tensorboard.png)
diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearner.py b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
index 00afd61d4..48519c34c 100644
--- a/freqtrade/freqai/prediction_models/ReinforcementLearner.py
+++ b/freqtrade/freqai/prediction_models/ReinforcementLearner.py
@@ -2,7 +2,6 @@ import logging
 from pathlib import Path
 from typing import Any, Dict
 
-import numpy as np
 import torch as th
 
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
@@ -81,7 +80,6 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
                 return -2
             pnl = self.get_unrealized_profit()
-            rew = np.sign(pnl) * (pnl + 1)
             factor = 100
 
             # reward agent for entering trades
@@ -109,12 +107,12 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
 
             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
                     factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
-                return float(rew * factor)
+                return float(pnl * factor)
 
             return 0.
diff --git a/requirements-freqai-rl.txt b/requirements-freqai-rl.txt
new file mode 100644
index 000000000..e29df34ac
--- /dev/null
+++ b/requirements-freqai-rl.txt
@@ -0,0 +1,8 @@
+# Include all requirements to run the bot.
+-r requirements-freqai.txt
+
+# Required for freqai-rl
+torch==1.12.1
+stable-baselines3==1.6.1
+gym==0.26.2
+sb3-contrib==1.6.1
diff --git a/requirements-freqai.txt b/requirements-freqai.txt
index dae13ced0..d4a741c29 100644
--- a/requirements-freqai.txt
+++ b/requirements-freqai.txt
@@ -1,5 +1,5 @@
 # Include all requirements to run the bot.
--r requirements-hyperopt.txt
+-r requirements.txt
 
 # Required for freqai
 scikit-learn==1.1.2
@@ -8,8 +8,6 @@ catboost==1.1; platform_machine != 'aarch64'
 lightgbm==3.3.2
 xgboost==1.6.2
 torch==1.12.1
-stable-baselines3==1.6.0
-gym==0.21.0
-tensorboard==2.9.1
-optuna==2.10.1
-sb3-contrib==1.6.0
\ No newline at end of file
+stable-baselines3==1.6.1
+gym==0.26.2
+sb3-contrib==1.6.1
diff --git a/setup.sh b/setup.sh
index 1a4a285a3..f57e820af 100755
--- a/setup.sh
+++ b/setup.sh
@@ -78,14 +78,21 @@ function updateenv() {
     fi
     REQUIREMENTS_FREQAI=""
+    REQUIREMENTS_FREQAI_RL=""
     read -p "Do you want to install dependencies for freqai [y/N]? "
     dev=$REPLY
     if [[ $REPLY =~ ^[Yy]$ ]]
     then
         REQUIREMENTS_FREQAI="-r requirements-freqai.txt"
+        read -p "Do you also want dependencies for freqai-rl (~700mb additional space required) [y/N]? "
+        dev=$REPLY
+        if [[ $REPLY =~ ^[Yy]$ ]]
+        then
+            REQUIREMENTS_FREQAI="-r requirements-freqai-rl.txt"
+        fi
     fi
 
-    ${PYTHON} -m pip install --upgrade -r ${REQUIREMENTS} ${REQUIREMENTS_HYPEROPT} ${REQUIREMENTS_PLOT} ${REQUIREMENTS_FREQAI}
+    ${PYTHON} -m pip install --upgrade -r ${REQUIREMENTS} ${REQUIREMENTS_HYPEROPT} ${REQUIREMENTS_PLOT} ${REQUIREMENTS_FREQAI} ${REQUIREMENTS_FREQAI_RL}
     if [ $? -ne 0 ]; then
         echo "Failed installing dependencies"
         exit 1
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 1f05f881e..b3e61b590 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -8,7 +8,6 @@ import pytest
 from freqtrade.configuration import TimeRange
 from freqtrade.data.dataprovider import DataProvider
 from freqtrade.enums import RunMode
-from freqtrade.enums import RunMode
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.utils import download_all_data_for_training, get_required_data_timerange
 from freqtrade.optimize.backtesting import Backtesting
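
A note on the reward change in this patch: the shaping term `rew = np.sign(pnl) * (pnl + 1)` is removed, and a closed position is now rewarded with the raw unrealized profit scaled by `factor`. The sketch below is a standalone illustration (plain Python with made-up pnl values, not freqtrade code) of how the two formulas behave; it ignores the `win_reward_factor` multiplier that applies when `pnl > profit_aim * rr`.

```python
import numpy as np

FACTOR = 100  # same scaling constant as in the example reward function


def old_exit_reward(pnl: float) -> float:
    """Reward removed by this patch: sign-based shaping of the unrealized profit."""
    return float(np.sign(pnl) * (pnl + 1) * FACTOR)


def new_exit_reward(pnl: float) -> float:
    """Reward after this patch: unrealized profit scaled linearly by FACTOR."""
    return float(pnl * FACTOR)


if __name__ == "__main__":
    # pnl is a ratio, e.g. 0.01 == +1% unrealized profit on the open position.
    for pnl in (0.05, 0.01, -0.01, -0.05):
        print(f"pnl={pnl:+.2f}  old={old_exit_reward(pnl):+8.2f}  new={new_exit_reward(pnl):+8.2f}")
```

With the old expression the reward magnitude is dominated by the constant `+1` offset (roughly ±100 regardless of how large the profit or loss is), whereas the new expression scales linearly with the trade's outcome.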
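On the install side, the new layering means `requirements-freqai-rl.txt` pulls in `requirements-freqai.txt`, which now pulls in the base `requirements.txt`, so the installer only ever hands pip one extra `-r` file. The following is a minimal sketch of that selection logic in Python, mirroring the nested prompts added to `setup.sh`; it is an illustration of the layering, not part of freqtrade's tooling, and the function names are invented for the example.

```python
import subprocess
import sys


def extra_requirements(want_freqai: bool, want_freqai_rl: bool) -> list:
    """Pick the extra requirements file, mirroring the nested setup.sh prompts.

    The RL file supersedes the plain FreqAI file because it already contains
    '-r requirements-freqai.txt' at the top.
    """
    extra = []
    if want_freqai:
        extra = ["-r", "requirements-freqai.txt"]
        if want_freqai_rl:
            extra = ["-r", "requirements-freqai-rl.txt"]
    return extra


def install(want_freqai: bool = False, want_freqai_rl: bool = False) -> None:
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "-r", "requirements.txt"]
    cmd += extra_requirements(want_freqai, want_freqai_rl)
    # check_call raises on a non-zero exit code, like the `exit 1` branch in setup.sh
    subprocess.check_call(cmd)


if __name__ == "__main__":
    # Dry run: show which extra files would be passed to pip for an RL install.
    print(extra_requirements(want_freqai=True, want_freqai_rl=True))
```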
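Since the RL extra pins exact versions (`torch==1.12.1`, `stable-baselines3==1.6.1`, `gym==0.26.2`, `sb3-contrib==1.6.1`), one quick way a user might confirm the freqai-rl prompt was answered correctly is to compare installed package metadata against those pins. This is only a suggested check, not something shipped with freqtrade:

```python
from importlib.metadata import PackageNotFoundError, version

# Version pins copied from the new requirements-freqai-rl.txt.
EXPECTED = {
    "torch": "1.12.1",
    "stable-baselines3": "1.6.1",
    "gym": "0.26.2",
    "sb3-contrib": "1.6.1",
}

for package, pinned in EXPECTED.items():
    try:
        installed = version(package)
    except PackageNotFoundError:
        print(f"{package}: not installed (was the freqai-rl prompt answered with 'y'?)")
        continue
    status = "matches pin" if installed == pinned else f"differs from pinned {pinned}"
    print(f"{package}: {installed} ({status})")
```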