improve price df handling to enable backtesting

robcaulk 2022-08-17 12:51:14 +02:00
parent 2080ff86ed
commit b90da46b1b
8 changed files with 77 additions and 59 deletions

Changed file: FreqAI RL example configuration (JSON)

@@ -73,16 +73,12 @@
             "5m",
             "30m"
         ],
-        "label_period_candles": 80,
         "include_shifted_candles": 0,
-        "DI_threshold": 0,
         "weight_factor": 0.9,
         "principal_component_analysis": false,
         "use_SVM_to_remove_outliers": false,
-        "svm_params": {"shuffle": true, "nu": 0.1},
-        "stratify_training_data": 0,
         "indicator_max_period_candles": 10,
-        "indicator_periods_candles": [5]
+        "indicator_periods_candles": [5, 10]
     },
     "data_split_parameters": {
         "test_size": 0.5,
@@ -90,7 +86,6 @@
         "shuffle": false
     },
     "model_training_parameters": {
-        "n_steps": 2048,
         "ent_coef": 0.005,
         "learning_rate": 0.000025,
         "batch_size": 256,

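Note: the hunk above drops several unused keys and expands `indicator_periods_candles` to two periods. As a minimal sketch (not part of this commit) of how that list is typically consumed when building features in a strategy's `populate_any_indicators()` — the helper name and the choice of RSI/SMA are illustrative only:

    import talib.abstract as ta
    from pandas import DataFrame

    def add_period_features(informative: DataFrame, coin: str, freqai_info: dict) -> DataFrame:
        # one feature column per indicator and per period listed in the config
        for t in freqai_info["feature_parameters"]["indicator_periods_candles"]:
            t = int(t)
            informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
            informative[f"%-{coin}sma-period_{t}"] = ta.SMA(informative, timeperiod=t)
        return informative
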
Changed file: BaseReinforcementLearningModel

@@ -10,8 +10,11 @@ from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 from freqtrade.freqai.freqai_interface import IFreqaiModel
 from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions
 from freqtrade.persistence import Trade
+import torch.multiprocessing
+import torch as th

 logger = logging.getLogger(__name__)

+th.set_num_threads(8)
+torch.multiprocessing.set_sharing_strategy('file_system')

 class BaseReinforcementLearningModel(IFreqaiModel):
@@ -46,6 +49,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.fit_labels()  # useless for now, but just satiating append methods

         # normalize all data based on train_dataset only
+        prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk)
         data_dictionary = dk.normalize_data(data_dictionary)

         # optional additional data cleaning/analysis
@@ -56,7 +60,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         )

         logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')

-        model = self.fit_rl(data_dictionary, pair, dk)
+        model = self.fit_rl(data_dictionary, pair, dk, prices_train, prices_test)

         if pair not in self.dd.historic_predictions:
             self.set_initial_historic_predictions(
@@ -69,7 +73,8 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return model

     @abstractmethod
-    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen):
+    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
+               prices_train: DataFrame, prices_test: DataFrame):
         """
         Agent customizations and abstract Reinforcement Learning customizations
         go in here. Abstract method, so this function must be overridden by
@@ -141,6 +146,34 @@ class BaseReinforcementLearningModel(IFreqaiModel):

         return output

+    def build_ohlc_price_dataframes(self, data_dictionary: dict,
+                                    pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
+                                                                               DataFrame]:
+        """
+        Builds the train prices and test prices for the environment.
+        """
+
+        coin = pair.split('/')[0]
+        train_df = data_dictionary["train_features"]
+        test_df = data_dictionary["test_features"]
+
+        # price data for model training and evaluation
+        tf = self.config['timeframe']
+        ohlc_list = [f'%-{coin}raw_open_{tf}', f'%-{coin}raw_low_{tf}',
+                     f'%-{coin}raw_high_{tf}', f'%-{coin}raw_close_{tf}']
+        rename_dict = {f'%-{coin}raw_open_{tf}': 'open', f'%-{coin}raw_low_{tf}': 'low',
+                       f'%-{coin}raw_high_{tf}': 'high', f'%-{coin}raw_close_{tf}': 'close'}
+
+        prices_train = train_df.filter(ohlc_list, axis=1)
+        prices_train.rename(columns=rename_dict, inplace=True)
+        prices_train = prices_train.reset_index(drop=True)
+
+        prices_test = test_df.filter(ohlc_list, axis=1)
+        prices_test.rename(columns=rename_dict, inplace=True)
+        prices_test = prices_test.reset_index(drop=True)
+
+        return prices_train, prices_test
+
     def set_initial_historic_predictions(
         self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str
     ) -> None:
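Note: the raw OHLC columns filtered above are produced by the example strategies further down; the `_{tf}` suffix appears to be appended by FreqAI when informative features are merged, which is why the strategies set `%-{coin}raw_close` while this method filters `%-{coin}raw_close_{tf}`. A self-contained sketch of the same filter-and-rename step, with the coin and timeframe hard-coded for illustration:

    from pandas import DataFrame

    def extract_prices(features: DataFrame, coin: str = "BTC", tf: str = "5m") -> DataFrame:
        # keep only the raw OHLC feature columns and rename them to plain open/low/high/close
        ohlc = [f'%-{coin}raw_open_{tf}', f'%-{coin}raw_low_{tf}',
                f'%-{coin}raw_high_{tf}', f'%-{coin}raw_close_{tf}']
        rename = {f'%-{coin}raw_open_{tf}': 'open', f'%-{coin}raw_low_{tf}': 'low',
                  f'%-{coin}raw_high_{tf}': 'high', f'%-{coin}raw_close_{tf}': 'close'}
        return features.filter(ohlc, axis=1).rename(columns=rename).reset_index(drop=True)
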

Changed file: ReinforcementLearningExample3ac (example strategy)

@@ -36,7 +36,7 @@ class ReinforcementLearningExample3ac(IStrategy):
     stoploss = -0.05
     use_exit_signal = True
     startup_candle_count: int = 300
-    can_short = False
+    can_short = True

     linear_roi_offset = DecimalParameter(
         0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
@@ -76,8 +76,11 @@ class ReinforcementLearningExample3ac(IStrategy):
         informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
         informative[f"%-{coin}raw_volume"] = informative["volume"]

-        # Raw price currently necessary for RL models:
-        informative[f"%-{coin}raw_price"] = informative["close"]
+        # The following features are necessary for RL models
+        informative[f"%-{coin}raw_close"] = informative["close"]
+        informative[f"%-{coin}raw_open"] = informative["open"]
+        informative[f"%-{coin}raw_high"] = informative["high"]
+        informative[f"%-{coin}raw_low"] = informative["low"]

         indicators = [col for col in informative if col.startswith("%")]
         # This loop duplicates and shifts all indicators to add a sense of recency to data
@@ -101,9 +104,9 @@
         df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
         df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25

-        # user adds targets here by prepending them with &- (see convention below)
-        # If user wishes to use multiple targets, a multioutput prediction model
-        # needs to be used such as templates/CatboostPredictionMultiModel.py
+        # For RL, this is not a target, it is simply a filler until actions come out
+        # of the model.
+        # For Base3ActionEnv, 2 is neutral (hold).
         df["&-action"] = 2

         return df
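Note: the "&-action" column above is only a placeholder; at prediction time the model fills it with the agent's chosen action. As an illustrative sketch (not part of this commit) of how a strategy method could turn that column into entry signals, the integer mapping used here (1 = enter long, 0 = enter short, 2 = neutral/hold) is an assumption — the authoritative mapping is defined by Base3ActionRLEnv.Actions:

    from functools import reduce
    from pandas import DataFrame

    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        # only act where FreqAI marks the prediction as usable (do_predict == 1)
        enter_long = [df["do_predict"] == 1, df["&-action"] == 1]
        enter_short = [df["do_predict"] == 1, df["&-action"] == 0]
        df.loc[reduce(lambda x, y: x & y, enter_long), ["enter_long", "enter_tag"]] = (1, "rl_long")
        df.loc[reduce(lambda x, y: x & y, enter_short), ["enter_short", "enter_tag"]] = (1, "rl_short")
        return df
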

Changed file: ReinforcementLearningExample5ac (example strategy)

@@ -76,8 +76,11 @@ class ReinforcementLearningExample5ac(IStrategy):
         informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
         informative[f"%-{coin}raw_volume"] = informative["volume"]

-        # Raw price currently necessary for RL models:
-        informative[f"%-{coin}raw_price"] = informative["close"]
+        # The following features are necessary for RL models
+        informative[f"%-{coin}raw_close"] = informative["close"]
+        informative[f"%-{coin}raw_open"] = informative["open"]
+        informative[f"%-{coin}raw_high"] = informative["high"]
+        informative[f"%-{coin}raw_low"] = informative["low"]

         indicators = [col for col in informative if col.startswith("%")]
         # This loop duplicates and shifts all indicators to add a sense of recency to data
@@ -101,9 +104,8 @@
         df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
         df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25

-        # user adds targets here by prepending them with &- (see convention below)
-        # If user wishes to use multiple targets, a multioutput prediction model
-        # needs to be used such as templates/CatboostPredictionMultiModel.py
+        # For RL, there are no direct targets to set. This is filler (neutral)
+        # until the agent sends an action.
         df["&-action"] = 2

         return df

Changed file: ReinforcementLearningPPO

@@ -3,9 +3,8 @@ from typing import Any, Dict  # , Tuple
 import numpy as np
 # import numpy.typing as npt
-# import pandas as pd
 import torch as th
-# from pandas import DataFrame
+from pandas import DataFrame
 from stable_baselines3 import PPO
 from stable_baselines3.common.callbacks import EvalCallback
 from stable_baselines3.common.monitor import Monitor
@@ -22,7 +21,8 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel):
     User created Reinforcement Learning Model prediction model.
     """

-    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen):
+    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
+               prices_train: DataFrame, prices_test: DataFrame):

         agent_params = self.freqai_info['model_training_parameters']
         reward_params = self.freqai_info['model_reward_parameters']
@@ -31,18 +31,12 @@
         eval_freq = agent_params.get("eval_cycles", 4) * len(test_df)
         total_timesteps = agent_params["train_cycles"] * len(train_df)

-        # price data for model training and evaluation
-        price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index))
-        price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(
-            len(test_df.index))

         # environments
-        train_env = MyRLEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH,
+        train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
                             reward_kwargs=reward_params)
-        eval = MyRLEnv(df=test_df, prices=price_test,
+        eval = MyRLEnv(df=test_df, prices=prices_test,
                        window_size=self.CONV_WIDTH, reward_kwargs=reward_params)
         eval_env = Monitor(eval, ".")
-        eval_env.reset()

         path = dk.data_path
         eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/",
@@ -63,7 +57,7 @@
             callback=eval_callback
         )

-        best_model = PPO.load(dk.data_path / "best_model.zip")
+        best_model = PPO.load(dk.data_path / "best_model")

         print('Training finished!')
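Note: the environments now receive price frames built from the train/test feature split itself instead of a `.tail()` slice of `self.dd.historic_data`, so prices and features stay aligned during backtesting. Because MyRLEnv indexes prices and features positionally, a small sanity check like the one below can catch misalignment early; this helper is a sketch and not part of the commit:

    from pandas import DataFrame

    def check_env_inputs(features: DataFrame, prices: DataFrame) -> None:
        # the env walks both frames row by row, so lengths must match exactly
        assert len(features) == len(prices), (
            f"feature/price length mismatch: {len(features)} vs {len(prices)}"
        )
        # the env expects plain OHLC column names after the rename step
        assert {"open", "high", "low", "close"}.issubset(prices.columns), (
            "price dataframe must contain open/high/low/close"
        )

For example, calling check_env_inputs(train_df, prices_train) just before constructing MyRLEnv would surface the old misalignment immediately.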

Changed file: ReinforcementLearningPPO_multiproc

@@ -16,6 +16,7 @@ from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions
 from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
 import gym
+from pandas import DataFrame

 logger = logging.getLogger(__name__)
@@ -47,7 +48,8 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel):
     User created Reinforcement Learning Model prediction model.
     """

-    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen):
+    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
+               prices_train: DataFrame, prices_test: DataFrame):

         agent_params = self.freqai_info['model_training_parameters']
         reward_params = self.freqai_info['model_reward_parameters']
@@ -57,18 +59,14 @@
         total_timesteps = agent_params["train_cycles"] * len(train_df)
         learning_rate = agent_params["learning_rate"]

-        # price data for model training and evaluation
-        price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index))
-        price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(
-            len(test_df.index))

         env_id = "train_env"
-        th.set_num_threads(dk.thread_count)
         num_cpu = int(dk.thread_count / 2)
-        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, price, reward_params,
+        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, reward_params,
                                             self.CONV_WIDTH) for i in range(num_cpu)])

         eval_env_id = 'eval_env'
-        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, price_test, reward_params,
+        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, reward_params,
                                            self.CONV_WIDTH, monitor=True) for i in range(num_cpu)])

         path = dk.data_path
@@ -92,7 +90,7 @@
             callback=eval_callback
         )

-        best_model = PPO.load(dk.data_path / "best_model.zip")
+        best_model = PPO.load(dk.data_path / "best_model")

         print('Training finished!')

         eval_env.close()
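Note: SubprocVecEnv consumes env factory callables. The make_env helper referenced above is defined elsewhere in this file; a rough sketch of what such a factory looks like follows, with the signature mirroring the calls in fit_rl and the body assumed rather than copied from the commit (MyRLEnv is this module's Base3ActionRLEnv subclass):

    import gym
    from stable_baselines3.common.monitor import Monitor
    from stable_baselines3.common.utils import set_random_seed

    def make_env(env_id: str, rank: int, seed: int, train_df, price,
                 reward_params: dict, window_size: int, monitor: bool = False):
        def _init() -> gym.Env:
            # each subprocess builds its own copy of the custom trading env
            env = MyRLEnv(df=train_df, prices=price, window_size=window_size,
                          reward_kwargs=reward_params)
            if monitor:
                env = Monitor(env, ".")
            return env
        set_random_seed(seed)
        return _init
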

Changed file: ReinforcementLearningTDQN

@@ -10,6 +10,7 @@ from freqtrade.freqai.RL.TDQNagent import TDQN
 from stable_baselines3 import DQN
 from stable_baselines3.common.buffers import ReplayBuffer
 import numpy as np
+from pandas import DataFrame

 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
@@ -21,7 +22,8 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel):
     User created Reinforcement Learning Model prediction model.
     """

-    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen):
+    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
+               prices_train: DataFrame, prices_test: DataFrame):

         agent_params = self.freqai_info['model_training_parameters']
         reward_params = self.freqai_info['model_reward_parameters']
@@ -30,15 +32,10 @@
         eval_freq = agent_params["eval_cycles"] * len(test_df)
         total_timesteps = agent_params["train_cycles"] * len(train_df)

-        # price data for model training and evaluation
-        price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index))
-        price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(
-            len(test_df.index))

         # environments
-        train_env = MyRLEnv(df=train_df, prices=price, window_size=self.CONV_WIDTH,
+        train_env = MyRLEnv(df=train_df, prices=prices_train, window_size=self.CONV_WIDTH,
                             reward_kwargs=reward_params)
-        eval = MyRLEnv(df=test_df, prices=price_test,
+        eval = MyRLEnv(df=test_df, prices=prices_test,
                        window_size=self.CONV_WIDTH, reward_kwargs=reward_params)
         eval_env = Monitor(eval, ".")
         eval_env.reset()
@@ -66,7 +63,7 @@
             callback=eval_callback
         )

-        best_model = DQN.load(dk.data_path / "best_model.zip")
+        best_model = DQN.load(dk.data_path / "best_model")

         print('Training finished!')

Changed file: ReinforcementLearningTDQN_multiproc

@@ -15,7 +15,7 @@ from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel
 from freqtrade.freqai.RL.TDQNagent import TDQN
 from stable_baselines3.common.buffers import ReplayBuffer
 from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from pandas import DataFrame

 logger = logging.getLogger(__name__)
@@ -47,7 +47,8 @@ class ReinforcementLearningTDQN_multiproc(BaseReinforcementLearningModel):
     User created Reinforcement Learning Model prediction model.
     """

-    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen):
+    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
+               prices_train: DataFrame, prices_test: DataFrame):

         agent_params = self.freqai_info['model_training_parameters']
         reward_params = self.freqai_info['model_reward_parameters']
@@ -57,18 +58,13 @@
         total_timesteps = agent_params["train_cycles"] * len(train_df)
         learning_rate = agent_params["learning_rate"]

-        # price data for model training and evaluation
-        price = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(len(train_df.index))
-        price_test = self.dd.historic_data[pair][f"{self.config['timeframe']}"].tail(
-            len(test_df.index))

         env_id = "train_env"
         num_cpu = int(dk.thread_count / 2)
-        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, price, reward_params,
+        train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, prices_train, reward_params,
                                             self.CONV_WIDTH) for i in range(num_cpu)])

         eval_env_id = 'eval_env'
-        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, price_test, reward_params,
+        eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, prices_test, reward_params,
                                            self.CONV_WIDTH, monitor=True) for i in range(num_cpu)])

         path = dk.data_path