ruff format: freqai

Matthias 2024-05-12 17:12:20 +02:00
parent e4e8c3967c
commit d1db43dee0
44 changed files with 1111 additions and 900 deletions
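For orientation, ruff's formatter is Black-compatible, so every hunk below is a mechanical rewrite: string quotes are normalized to double quotes, bare float literals like `0.` become `0.0`, and any call, dict, or condition that exceeds the configured line length (100 characters here, judging by the reflowed lines) is exploded into one element per line with a trailing comma. A minimal illustration of the dominant pattern (hypothetical names, not taken from this diff):

    # before: single quotes, over-long dict literal wrapped by hand
    trade = {'price': price, 'index': tick,
             'type': trade_type, 'profit': profit}

    # after `ruff format`: double quotes, one item per line, trailing
    # comma so the formatter keeps the literal multi-line
    trade = {
        "price": price,
        "index": tick,
        "type": trade_type,
        "profit": profit,
    }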

View File

@@ -19,6 +19,7 @@ class Base3ActionRLEnv(BaseEnvironment):
     """
     Base class for a 3 action environment
     """
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.actions = Actions
@@ -73,11 +74,18 @@ class Base3ActionRLEnv(BaseEnvironment):
             if trade_type is not None:
                 self.trade_history.append(
-                    {'price': self.current_price(), 'index': self._current_tick,
-                     'type': trade_type, 'profit': self.get_unrealized_profit()})
+                    {
+                        "price": self.current_price(),
+                        "index": self._current_tick,
+                        "type": trade_type,
+                        "profit": self.get_unrealized_profit(),
+                    }
+                )

-        if (self._total_profit < self.max_drawdown or
-                self._total_unrealized_profit < self.max_drawdown):
+        if (
+            self._total_profit < self.max_drawdown
+            or self._total_unrealized_profit < self.max_drawdown
+        ):
             self._done = True

         self._position_history.append(self._position)
@@ -89,7 +97,7 @@ class Base3ActionRLEnv(BaseEnvironment):
             total_profit=self._total_profit,
             position=self._position.value,
             trade_duration=self.get_trade_duration(),
-            current_profit_pct=self.get_unrealized_profit()
+            current_profit_pct=self.get_unrealized_profit(),
         )

         observation = self._get_observation()
@@ -109,10 +117,14 @@ class Base3ActionRLEnv(BaseEnvironment):
         return (
             (action == Actions.Buy.value and self._position == Positions.Neutral)
             or (action == Actions.Sell.value and self._position == Positions.Long)
-            or (action == Actions.Sell.value and self._position == Positions.Neutral
-                and self.can_short)
-            or (action == Actions.Buy.value and self._position == Positions.Short
-                and self.can_short)
+            or (
+                action == Actions.Sell.value
+                and self._position == Positions.Neutral
+                and self.can_short
+            )
+            or (
+                action == Actions.Buy.value and self._position == Positions.Short and self.can_short
+            )
         )

     def _is_valid(self, action: int) -> bool:
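Read together, the reformatted `is_tradesignal` above is a small transition table: Buy enters from Neutral, Sell exits a Long, and only when `can_short` is set do Sell-from-Neutral (enter short) and Buy-from-Short (exit short) count as signals. A sketch of the same rule expressed as data, assuming the `Actions`/`Positions` enums from this file (illustrative, not part of the commit):

    # (action value, current position) -> whether can_short is required
    SIGNAL_TABLE = {
        (Actions.Buy.value, Positions.Neutral): False,   # enter long
        (Actions.Sell.value, Positions.Long): False,     # exit long
        (Actions.Sell.value, Positions.Neutral): True,   # enter short
        (Actions.Buy.value, Positions.Short): True,      # exit short
    }

    def is_tradesignal_table(env, action: int) -> bool:
        # absent pairs are never signals; short-side pairs also need can_short
        needs_short = SIGNAL_TABLE.get((action, env._position))
        return needs_short is not None and (not needs_short or env.can_short)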

View File

@@ -20,6 +20,7 @@ class Base4ActionRLEnv(BaseEnvironment):
     """
     Base class for a 4 action environment
     """
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.actions = Actions
@@ -52,7 +53,6 @@ class Base4ActionRLEnv(BaseEnvironment):
         trade_type = None
         if self.is_tradesignal(action):
-
             if action == Actions.Neutral.value:
                 self._position = Positions.Neutral
                 trade_type = "neutral"
@@ -75,11 +75,18 @@ class Base4ActionRLEnv(BaseEnvironment):
             if trade_type is not None:
                 self.trade_history.append(
-                    {'price': self.current_price(), 'index': self._current_tick,
-                     'type': trade_type, 'profit': self.get_unrealized_profit()})
+                    {
+                        "price": self.current_price(),
+                        "index": self._current_tick,
+                        "type": trade_type,
+                        "profit": self.get_unrealized_profit(),
+                    }
+                )

-        if (self._total_profit < self.max_drawdown or
-                self._total_unrealized_profit < self.max_drawdown):
+        if (
+            self._total_profit < self.max_drawdown
+            or self._total_unrealized_profit < self.max_drawdown
+        ):
             self._done = True

         self._position_history.append(self._position)
@@ -91,7 +98,7 @@ class Base4ActionRLEnv(BaseEnvironment):
             total_profit=self._total_profit,
             position=self._position.value,
             trade_duration=self.get_trade_duration(),
-            current_profit_pct=self.get_unrealized_profit()
+            current_profit_pct=self.get_unrealized_profit(),
         )

         observation = self._get_observation()
@@ -108,14 +115,16 @@ class Base4ActionRLEnv(BaseEnvironment):
         Determine if the signal is a trade signal
         e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
         """
-        return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
-                    (action == Actions.Neutral.value and self._position == Positions.Short) or
-                    (action == Actions.Neutral.value and self._position == Positions.Long) or
-                    (action == Actions.Short_enter.value and self._position == Positions.Short) or
-                    (action == Actions.Short_enter.value and self._position == Positions.Long) or
-                    (action == Actions.Exit.value and self._position == Positions.Neutral) or
-                    (action == Actions.Long_enter.value and self._position == Positions.Long) or
-                    (action == Actions.Long_enter.value and self._position == Positions.Short))
+        return not (
+            (action == Actions.Neutral.value and self._position == Positions.Neutral)
+            or (action == Actions.Neutral.value and self._position == Positions.Short)
+            or (action == Actions.Neutral.value and self._position == Positions.Long)
+            or (action == Actions.Short_enter.value and self._position == Positions.Short)
+            or (action == Actions.Short_enter.value and self._position == Positions.Long)
+            or (action == Actions.Exit.value and self._position == Positions.Neutral)
+            or (action == Actions.Long_enter.value and self._position == Positions.Long)
+            or (action == Actions.Long_enter.value and self._position == Positions.Short)
+        )

     def _is_valid(self, action: int) -> bool:
         """

View File

@@ -21,6 +21,7 @@ class Base5ActionRLEnv(BaseEnvironment):
     """
     Base class for a 5 action environment
     """
+
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         self.actions = Actions
@@ -53,7 +54,6 @@ class Base5ActionRLEnv(BaseEnvironment):
         trade_type = None
         if self.is_tradesignal(action):
-
             if action == Actions.Neutral.value:
                 self._position = Positions.Neutral
                 trade_type = "neutral"
@@ -81,11 +81,18 @@ class Base5ActionRLEnv(BaseEnvironment):
             if trade_type is not None:
                 self.trade_history.append(
-                    {'price': self.current_price(), 'index': self._current_tick,
-                     'type': trade_type, 'profit': self.get_unrealized_profit()})
+                    {
+                        "price": self.current_price(),
+                        "index": self._current_tick,
+                        "type": trade_type,
+                        "profit": self.get_unrealized_profit(),
+                    }
+                )

-        if (self._total_profit < self.max_drawdown or
-                self._total_unrealized_profit < self.max_drawdown):
+        if (
+            self._total_profit < self.max_drawdown
+            or self._total_unrealized_profit < self.max_drawdown
+        ):
             self._done = True

         self._position_history.append(self._position)
@@ -97,7 +104,7 @@ class Base5ActionRLEnv(BaseEnvironment):
             total_profit=self._total_profit,
             position=self._position.value,
             trade_duration=self.get_trade_duration(),
-            current_profit_pct=self.get_unrealized_profit()
+            current_profit_pct=self.get_unrealized_profit(),
         )

         observation = self._get_observation()
@@ -113,17 +120,19 @@ class Base5ActionRLEnv(BaseEnvironment):
         Determine if the signal is a trade signal
        e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
         """
-        return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
-                    (action == Actions.Neutral.value and self._position == Positions.Short) or
-                    (action == Actions.Neutral.value and self._position == Positions.Long) or
-                    (action == Actions.Short_enter.value and self._position == Positions.Short) or
-                    (action == Actions.Short_enter.value and self._position == Positions.Long) or
-                    (action == Actions.Short_exit.value and self._position == Positions.Long) or
-                    (action == Actions.Short_exit.value and self._position == Positions.Neutral) or
-                    (action == Actions.Long_enter.value and self._position == Positions.Long) or
-                    (action == Actions.Long_enter.value and self._position == Positions.Short) or
-                    (action == Actions.Long_exit.value and self._position == Positions.Short) or
-                    (action == Actions.Long_exit.value and self._position == Positions.Neutral))
+        return not (
+            (action == Actions.Neutral.value and self._position == Positions.Neutral)
+            or (action == Actions.Neutral.value and self._position == Positions.Short)
+            or (action == Actions.Neutral.value and self._position == Positions.Long)
+            or (action == Actions.Short_enter.value and self._position == Positions.Short)
+            or (action == Actions.Short_enter.value and self._position == Positions.Long)
+            or (action == Actions.Short_exit.value and self._position == Positions.Long)
+            or (action == Actions.Short_exit.value and self._position == Positions.Neutral)
+            or (action == Actions.Long_enter.value and self._position == Positions.Long)
+            or (action == Actions.Long_enter.value and self._position == Positions.Short)
+            or (action == Actions.Long_exit.value and self._position == Positions.Short)
+            or (action == Actions.Long_exit.value and self._position == Positions.Neutral)
+        )

     def _is_valid(self, action: int) -> bool:
         # trade signal

View File

@@ -21,6 +21,7 @@ class BaseActions(Enum):
     """
     Default action space, mostly used for type handling.
     """
+
     Neutral = 0
     Long_enter = 1
     Long_exit = 2
@@ -44,11 +45,22 @@ class BaseEnvironment(gym.Env):
     See RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py
     """

-    def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
-                 reward_kwargs: dict = {}, window_size=10, starting_point=True,
-                 id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False,
-                 fee: float = 0.0015, can_short: bool = False, pair: str = "",
-                 df_raw: DataFrame = DataFrame()):
+    def __init__(
+        self,
+        df: DataFrame = DataFrame(),
+        prices: DataFrame = DataFrame(),
+        reward_kwargs: dict = {},
+        window_size=10,
+        starting_point=True,
+        id: str = "baseenv-1",
+        seed: int = 1,
+        config: dict = {},
+        live: bool = False,
+        fee: float = 0.0015,
+        can_short: bool = False,
+        pair: str = "",
+        df_raw: DataFrame = DataFrame(),
+    ):
         """
         Initializes the training/eval environment.
         :param df: dataframe of features
@@ -64,15 +76,15 @@ class BaseEnvironment(gym.Env):
         :param can_short: Whether or not the environment can short
         """
         self.config: dict = config
-        self.rl_config: dict = config['freqai']['rl_config']
-        self.add_state_info: bool = self.rl_config.get('add_state_info', False)
+        self.rl_config: dict = config["freqai"]["rl_config"]
+        self.add_state_info: bool = self.rl_config.get("add_state_info", False)
         self.id: str = id
-        self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
-        self.compound_trades: bool = config['stake_amount'] == 'unlimited'
+        self.max_drawdown: float = 1 - self.rl_config.get("max_training_drawdown_pct", 0.8)
+        self.compound_trades: bool = config["stake_amount"] == "unlimited"
         self.pair: str = pair
         self.raw_features: DataFrame = df_raw
-        if self.config.get('fee', None) is not None:
-            self.fee = self.config['fee']
+        if self.config.get("fee", None) is not None:
+            self.fee = self.config["fee"]
         else:
             self.fee = fee
@@ -82,14 +94,22 @@ class BaseEnvironment(gym.Env):
         self.can_short: bool = can_short
         self.live: bool = live
         if not self.live and self.add_state_info:
-            raise OperationalException("`add_state_info` is not available in backtesting. Change "
-                                       "parameter to false in your rl_config. See `add_state_info` "
-                                       "docs for more info.")
+            raise OperationalException(
+                "`add_state_info` is not available in backtesting. Change "
+                "parameter to false in your rl_config. See `add_state_info` "
+                "docs for more info."
+            )
         self.seed(seed)
         self.reset_env(df, prices, window_size, reward_kwargs, starting_point)

-    def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
-                  reward_kwargs: dict, starting_point=True):
+    def reset_env(
+        self,
+        df: DataFrame,
+        prices: DataFrame,
+        window_size: int,
+        reward_kwargs: dict,
+        starting_point=True,
+    ):
         """
         Resets the environment when the agent fails (in our case, if the drawdown
         exceeds the user set max_training_drawdown_pct)
@@ -113,8 +133,7 @@ class BaseEnvironment(gym.Env):
         self.total_features = self.signal_features.shape[1]
         self.shape = (window_size, self.total_features)
         self.set_action_space()
-        self.observation_space = spaces.Box(
-            low=-1, high=1, shape=self.shape, dtype=np.float32)
+        self.observation_space = spaces.Box(low=-1, high=1, shape=self.shape, dtype=np.float32)

         # episode
         self._start_tick: int = self.window_size
@@ -151,8 +170,13 @@ class BaseEnvironment(gym.Env):
         self.np_random, seed = seeding.np_random(seed)
         return [seed]

-    def tensorboard_log(self, metric: str, value: Optional[Union[int, float]] = None,
-                        inc: Optional[bool] = None, category: str = "custom"):
+    def tensorboard_log(
+        self,
+        metric: str,
+        value: Optional[Union[int, float]] = None,
+        inc: Optional[bool] = None,
+        category: str = "custom",
+    ):
         """
         Function builds the tensorboard_metrics dictionary
         to be parsed by the TensorboardCallback. This
@@ -195,7 +219,7 @@ class BaseEnvironment(gym.Env):
         self._done = False

         if self.starting_point is True:
-            if self.rl_config.get('randomize_starting_position', False):
+            if self.rl_config.get("randomize_starting_position", False):
                 length_of_data = int(self._end_tick / 4)
                 start_tick = random.randint(self.window_size + 1, length_of_data)
                 self._start_tick = start_tick
@@ -207,8 +231,8 @@ class BaseEnvironment(gym.Env):
         self._last_trade_tick = None
         self._position = Positions.Neutral

-        self.total_reward = 0.
-        self._total_profit = 1.  # unit
+        self.total_reward = 0.0
+        self._total_profit = 1.0  # unit
         self.history = {}
         self.trade_history = []
         self.portfolio_log_returns = np.zeros(len(self.prices))
@@ -231,18 +255,19 @@ class BaseEnvironment(gym.Env):
         This may or may not be independent of action types, user can inherit
         this in their custom "MyRLEnv"
         """
-        features_window = self.signal_features[(
-            self._current_tick - self.window_size):self._current_tick]
+        features_window = self.signal_features[
+            (self._current_tick - self.window_size) : self._current_tick
+        ]
         if self.add_state_info:
-            features_and_state = DataFrame(np.zeros((len(features_window), 3)),
-                                           columns=['current_profit_pct',
-                                                    'position',
-                                                    'trade_duration'],
-                                           index=features_window.index)
-            features_and_state['current_profit_pct'] = self.get_unrealized_profit()
-            features_and_state['position'] = self._position.value
-            features_and_state['trade_duration'] = self.get_trade_duration()
+            features_and_state = DataFrame(
+                np.zeros((len(features_window), 3)),
+                columns=["current_profit_pct", "position", "trade_duration"],
+                index=features_window.index,
+            )
+            features_and_state["current_profit_pct"] = self.get_unrealized_profit()
+            features_and_state["position"] = self._position.value
+            features_and_state["trade_duration"] = self.get_trade_duration()
             features_and_state = pd.concat([features_window, features_and_state], axis=1)
             return features_and_state
         else:
@@ -262,10 +287,10 @@ class BaseEnvironment(gym.Env):
         Get the unrealized profit if the agent is in a trade
         """
         if self._last_trade_tick is None:
-            return 0.
+            return 0.0

         if self._position == Positions.Neutral:
-            return 0.
+            return 0.0
         elif self._position == Positions.Short:
             current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
             last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
@@ -275,7 +300,7 @@ class BaseEnvironment(gym.Env):
             last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
             return (current_price - last_trade_price) / last_trade_price
         else:
-            return 0.
+            return 0.0

     @abstractmethod
     def is_tradesignal(self, action: int) -> bool:
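Since `is_tradesignal` is abstract here (note the `@abstractmethod`), the concrete 3/4/5-action classes above supply it, and users typically customize behavior by subclassing one of those and overriding `calculate_reward`. A minimal sketch of that pattern, assuming the import path used in freqtrade's RL examples:

    from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv, Positions

    class MyRLEnv(Base5ActionRLEnv):
        def calculate_reward(self, action: int) -> float:
            # penalize invalid actions outright
            if not self._is_valid(action):
                return -2
            pnl = self.get_unrealized_profit()
            # pay out (scaled) profit when closing a long
            if action == Actions.Long_exit.value and self._position == Positions.Long:
                return float(pnl * 100)
            # mildly discourage sitting idle in no position
            if action == Actions.Neutral.value and self._position == Positions.Neutral:
                return -1
            return 0.0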

View File

@@ -30,10 +30,10 @@ from freqtrade.persistence import Trade

 logger = logging.getLogger(__name__)

-torch.multiprocessing.set_sharing_strategy('file_system')
+torch.multiprocessing.set_sharing_strategy("file_system")

-SB3_MODELS = ['PPO', 'A2C', 'DQN']
-SB3_CONTRIB_MODELS = ['TRPO', 'ARS', 'RecurrentPPO', 'MaskablePPO', 'QRDQN']
+SB3_MODELS = ["PPO", "A2C", "DQN"]
+SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]


 class BaseReinforcementLearningModel(IFreqaiModel):
@@ -42,57 +42,60 @@ class BaseReinforcementLearningModel(IFreqaiModel):
     """

     def __init__(self, **kwargs) -> None:
-        super().__init__(config=kwargs['config'])
-        self.max_threads = min(self.freqai_info['rl_config'].get(
-            'cpu_count', 1), max(int(self.max_system_threads / 2), 1))
+        super().__init__(config=kwargs["config"])
+        self.max_threads = min(
+            self.freqai_info["rl_config"].get("cpu_count", 1),
+            max(int(self.max_system_threads / 2), 1),
+        )
         th.set_num_threads(self.max_threads)
-        self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
+        self.reward_params = self.freqai_info["rl_config"]["model_reward_parameters"]
         self.train_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
         self.eval_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
         self.eval_callback: Optional[MaskableEvalCallback] = None
-        self.model_type = self.freqai_info['rl_config']['model_type']
-        self.rl_config = self.freqai_info['rl_config']
+        self.model_type = self.freqai_info["rl_config"]["model_type"]
+        self.rl_config = self.freqai_info["rl_config"]
         self.df_raw: DataFrame = DataFrame()
-        self.continual_learning = self.freqai_info.get('continual_learning', False)
+        self.continual_learning = self.freqai_info.get("continual_learning", False)
         if self.model_type in SB3_MODELS:
-            import_str = 'stable_baselines3'
+            import_str = "stable_baselines3"
         elif self.model_type in SB3_CONTRIB_MODELS:
-            import_str = 'sb3_contrib'
+            import_str = "sb3_contrib"
         else:
-            raise OperationalException(f'{self.model_type} not available in stable_baselines3 or '
-                                       f'sb3_contrib. please choose one of {SB3_MODELS} or '
-                                       f'{SB3_CONTRIB_MODELS}')
+            raise OperationalException(
+                f"{self.model_type} not available in stable_baselines3 or "
+                f"sb3_contrib. please choose one of {SB3_MODELS} or "
+                f"{SB3_CONTRIB_MODELS}"
+            )

         mod = importlib.import_module(import_str, self.model_type)
         self.MODELCLASS = getattr(mod, self.model_type)
-        self.policy_type = self.freqai_info['rl_config']['policy_type']
+        self.policy_type = self.freqai_info["rl_config"]["policy_type"]
         self.unset_outlier_removal()
-        self.net_arch = self.rl_config.get('net_arch', [128, 128])
+        self.net_arch = self.rl_config.get("net_arch", [128, 128])
         self.dd.model_type = import_str
-        self.tensorboard_callback: TensorboardCallback = \
-            TensorboardCallback(verbose=1, actions=BaseActions)
+        self.tensorboard_callback: TensorboardCallback = TensorboardCallback(
+            verbose=1, actions=BaseActions
+        )

     def unset_outlier_removal(self):
         """
         If user has activated any function that may remove training points, this
         function will set them to false and warn them
         """
-        if self.ft_params.get('use_SVM_to_remove_outliers', False):
-            self.ft_params.update({'use_SVM_to_remove_outliers': False})
-            logger.warning('User tried to use SVM with RL. Deactivating SVM.')
-        if self.ft_params.get('use_DBSCAN_to_remove_outliers', False):
-            self.ft_params.update({'use_DBSCAN_to_remove_outliers': False})
-            logger.warning('User tried to use DBSCAN with RL. Deactivating DBSCAN.')
-        if self.ft_params.get('DI_threshold', False):
-            self.ft_params.update({'DI_threshold': False})
-            logger.warning('User tried to use DI_threshold with RL. Deactivating DI_threshold.')
-        if self.freqai_info['data_split_parameters'].get('shuffle', False):
-            self.freqai_info['data_split_parameters'].update({'shuffle': False})
-            logger.warning('User tried to shuffle training data. Setting shuffle to False')
+        if self.ft_params.get("use_SVM_to_remove_outliers", False):
+            self.ft_params.update({"use_SVM_to_remove_outliers": False})
+            logger.warning("User tried to use SVM with RL. Deactivating SVM.")
+        if self.ft_params.get("use_DBSCAN_to_remove_outliers", False):
+            self.ft_params.update({"use_DBSCAN_to_remove_outliers": False})
+            logger.warning("User tried to use DBSCAN with RL. Deactivating DBSCAN.")
+        if self.ft_params.get("DI_threshold", False):
+            self.ft_params.update({"DI_threshold": False})
+            logger.warning("User tried to use DI_threshold with RL. Deactivating DI_threshold.")
+        if self.freqai_info["data_split_parameters"].get("shuffle", False):
+            self.freqai_info["data_split_parameters"].update({"shuffle": False})
+            logger.warning("User tried to shuffle training data. Setting shuffle to False")

-    def train(
-        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
-    ) -> Any:
+    def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         Filter the training data and train a model to it. Train makes heavy use of the datakitchen
         for storing, saving, loading, and analyzing the data.
@@ -111,8 +114,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             training_filter=True,
         )

-        dd: Dict[str, Any] = dk.make_train_test_datasets(
-            features_filtered, labels_filtered)
+        dd: Dict[str, Any] = dk.make_train_test_datasets(features_filtered, labels_filtered)
         self.df_raw = copy.deepcopy(dd["train_features"])
         dk.fit_labels()  # FIXME useless for now, but just satiating append methods
@@ -121,18 +123,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)

-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )

-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )

         logger.info(
             f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
@@ -147,9 +149,13 @@ class BaseReinforcementLearningModel(IFreqaiModel):

         return model

-    def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
-                                        prices_train: DataFrame, prices_test: DataFrame,
-                                        dk: FreqaiDataKitchen):
+    def set_train_and_eval_environments(
+        self,
+        data_dictionary: Dict[str, DataFrame],
+        prices_train: DataFrame,
+        prices_test: DataFrame,
+        dk: FreqaiDataKitchen,
+    ):
         """
         User can override this if they are using a custom MyRLEnv
         :param data_dictionary: dict = common data dictionary containing train and test
@@ -165,11 +171,14 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, **env_info)
         self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test, **env_info))
-        self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
-                                                  render=False, eval_freq=len(train_df),
-                                                  best_model_save_path=str(dk.data_path),
-                                                  use_masking=(self.model_type == 'MaskablePPO' and
-                                                               is_masking_supported(self.eval_env)))
+        self.eval_callback = MaskableEvalCallback(
+            self.eval_env,
+            deterministic=True,
+            render=False,
+            eval_freq=len(train_df),
+            best_model_save_path=str(dk.data_path),
+            use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
+        )

         actions = self.train_env.get_actions()
         self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)
@@ -178,16 +187,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         """
         Create dictionary of environment arguments
         """
-        env_info = {"window_size": self.CONV_WIDTH,
-                    "reward_kwargs": self.reward_params,
-                    "config": self.config,
-                    "live": self.live,
-                    "can_short": self.can_short,
-                    "pair": pair,
-                    "df_raw": self.df_raw}
+        env_info = {
+            "window_size": self.CONV_WIDTH,
+            "reward_kwargs": self.reward_params,
+            "config": self.config,
+            "live": self.live,
+            "can_short": self.can_short,
+            "pair": pair,
+            "df_raw": self.df_raw,
+        }
         if self.data_provider:
-            env_info["fee"] = self.data_provider._exchange \
-                .get_fee(symbol=self.data_provider.current_whitelist()[0])  # type: ignore
+            env_info["fee"] = self.data_provider._exchange.get_fee(
+                symbol=self.data_provider.current_whitelist()[0]
+            )  # type: ignore

         return env_info
@@ -219,11 +231,12 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         for trade in open_trades:
             if trade.pair == pair:
                 if self.data_provider._exchange is None:  # type: ignore
-                    logger.error('No exchange available.')
+                    logger.error("No exchange available.")
                     return 0, 0, 0
                 else:
                     current_rate = self.data_provider._exchange.get_rate(  # type: ignore
-                        pair, refresh=False, side="exit", is_short=trade.is_short)
+                        pair, refresh=False, side="exit", is_short=trade.is_short
+                    )

                 now = datetime.now(timezone.utc).timestamp()
                 trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
@@ -255,16 +268,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk)

         dk.data_dictionary["prediction_features"], _, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )

-        pred_df = self.rl_model_predict(
-            dk.data_dictionary["prediction_features"], dk, self.model)
+        pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
         pred_df.fillna(0, inplace=True)

         return (pred_df, dk.do_predict)

-    def rl_model_predict(self, dataframe: DataFrame,
-                         dk: FreqaiDataKitchen, model: Any) -> DataFrame:
+    def rl_model_predict(
+        self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any
+    ) -> DataFrame:
         """
         A helper function to make predictions in the Reinforcement learning module.
         :param dataframe: DataFrame = the dataframe of features to make the predictions on
@@ -275,11 +289,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):

         def _predict(window):
             observations = dataframe.iloc[window.index]
-            if self.live and self.rl_config.get('add_state_info', False):
+            if self.live and self.rl_config.get("add_state_info", False):
                 market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
-                observations['current_profit_pct'] = current_profit
-                observations['position'] = market_side
-                observations['trade_duration'] = trade_duration
+                observations["current_profit_pct"] = current_profit
+                observations["position"] = market_side
+                observations["trade_duration"] = trade_duration
             res, _ = model.predict(observations, deterministic=True)
             return res
@@ -287,23 +301,31 @@ class BaseReinforcementLearningModel(IFreqaiModel):

         return output

-    def build_ohlc_price_dataframes(self, data_dictionary: dict,
-                                    pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
-                                                                               DataFrame]:
+    def build_ohlc_price_dataframes(
+        self, data_dictionary: dict, pair: str, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
         """
         Builds the train prices and test prices for the environment.
         """
-        pair = pair.replace(':', '')
+        pair = pair.replace(":", "")
         train_df = data_dictionary["train_features"]
         test_df = data_dictionary["test_features"]

         # price data for model training and evaluation
-        tf = self.config['timeframe']
-        rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low',
-                       '%-raw_high': ' high', '%-raw_close': 'close'}
-        rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
-                           f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
+        tf = self.config["timeframe"]
+        rename_dict = {
+            "%-raw_open": "open",
+            "%-raw_low": "low",
+            "%-raw_high": " high",
+            "%-raw_close": "close",
+        }
+        rename_dict_old = {
+            f"%-{pair}raw_open_{tf}": "open",
+            f"%-{pair}raw_low_{tf}": "low",
+            f"%-{pair}raw_high_{tf}": " high",
+            f"%-{pair}raw_close_{tf}": "close",
+        }

         prices_train = train_df.filter(rename_dict.keys(), axis=1)
         prices_train_old = train_df.filter(rename_dict_old.keys(), axis=1)
@@ -311,17 +333,21 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         if not prices_train_old.empty:
             prices_train = prices_train_old
             rename_dict = rename_dict_old
-            logger.warning('Reinforcement learning module didn\'t find the correct raw prices '
-                           'assigned in feature_engineering_standard(). '
-                           'Please assign them with:\n'
-                           'dataframe["%-raw_close"] = dataframe["close"]\n'
-                           'dataframe["%-raw_open"] = dataframe["open"]\n'
-                           'dataframe["%-raw_high"] = dataframe["high"]\n'
-                           'dataframe["%-raw_low"] = dataframe["low"]\n'
-                           'inside `feature_engineering_standard()')
+            logger.warning(
+                "Reinforcement learning module didn't find the correct raw prices "
+                "assigned in feature_engineering_standard(). "
+                "Please assign them with:\n"
+                'dataframe["%-raw_close"] = dataframe["close"]\n'
+                'dataframe["%-raw_open"] = dataframe["open"]\n'
+                'dataframe["%-raw_high"] = dataframe["high"]\n'
+                'dataframe["%-raw_low"] = dataframe["low"]\n'
+                "inside `feature_engineering_standard()"
+            )
         elif prices_train.empty:
-            raise OperationalException("No prices found, please follow log warning "
-                                       "instructions to correct the strategy.")
+            raise OperationalException(
+                "No prices found, please follow log warning "
+                "instructions to correct the strategy."
+            )

         prices_train.rename(columns=rename_dict, inplace=True)
         prices_train.reset_index(drop=True)
@@ -339,7 +365,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         """
         Given a dataframe, drop the ohlc data
         """
-        drop_list = ['%-raw_open', '%-raw_low', '%-raw_high', '%-raw_close']
+        drop_list = ["%-raw_open", "%-raw_low", "%-raw_high", "%-raw_close"]

         if self.rl_config["drop_ohlc_from_features"]:
             df.drop(drop_list, axis=1, inplace=True)
@@ -358,7 +384,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         if exists:
             model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
         else:
-            logger.info('No model file on disk to continue learning from.')
+            logger.info("No model file on disk to continue learning from.")

         return model
@@ -400,15 +426,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 return -2

             pnl = self.get_unrealized_profit()
-            factor = 100.
+            factor = 100.0

             # you can use feature values from dataframe
-            rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_"
-                                        f"{self.config['timeframe']}"].iloc[self._current_tick]
+            rsi_now = self.raw_features[
+                f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"
+            ].iloc[self._current_tick]

             # reward agent for entering trades
-            if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
-                    and self._position == Positions.Neutral):
+            if (
+                action in (Actions.Long_enter.value, Actions.Short_enter.value)
+                and self._position == Positions.Neutral
+            ):
                 if rsi_now < 40:
                     factor = 40 / rsi_now
                 else:
@@ -419,7 +448,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             if action == Actions.Neutral.value and self._position == Positions.Neutral:
                 return -1

-            max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
+            max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
             if self._last_trade_tick:
                 trade_duration = self._current_tick - self._last_trade_tick
             else:
@@ -431,28 +460,36 @@ class BaseReinforcementLearningModel(IFreqaiModel):
                 factor *= 0.5

             # discourage sitting in position
-            if (self._position in (Positions.Short, Positions.Long) and
-                    action == Actions.Neutral.value):
+            if (
+                self._position in (Positions.Short, Positions.Long)
+                and action == Actions.Neutral.value
+            ):
                 return -1 * trade_duration / max_trade_duration

             # close long
             if action == Actions.Long_exit.value and self._position == Positions.Long:
                 if pnl > self.profit_aim * self.rr:
-                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                    factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
                 return float(pnl * factor)

             # close short
             if action == Actions.Short_exit.value and self._position == Positions.Short:
                 if pnl > self.profit_aim * self.rr:
-                    factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
+                    factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
                 return float(pnl * factor)

-            return 0.
+            return 0.0


-def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
-             seed: int, train_df: DataFrame, price: DataFrame,
-             env_info: Dict[str, Any] = {}) -> Callable:
+def make_env(
+    MyRLEnv: Type[BaseEnvironment],
+    env_id: str,
+    rank: int,
+    seed: int,
+    train_df: DataFrame,
+    price: DataFrame,
+    env_info: Dict[str, Any] = {},
+) -> Callable:
     """
     Utility function for multiprocessed env.
@@ -465,10 +502,9 @@ def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
     """

     def _init() -> gym.Env:
-
-        env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank,
-                      **env_info)
+        env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank, **env_info)
         return env
+
     set_random_seed(seed)
     return _init
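`make_env` returns a zero-argument factory rather than an environment instance, which is the shape stable-baselines3's `SubprocVecEnv` expects for multiprocessing. A sketch of how a multiprocess trainer might consume it (the variable names here are illustrative, not from this diff):

    from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

    num_cpu = 4  # illustrative worker count
    train_env = VecMonitor(
        SubprocVecEnv(
            # one factory per worker; rank offsets the seed so workers differ
            [
                make_env(MyRLEnv, "train_env", rank, 42, train_df, prices_train, env_info)
                for rank in range(num_cpu)
            ]
        )
    )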

View File

@@ -21,9 +21,7 @@ class BaseClassifierModel(IFreqaiModel):
     such as prediction_models/CatboostClassifier.py for guidance.
     """

-    def train(
-        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
-    ) -> Any:
+    def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         Filter the training data and train a model to it. Train makes heavy use of the datakitchen
         for storing, saving, loading, and analyzing the data.
@@ -47,26 +45,28 @@ class BaseClassifierModel(IFreqaiModel):
         start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
         end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
-        logger.info(f"-------------------- Training on data from {start_date} to "
-                    f"{end_date} --------------------")
+        logger.info(
+            f"-------------------- Training on data from {start_date} to "
+            f"{end_date} --------------------"
+        )
         # split data into train/test data.
         dd = dk.make_train_test_datasets(features_filtered, labels_filtered)
         if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
             dk.fit_labels()
         dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)

-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )

-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )

         logger.info(
             f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
@@ -77,8 +77,10 @@ class BaseClassifierModel(IFreqaiModel):
         end_time = time()

-        logger.info(f"-------------------- Done training {pair} "
-                    f"({end_time - start_time:.2f} secs) --------------------")
+        logger.info(
+            f"-------------------- Done training {pair} "
+            f"({end_time - start_time:.2f} secs) --------------------"
+        )

         return model
@@ -102,7 +104,8 @@ class BaseClassifierModel(IFreqaiModel):
         dk.data_dictionary["prediction_features"] = filtered_df

         dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )

         predictions = self.model.predict(dk.data_dictionary["prediction_features"])
         if self.CONV_WIDTH == 1:

View File

@@ -59,8 +59,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
         class_names = self.model.model_meta_data.get("class_names", None)
         if not class_names:
             raise ValueError(
-                "Missing class names. "
-                "self.model.model_meta_data['class_names'] is None."
+                "Missing class names. " "self.model.model_meta_data['class_names'] is None."
             )

         if not self.class_name_to_index:
@@ -74,11 +73,11 @@ class BasePyTorchClassifier(BasePyTorchModel):
         dk.data_dictionary["prediction_features"] = filtered_df

         dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )

         x = self.data_convertor.convert_x(
-            dk.data_dictionary["prediction_features"],
-            device=self.device
+            dk.data_dictionary["prediction_features"], device=self.device
         )
         self.model.model.eval()
         logits = self.model.model(x)
@@ -120,15 +119,12 @@ class BasePyTorchClassifier(BasePyTorchModel):
         )

     @staticmethod
-    def assert_valid_class_names(
-        target_column: pd.Series,
-        class_names: List[str]
-    ):
+    def assert_valid_class_names(target_column: pd.Series, class_names: List[str]):
         non_defined_labels = set(target_column) - set(class_names)
         if len(non_defined_labels) != 0:
             raise OperationalException(
                 f"Found non defined labels: {non_defined_labels}, ",
-                f"expecting labels: {class_names}"
+                f"expecting labels: {class_names}",
             )

     def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
@@ -147,7 +143,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
         self,
         data_dictionary: Dict[str, pd.DataFrame],
         dk: FreqaiDataKitchen,
-        class_names: List[str]
+        class_names: List[str],
     ):
         self.init_class_names_to_index_mapping(class_names)
         self.encode_class_names(data_dictionary, dk, class_names)
@@ -162,9 +158,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
         return self.class_names

-    def train(
-        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
-    ) -> Any:
+    def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         Filter the training data and train a model to it. Train makes heavy use of the datakitchen
         for storing, saving, loading, and analyzing the data.
@@ -191,18 +185,18 @@ class BasePyTorchClassifier(BasePyTorchModel):
         dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)

-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )

-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )

         logger.info(
             f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
@@ -212,7 +206,9 @@ class BasePyTorchClassifier(BasePyTorchModel):
         model = self.fit(dd, dk)
         end_time = time()

-        logger.info(f"-------------------- Done training {pair} "
-                    f"({end_time - start_time:.2f} secs) --------------------")
+        logger.info(
+            f"-------------------- Done training {pair} "
+            f"({end_time - start_time:.2f} secs) --------------------"
+        )

         return model

View File

@@ -21,7 +21,7 @@ class BasePyTorchModel(IFreqaiModel, ABC):
         super().__init__(config=kwargs["config"])
         self.dd.model_type = "pytorch"
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
+        test_size = self.freqai_info.get("data_split_parameters", {}).get("test_size")
         self.splits = ["train", "test"] if test_size != 0 else ["train"]
         self.window_size = self.freqai_info.get("conv_width", 1)
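The `test_size` lookup above decides whether a "test" split exists at all, and `conv_width` becomes the model's window size. For reference, a hedged sketch of the config fragment these lines read (values illustrative, not defaults from this diff):

    freqai_section = {
        "conv_width": 1,  # becomes self.window_size
        "data_split_parameters": {
            "test_size": 0.1,  # 0 drops "test" from self.splits
        },
    }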

View File

@@ -41,11 +41,11 @@ class BasePyTorchRegressor(BasePyTorchModel):
         dk.data_dictionary["prediction_features"] = filtered_df

         dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )

         x = self.data_convertor.convert_x(
-            dk.data_dictionary["prediction_features"],
-            device=self.device
+            dk.data_dictionary["prediction_features"], device=self.device
         )
         self.model.model.eval()
         y = self.model.model(x)
@@ -59,9 +59,7 @@ class BasePyTorchRegressor(BasePyTorchModel):
         dk.do_predict = outliers
         return (pred_df, dk.do_predict)

-    def train(
-        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
-    ) -> Any:
+    def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         Filter the training data and train a model to it. Train makes heavy use of the datakitchen
         for storing, saving, loading, and analyzing the data.
@@ -91,19 +89,19 @@ class BasePyTorchRegressor(BasePyTorchModel):
         dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
         dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])

-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )
         dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])

-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )
             dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])

         logger.info(
@@ -114,7 +112,9 @@ class BasePyTorchRegressor(BasePyTorchModel):
         model = self.fit(dd, dk)
         end_time = time()

-        logger.info(f"-------------------- Done training {pair} "
-                    f"({end_time - start_time:.2f} secs) --------------------")
+        logger.info(
+            f"-------------------- Done training {pair} "
+            f"({end_time - start_time:.2f} secs) --------------------"
+        )

         return model

View File

@@ -20,9 +20,7 @@ class BaseRegressionModel(IFreqaiModel):
     such as prediction_models/CatboostRegressor.py for guidance.
     """

-    def train(
-        self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
-    ) -> Any:
+    def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
         """
         Filter the training data and train a model to it. Train makes heavy use of the datakitchen
         for storing, saving, loading, and analyzing the data.
@@ -46,8 +44,10 @@ class BaseRegressionModel(IFreqaiModel):
         start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
         end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
-        logger.info(f"-------------------- Training on data from {start_date} to "
-                    f"{end_date} --------------------")
+        logger.info(
+            f"-------------------- Training on data from {start_date} to "
+            f"{end_date} --------------------"
+        )
         # split data into train/test data.
         dd = dk.make_train_test_datasets(features_filtered, labels_filtered)
         if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
@@ -55,19 +55,19 @@ class BaseRegressionModel(IFreqaiModel):
         dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
         dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count)

-        (dd["train_features"],
-         dd["train_labels"],
-         dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
-                                                                  dd["train_labels"],
-                                                                  dd["train_weights"])
+        (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
+            dk.feature_pipeline.fit_transform(
+                dd["train_features"], dd["train_labels"], dd["train_weights"]
+            )
+        )
         dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])

-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
-            (dd["test_features"],
-             dd["test_labels"],
-             dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
-                                                                 dd["test_labels"],
-                                                                 dd["test_weights"])
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
+            (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
+                dk.feature_pipeline.transform(
+                    dd["test_features"], dd["test_labels"], dd["test_weights"]
+                )
+            )
             dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])

         logger.info(
@@ -79,8 +79,10 @@ class BaseRegressionModel(IFreqaiModel):
         end_time = time()

-        logger.info(f"-------------------- Done training {pair} "
-                    f"({end_time - start_time:.2f} secs) --------------------")
+        logger.info(
+            f"-------------------- Done training {pair} "
+            f"({end_time - start_time:.2f} secs) --------------------"
+        )

         return model
@@ -102,7 +104,8 @@ class BaseRegressionModel(IFreqaiModel):
         )

         dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
-            dk.data_dictionary["prediction_features"], outlier_check=True)
+            dk.data_dictionary["prediction_features"], outlier_check=True
+        )

         predictions = self.model.predict(dk.data_dictionary["prediction_features"])
         if self.CONV_WIDTH == 1:

View File

@ -9,7 +9,6 @@ from freqtrade.exceptions import OperationalException
class FreqaiMultiOutputClassifier(MultiOutputClassifier): class FreqaiMultiOutputClassifier(MultiOutputClassifier):
def fit(self, X, y, sample_weight=None, fit_params=None): def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable. """Fit the model to data, separately for each output variable.
Parameters Parameters
@ -48,18 +47,14 @@ class FreqaiMultiOutputClassifier(MultiOutputClassifier):
"multi-output regression but has only one." "multi-output regression but has only one."
) )
if sample_weight is not None and not has_fit_parameter( if sample_weight is not None and not has_fit_parameter(self.estimator, "sample_weight"):
self.estimator, "sample_weight"
):
raise ValueError("Underlying estimator does not support sample weights.") raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params: if not fit_params:
fit_params = [None] * y.shape[1] fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)( self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)( delayed(_fit_estimator)(self.estimator, X, y[:, i], sample_weight, **fit_params[i])
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
for i in range(y.shape[1]) for i in range(y.shape[1])
) )
@ -67,8 +62,9 @@ class FreqaiMultiOutputClassifier(MultiOutputClassifier):
for estimator in self.estimators_: for estimator in self.estimators_:
self.classes_.extend(estimator.classes_) self.classes_.extend(estimator.classes_)
if len(set(self.classes_)) != len(self.classes_): if len(set(self.classes_)) != len(self.classes_):
raise OperationalException(f"Class labels must be unique across targets: " raise OperationalException(
f"{self.classes_}") f"Class labels must be unique across targets: " f"{self.classes_}"
)
if hasattr(self.estimators_[0], "n_features_in_"): if hasattr(self.estimators_[0], "n_features_in_"):
self.n_features_in_ = self.estimators_[0].n_features_in_ self.n_features_in_ = self.estimators_[0].n_features_in_
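The uniqueness check above guards against two targets sharing a class label, which would make the flattened classes_ list ambiguous when mapping predictions back to targets. A toy illustration of the condition (labels hypothetical, plain print in place of OperationalException):

classes_ = []
for estimator_classes in (["up", "down"], ["down", "flat"]):
    classes_.extend(estimator_classes)

# "down" appears under two targets, so the flattened list is ambiguous
if len(set(classes_)) != len(classes_):
    print(f"Class labels must be unique across targets: {classes_}")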
@ -4,7 +4,6 @@ from sklearn.utils.validation import has_fit_parameter
class FreqaiMultiOutputRegressor(MultiOutputRegressor): class FreqaiMultiOutputRegressor(MultiOutputRegressor):
def fit(self, X, y, sample_weight=None, fit_params=None): def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable. """Fit the model to data, separately for each output variable.
Parameters Parameters
@ -40,18 +39,14 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor):
"multi-output regression but has only one." "multi-output regression but has only one."
) )
if sample_weight is not None and not has_fit_parameter( if sample_weight is not None and not has_fit_parameter(self.estimator, "sample_weight"):
self.estimator, "sample_weight"
):
raise ValueError("Underlying estimator does not support sample weights.") raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params: if not fit_params:
fit_params = [None] * y.shape[1] fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)( self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)( delayed(_fit_estimator)(self.estimator, X, y[:, i], sample_weight, **fit_params[i])
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
for i in range(y.shape[1]) for i in range(y.shape[1])
) )
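Both multi-output wrappers fit one clone of the base estimator per target column in parallel, forwarding sample weights and per-output fit parameters. A self-contained sketch of the same pattern using plain scikit-learn and joblib (estimator and data are illustrative):

import numpy as np
from joblib import Parallel, delayed
from sklearn.base import clone
from sklearn.linear_model import LinearRegression

X = np.random.rand(100, 4)   # shared feature matrix
y = np.random.rand(100, 2)   # two regression targets
sample_weight = np.ones(len(X))

def fit_one(estimator, X, y_col, sample_weight):
    # clone so each target gets an independently fitted estimator
    est = clone(estimator)
    est.fit(X, y_col, sample_weight=sample_weight)
    return est

estimators = Parallel(n_jobs=2)(
    delayed(fit_one)(LinearRegression(), X, y[:, i], sample_weight)
    for i in range(y.shape[1])
)
preds = np.column_stack([est.predict(X) for est in estimators])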
@ -66,7 +66,6 @@ class FreqaiDataDrawer:
""" """
def __init__(self, full_path: Path, config: Config): def __init__(self, full_path: Path, config: Config):
self.config = config self.config = config
self.freqai_info = config.get("freqai", {}) self.freqai_info = config.get("freqai", {})
# dictionary holding all pair metadata necessary to load in from disk # dictionary holding all pair metadata necessary to load in from disk
@ -81,7 +80,8 @@ class FreqaiDataDrawer:
self.full_path = full_path self.full_path = full_path
self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl") self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
self.historic_predictions_bkp_path = Path( self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl") self.full_path / "historic_predictions.backup.pkl"
)
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.global_metadata_path = Path(self.full_path / "global_metadata.json") self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json") self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
@ -96,9 +96,12 @@ class FreqaiDataDrawer:
self.metric_tracker_lock = threading.Lock() self.metric_tracker_lock = threading.Lock()
self.old_DBSCAN_eps: Dict[str, float] = {} self.old_DBSCAN_eps: Dict[str, float] = {}
self.empty_pair_dict: pair_info = { self.empty_pair_dict: pair_info = {
"model_filename": "", "trained_timestamp": 0, "model_filename": "",
"data_path": "", "extras": {}} "trained_timestamp": 0,
self.model_type = self.freqai_info.get('model_save_type', 'joblib') "data_path": "",
"extras": {},
}
self.model_type = self.freqai_info.get("model_save_type", "joblib")
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None: def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
""" """
@ -109,11 +112,11 @@ class FreqaiDataDrawer:
if pair not in self.metric_tracker: if pair not in self.metric_tracker:
self.metric_tracker[pair] = {} self.metric_tracker[pair] = {}
if metric not in self.metric_tracker[pair]: if metric not in self.metric_tracker[pair]:
self.metric_tracker[pair][metric] = {'timestamp': [], 'value': []} self.metric_tracker[pair][metric] = {"timestamp": [], "value": []}
timestamp = int(datetime.now(timezone.utc).timestamp()) timestamp = int(datetime.now(timezone.utc).timestamp())
self.metric_tracker[pair][metric]['value'].append(value) self.metric_tracker[pair][metric]["value"].append(value)
self.metric_tracker[pair][metric]['timestamp'].append(timestamp) self.metric_tracker[pair][metric]["timestamp"].append(timestamp)
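Each metric is stored per pair as parallel timestamp/value lists, so recording a new observation is just two appends. A standalone sketch of that structure (locking omitted; names follow the diff, usage hypothetical):

from datetime import datetime, timezone

metric_tracker: dict = {}

def update_metric_tracker(metric: str, value: float, pair: str) -> None:
    pair_metrics = metric_tracker.setdefault(pair, {})
    track = pair_metrics.setdefault(metric, {"timestamp": [], "value": []})
    track["value"].append(value)
    track["timestamp"].append(int(datetime.now(timezone.utc).timestamp()))

update_metric_tracker("train_time", 12.5, "BTC/USDT")
print(metric_tracker)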
def collect_metrics(self, time_spent: float, pair: str): def collect_metrics(self, time_spent: float, pair: str):
""" """
@ -121,10 +124,10 @@ class FreqaiDataDrawer:
""" """
load1, load5, load15 = psutil.getloadavg() load1, load5, load15 = psutil.getloadavg()
cpus = psutil.cpu_count() cpus = psutil.cpu_count()
self.update_metric_tracker('train_time', time_spent, pair) self.update_metric_tracker("train_time", time_spent, pair)
self.update_metric_tracker('cpu_load1min', load1 / cpus, pair) self.update_metric_tracker("cpu_load1min", load1 / cpus, pair)
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair) self.update_metric_tracker("cpu_load5min", load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair) self.update_metric_tracker("cpu_load15min", load15 / cpus, pair)
def load_global_metadata_from_disk(self): def load_global_metadata_from_disk(self):
""" """
@ -155,7 +158,7 @@ class FreqaiDataDrawer:
Tries to load an existing metrics dictionary if the user Tries to load an existing metrics dictionary if the user
wants to collect metrics. wants to collect metrics.
""" """
if self.freqai_info.get('write_metrics_to_disk', False): if self.freqai_info.get("write_metrics_to_disk", False):
exists = self.metric_tracker_path.is_file() exists = self.metric_tracker_path.is_file()
if exists: if exists:
with self.metric_tracker_path.open("r") as fp: with self.metric_tracker_path.open("r") as fp:
@ -181,10 +184,11 @@ class FreqaiDataDrawer:
) )
except EOFError: except EOFError:
logger.warning( logger.warning(
'Historical prediction file was corrupted. Trying to load backup file.') "Historical prediction file was corrupted. Trying to load backup file."
)
with self.historic_predictions_bkp_path.open("rb") as fp: with self.historic_predictions_bkp_path.open("rb") as fp:
self.historic_predictions = cloudpickle.load(fp) self.historic_predictions = cloudpickle.load(fp)
logger.warning('FreqAI successfully loaded the backup historical predictions file.') logger.warning("FreqAI successfully loaded the backup historical predictions file.")
else: else:
logger.info("Could not find existing historic_predictions, starting from scratch") logger.info("Could not find existing historic_predictions, starting from scratch")
@ -206,27 +210,33 @@ class FreqaiDataDrawer:
Save metric tracker of all pair metrics collected. Save metric tracker of all pair metrics collected.
""" """
with self.save_lock: with self.save_lock:
with self.metric_tracker_path.open('w') as fp: with self.metric_tracker_path.open("w") as fp:
rapidjson.dump(self.metric_tracker, fp, default=self.np_encoder, rapidjson.dump(
number_mode=rapidjson.NM_NATIVE) self.metric_tracker,
fp,
default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE,
)
def save_drawer_to_disk(self) -> None: def save_drawer_to_disk(self) -> None:
""" """
Save data drawer full of all pair model metadata in present model folder. Save data drawer full of all pair model metadata in present model folder.
""" """
with self.save_lock: with self.save_lock:
with self.pair_dictionary_path.open('w') as fp: with self.pair_dictionary_path.open("w") as fp:
rapidjson.dump(self.pair_dict, fp, default=self.np_encoder, rapidjson.dump(
number_mode=rapidjson.NM_NATIVE) self.pair_dict, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE
)
def save_global_metadata_to_disk(self, metadata: Dict[str, Any]): def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
""" """
Save global metadata json to disk Save global metadata json to disk
""" """
with self.save_lock: with self.save_lock:
with self.global_metadata_path.open('w') as fp: with self.global_metadata_path.open("w") as fp:
rapidjson.dump(metadata, fp, default=self.np_encoder, rapidjson.dump(
number_mode=rapidjson.NM_NATIVE) metadata, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE
)
def np_encoder(self, object): def np_encoder(self, object):
if isinstance(object, np.generic): if isinstance(object, np.generic):
@ -264,9 +274,7 @@ class FreqaiDataDrawer:
return return
def set_initial_return_values( def set_initial_return_values(
self, pair: str, self, pair: str, pred_df: DataFrame, dataframe: DataFrame
pred_df: DataFrame,
dataframe: DataFrame
) -> None: ) -> None:
""" """
Set the initial return values to the historical predictions dataframe. This avoids needing Set the initial return values to the historical predictions dataframe. This avoids needing
@ -285,7 +293,7 @@ class FreqaiDataDrawer:
new_pred["date_pred"] = dataframe["date"] new_pred["date_pred"] = dataframe["date"]
# set everything to nan except date_pred # set everything to nan except date_pred
columns_to_nan = new_pred.columns.difference(['date_pred', 'date']) columns_to_nan = new_pred.columns.difference(["date_pred", "date"])
new_pred[columns_to_nan] = None new_pred[columns_to_nan] = None
hist_preds = self.historic_predictions[pair].copy() hist_preds = self.historic_predictions[pair].copy()
@ -296,14 +304,15 @@ class FreqaiDataDrawer:
# find the closest common date between new_pred and historic predictions # find the closest common date between new_pred and historic predictions
# and cut off the new_pred dataframe at that date # and cut off the new_pred dataframe at that date
common_dates = pd.merge(new_pred, hist_preds, common_dates = pd.merge(new_pred, hist_preds, on="date_pred", how="inner")
on="date_pred", how="inner")
if len(common_dates.index) > 0: if len(common_dates.index) > 0:
new_pred = new_pred.iloc[len(common_dates) :] new_pred = new_pred.iloc[len(common_dates) :]
else: else:
logger.warning("No common dates found between new predictions and historic " logger.warning(
"No common dates found between new predictions and historic "
"predictions. You likely left your FreqAI instance offline " "predictions. You likely left your FreqAI instance offline "
f"for more than {len(dataframe.index)} candles.") f"for more than {len(dataframe.index)} candles."
)
# Pandas warns that it's keeping dtypes of non-NaN columns... # Pandas warns that it's keeping dtypes of non-NaN columns...
# yeah, we know, and we already want that behavior. Ignoring. # yeah, we know, and we already want that behavior. Ignoring.
@ -311,21 +320,22 @@ class FreqaiDataDrawer:
warnings.filterwarnings("ignore", category=FutureWarning) warnings.filterwarnings("ignore", category=FutureWarning)
# reindex new_pred columns to match the historic predictions dataframe # reindex new_pred columns to match the historic predictions dataframe
new_pred_reindexed = new_pred.reindex(columns=hist_preds.columns) new_pred_reindexed = new_pred.reindex(columns=hist_preds.columns)
df_concat = pd.concat( df_concat = pd.concat([hist_preds, new_pred_reindexed], ignore_index=True)
[hist_preds, new_pred_reindexed],
ignore_index=True
)
# any missing values will get zeroed out so users can see the exact # any missing values will get zeroed out so users can see the exact
# downtime in FreqUI # downtime in FreqUI
df_concat = df_concat.fillna(0) df_concat = df_concat.fillna(0)
self.historic_predictions[pair] = df_concat self.historic_predictions[pair] = df_concat
self.model_return_values[pair] = df_concat.tail( self.model_return_values[pair] = df_concat.tail(len(dataframe.index)).reset_index(drop=True)
len(dataframe.index)).reset_index(drop=True)
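The reindex/concat/fillna sequence above aligns the new predictions to the historic column layout and zero-fills whatever is missing, so downtime shows up as explicit zeros rather than NaNs. A small pandas-only sketch of that pattern (column names hypothetical):

import pandas as pd

hist_preds = pd.DataFrame({"date_pred": [1, 2], "&-s_close": [0.1, 0.2]})
new_pred = pd.DataFrame({"date_pred": [3, 4]})  # prediction column missing

# align columns to the historic frame, then zero-fill the gap
new_pred = new_pred.reindex(columns=hist_preds.columns)
df_concat = pd.concat([hist_preds, new_pred], ignore_index=True).fillna(0)
print(df_concat)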
def append_model_predictions(self, pair: str, predictions: DataFrame, def append_model_predictions(
self,
pair: str,
predictions: DataFrame,
do_preds: NDArray[np.int_], do_preds: NDArray[np.int_],
dk: FreqaiDataKitchen, strat_df: DataFrame) -> None: dk: FreqaiDataKitchen,
strat_df: DataFrame,
) -> None:
""" """
Append model predictions to historic predictions dataframe, then set the Append model predictions to historic predictions dataframe, then set the
strategy return dataframe to the tail of the historic predictions. The length of strategy return dataframe to the tail of the historic predictions. The length of
@ -338,15 +348,9 @@ class FreqaiDataDrawer:
index = self.historic_predictions[pair].index[-1:] index = self.historic_predictions[pair].index[-1:]
columns = self.historic_predictions[pair].columns columns = self.historic_predictions[pair].columns
zeros_df = pd.DataFrame( zeros_df = pd.DataFrame(np.zeros((1, len(columns))), index=index, columns=columns)
np.zeros((1, len(columns))),
index=index,
columns=columns
)
self.historic_predictions[pair] = pd.concat( self.historic_predictions[pair] = pd.concat(
[self.historic_predictions[pair], zeros_df], [self.historic_predictions[pair], zeros_df], ignore_index=True, axis=0
ignore_index=True,
axis=0
) )
df = self.historic_predictions[pair] df = self.historic_predictions[pair]
@ -370,8 +374,8 @@ class FreqaiDataDrawer:
df.iloc[-1, DI_values_loc] = dk.DI_values[-1] df.iloc[-1, DI_values_loc] = dk.DI_values[-1]
# extra values the user added within custom prediction model # extra values the user added within custom prediction model
if dk.data['extra_returns_per_train']: if dk.data["extra_returns_per_train"]:
rets = dk.data['extra_returns_per_train'] rets = dk.data["extra_returns_per_train"]
for return_str in rets: for return_str in rets:
return_loc = df.columns.get_loc(return_str) return_loc = df.columns.get_loc(return_str)
df.iloc[-1, return_loc] = rets[return_str] df.iloc[-1, return_loc] = rets[return_str]
@ -392,7 +396,8 @@ class FreqaiDataDrawer:
self.model_return_values[pair] = df.tail(len_df).reset_index(drop=True) self.model_return_values[pair] = df.tail(len_df).reset_index(drop=True)
def attach_return_values_to_return_dataframe( def attach_return_values_to_return_dataframe(
self, pair: str, dataframe: DataFrame) -> DataFrame: self, pair: str, dataframe: DataFrame
) -> DataFrame:
""" """
Attach the return values to the strat dataframe Attach the return values to the strat dataframe
:param dataframe: DataFrame = strategy dataframe :param dataframe: DataFrame = strategy dataframe
@ -423,15 +428,14 @@ class FreqaiDataDrawer:
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0: if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
dataframe["DI_values"] = 0 dataframe["DI_values"] = 0
if dk.data['extra_returns_per_train']: if dk.data["extra_returns_per_train"]:
rets = dk.data['extra_returns_per_train'] rets = dk.data["extra_returns_per_train"]
for return_str in rets: for return_str in rets:
dataframe[return_str] = 0 dataframe[return_str] = 0
dk.return_dataframe = dataframe dk.return_dataframe = dataframe
def purge_old_models(self) -> None: def purge_old_models(self) -> None:
num_keep = self.freqai_info["purge_old_models"] num_keep = self.freqai_info["purge_old_models"]
if not num_keep: if not num_keep:
return return
@ -508,10 +512,10 @@ class FreqaiDataDrawer:
save_path = Path(dk.data_path) save_path = Path(dk.data_path)
# Save the trained model # Save the trained model
if self.model_type == 'joblib': if self.model_type == "joblib":
with (save_path / f"{dk.model_filename}_model.joblib").open("wb") as fp: with (save_path / f"{dk.model_filename}_model.joblib").open("wb") as fp:
cloudpickle.dump(model, fp) cloudpickle.dump(model, fp)
elif self.model_type == 'keras': elif self.model_type == "keras":
model.save(save_path / f"{dk.model_filename}_model.h5") model.save(save_path / f"{dk.model_filename}_model.h5")
elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]: elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
model.save(save_path / f"{dk.model_filename}_model.zip") model.save(save_path / f"{dk.model_filename}_model.zip")
@ -596,16 +600,18 @@ class FreqaiDataDrawer:
# try to access model in memory instead of loading object from disk to save time # try to access model in memory instead of loading object from disk to save time
if dk.live and coin in self.model_dictionary: if dk.live and coin in self.model_dictionary:
model = self.model_dictionary[coin] model = self.model_dictionary[coin]
elif self.model_type == 'joblib': elif self.model_type == "joblib":
with (dk.data_path / f"{dk.model_filename}_model.joblib").open("rb") as fp: with (dk.data_path / f"{dk.model_filename}_model.joblib").open("rb") as fp:
model = cloudpickle.load(fp) model = cloudpickle.load(fp)
elif 'stable_baselines' in self.model_type or 'sb3_contrib' == self.model_type: elif "stable_baselines" in self.model_type or "sb3_contrib" == self.model_type:
mod = importlib.import_module( mod = importlib.import_module(
self.model_type, self.freqai_info['rl_config']['model_type']) self.model_type, self.freqai_info["rl_config"]["model_type"]
MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type']) )
MODELCLASS = getattr(mod, self.freqai_info["rl_config"]["model_type"])
model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model") model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
elif self.model_type == 'pytorch': elif self.model_type == "pytorch":
import torch import torch
zip = torch.load(dk.data_path / f"{dk.model_filename}_model.zip") zip = torch.load(dk.data_path / f"{dk.model_filename}_model.zip")
model = zip["pytrainer"] model = zip["pytrainer"]
model = model.load_from_checkpoint(zip) model = model.load_from_checkpoint(zip)
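Model loading dispatches on the configured model_save_type, with the in-memory model_dictionary consulted first. A reduced sketch of the joblib branch only (paths hypothetical; the RL and pytorch branches use their libraries' own loaders as shown above):

from pathlib import Path

import cloudpickle

def load_model(model_type: str, data_path: Path, model_filename: str):
    # "joblib" models are persisted as cloudpickle dumps
    if model_type == "joblib":
        with (data_path / f"{model_filename}_model.joblib").open("rb") as fp:
            return cloudpickle.load(fp)
    raise NotImplementedError(f"unhandled model_save_type: {model_type}")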
@ -639,23 +645,18 @@ class FreqaiDataDrawer:
df_dp = strategy.dp.get_pair_dataframe(pair, tf) df_dp = strategy.dp.get_pair_dataframe(pair, tf)
if len(df_dp.index) == 0: if len(df_dp.index) == 0:
continue continue
if str(hist_df.iloc[-1]["date"]) == str( if str(hist_df.iloc[-1]["date"]) == str(df_dp.iloc[-1:]["date"].iloc[-1]):
df_dp.iloc[-1:]["date"].iloc[-1]
):
continue continue
try: try:
index = ( index = df_dp.loc[df_dp["date"] == hist_df.iloc[-1]["date"]].index[0] + 1
df_dp.loc[
df_dp["date"] == hist_df.iloc[-1]["date"]
].index[0]
+ 1
)
except IndexError: except IndexError:
if hist_df.iloc[-1]['date'] < df_dp['date'].iloc[0]: if hist_df.iloc[-1]["date"] < df_dp["date"].iloc[0]:
raise OperationalException("In memory historical data is older than " raise OperationalException(
"In memory historical data is older than "
f"oldest DataProvider candle for {pair} on " f"oldest DataProvider candle for {pair} on "
f"timeframe {tf}") f"timeframe {tf}"
)
else: else:
index = -1 index = -1
logger.warning( logger.warning(
@ -677,7 +678,7 @@ class FreqaiDataDrawer:
axis=0, axis=0,
) )
self.current_candle = history_data[dk.pair][self.config['timeframe']].iloc[-1]['date'] self.current_candle = history_data[dk.pair][self.config["timeframe"]].iloc[-1]["date"]
def load_all_pair_histories(self, timerange: TimeRange, dk: FreqaiDataKitchen) -> None: def load_all_pair_histories(self, timerange: TimeRange, dk: FreqaiDataKitchen) -> None:
""" """
@ -715,13 +716,12 @@ class FreqaiDataDrawer:
corr_dataframes: Dict[Any, Any] = {} corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {}
historic_data = self.historic_data historic_data = self.historic_data
pairs = self.freqai_info["feature_parameters"].get( pairs = self.freqai_info["feature_parameters"].get("include_corr_pairlist", [])
"include_corr_pairlist", []
)
for tf in self.freqai_info["feature_parameters"].get("include_timeframes"): for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
base_dataframes[tf] = dk.slice_dataframe( base_dataframes[tf] = dk.slice_dataframe(
timerange, historic_data[pair][tf]).reset_index(drop=True) timerange, historic_data[pair][tf]
).reset_index(drop=True)
if pairs: if pairs:
for p in pairs: for p in pairs:
if pair in p: if pair in p:
@ -741,8 +741,8 @@ class FreqaiDataDrawer:
""" """
if not self.historic_predictions_path.is_file(): if not self.historic_predictions_path.is_file():
raise OperationalException( raise OperationalException(
'Historic predictions not found. Historic predictions data is required ' "Historic predictions not found. Historic predictions data is required "
'to run backtest with the freqai-backtest-live-models option ' "to run backtest with the freqai-backtest-live-models option "
) )
self.load_historic_predictions_from_disk() self.load_historic_predictions_from_disk()
@ -758,6 +758,6 @@ class FreqaiDataDrawer:
# add 1 day to string timerange to ensure BT module will load all dataframe data # add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1) end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange( backtesting_timerange = TimeRange(
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp()) "date", "date", int(start_date.timestamp()), int(end_date.timestamp())
) )
return backtesting_timerange return backtesting_timerange
@ -24,7 +24,7 @@ from freqtrade.strategy import merge_informative_pair
from freqtrade.strategy.interface import IStrategy from freqtrade.strategy.interface import IStrategy
pd.set_option('future.no_silent_downcasting', True) pd.set_option("future.no_silent_downcasting", True)
SECONDS_IN_DAY = 86400 SECONDS_IN_DAY = 86400
SECONDS_IN_HOUR = 3600 SECONDS_IN_HOUR = 3600
@ -98,7 +98,7 @@ class FreqaiDataKitchen:
config["freqai"]["backtest_period_days"], config["freqai"]["backtest_period_days"],
) )
self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {}) self.data["extra_returns_per_train"] = self.freqai_config.get("extra_returns_per_train", {})
if not self.freqai_config.get("data_kitchen_thread_count", 0): if not self.freqai_config.get("data_kitchen_thread_count", 0):
self.thread_count = max(int(psutil.cpu_count() * 2 - 2), 1) self.thread_count = max(int(psutil.cpu_count() * 2 - 2), 1)
else: else:
@ -120,8 +120,7 @@ class FreqaiDataKitchen:
""" """
self.full_path = self.get_full_models_path(self.config) self.full_path = self.get_full_models_path(self.config)
self.data_path = Path( self.data_path = Path(
self.full_path self.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
/ f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
) )
return return
@ -138,8 +137,8 @@ class FreqaiDataKitchen:
""" """
feat_dict = self.freqai_config["feature_parameters"] feat_dict = self.freqai_config["feature_parameters"]
if 'shuffle' not in self.freqai_config['data_split_parameters']: if "shuffle" not in self.freqai_config["data_split_parameters"]:
self.freqai_config["data_split_parameters"].update({'shuffle': False}) self.freqai_config["data_split_parameters"].update({"shuffle": False})
weights: npt.ArrayLike weights: npt.ArrayLike
if feat_dict.get("weight_factor", 0) > 0: if feat_dict.get("weight_factor", 0) > 0:
@ -147,7 +146,7 @@ class FreqaiDataKitchen:
else: else:
weights = np.ones(len(filtered_dataframe)) weights = np.ones(len(filtered_dataframe))
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: if self.freqai_config.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
( (
train_features, train_features,
test_features, test_features,
@ -172,26 +171,43 @@ class FreqaiDataKitchen:
if feat_dict["shuffle_after_split"]: if feat_dict["shuffle_after_split"]:
rint1 = random.randint(0, 100) rint1 = random.randint(0, 100)
rint2 = random.randint(0, 100) rint2 = random.randint(0, 100)
train_features = train_features.sample( train_features = train_features.sample(frac=1, random_state=rint1).reset_index(
frac=1, random_state=rint1).reset_index(drop=True) drop=True
)
train_labels = train_labels.sample(frac=1, random_state=rint1).reset_index(drop=True) train_labels = train_labels.sample(frac=1, random_state=rint1).reset_index(drop=True)
train_weights = pd.DataFrame(train_weights).sample( train_weights = (
frac=1, random_state=rint1).reset_index(drop=True).to_numpy()[:, 0] pd.DataFrame(train_weights)
.sample(frac=1, random_state=rint1)
.reset_index(drop=True)
.to_numpy()[:, 0]
)
test_features = test_features.sample(frac=1, random_state=rint2).reset_index(drop=True) test_features = test_features.sample(frac=1, random_state=rint2).reset_index(drop=True)
test_labels = test_labels.sample(frac=1, random_state=rint2).reset_index(drop=True) test_labels = test_labels.sample(frac=1, random_state=rint2).reset_index(drop=True)
test_weights = pd.DataFrame(test_weights).sample( test_weights = (
frac=1, random_state=rint2).reset_index(drop=True).to_numpy()[:, 0] pd.DataFrame(test_weights)
.sample(frac=1, random_state=rint2)
.reset_index(drop=True)
.to_numpy()[:, 0]
)
# Simplest way to reverse the order of training and test data: # Simplest way to reverse the order of training and test data:
if self.freqai_config['feature_parameters'].get('reverse_train_test_order', False): if self.freqai_config["feature_parameters"].get("reverse_train_test_order", False):
return self.build_data_dictionary( return self.build_data_dictionary(
test_features, train_features, test_labels, test_features,
train_labels, test_weights, train_weights train_features,
test_labels,
train_labels,
test_weights,
train_weights,
) )
else: else:
return self.build_data_dictionary( return self.build_data_dictionary(
train_features, test_features, train_labels, train_features,
test_labels, train_weights, test_weights test_features,
train_labels,
test_labels,
train_weights,
test_weights,
) )
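Note that shuffle_after_split draws one random_state per split and reuses it for features, labels, and weights, which keeps the shuffled rows aligned across the three frames. A minimal demonstration of why the shared seed matters (data hypothetical):

import pandas as pd

features = pd.DataFrame({"f": [1, 2, 3, 4]})
labels = pd.DataFrame({"y": [10, 20, 30, 40]})

rint = 42  # the same seed for every frame keeps rows aligned
features = features.sample(frac=1, random_state=rint).reset_index(drop=True)
labels = labels.sample(frac=1, random_state=rint).reset_index(drop=True)

# row i of features still corresponds to row i of labels
print(pd.concat([features, labels], axis=1))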
def filter_features( def filter_features(
@ -224,26 +240,23 @@ class FreqaiDataKitchen:
drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs, drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs,
drop_index = drop_index.replace(True, 1).replace(False, 0).infer_objects(copy=False) drop_index = drop_index.replace(True, 1).replace(False, 0).infer_objects(copy=False)
if (training_filter): if training_filter:
# we don't care about total row number (total no. datapoints) in training, we only care # we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs # about removing any row with NaNs
# if labels has multiple columns (user wants to train multiple models), we detect here # if labels has multiple columns (user wants to train multiple models), we detect here
labels = unfiltered_df.filter(label_list, axis=1) labels = unfiltered_df.filter(label_list, axis=1)
drop_index_labels = pd.isnull(labels).any(axis=1) drop_index_labels = pd.isnull(labels).any(axis=1)
drop_index_labels = drop_index_labels.replace( drop_index_labels = (
True, 1 drop_index_labels.replace(True, 1).replace(False, 0).infer_objects(copy=False)
).replace(False, 0).infer_objects(copy=False) )
dates = unfiltered_df['date'] dates = unfiltered_df["date"]
filtered_df = filtered_df[ filtered_df = filtered_df[
(drop_index == 0) & (drop_index_labels == 0) (drop_index == 0) & (drop_index_labels == 0)
] # dropping values ] # dropping values
labels = labels[ labels = labels[
(drop_index == 0) & (drop_index_labels == 0) (drop_index == 0) & (drop_index_labels == 0)
] # assuming the labels depend entirely on the dataframe here. ] # assuming the labels depend entirely on the dataframe here.
self.train_dates = dates[ self.train_dates = dates[(drop_index == 0) & (drop_index_labels == 0)]
(drop_index == 0) & (drop_index_labels == 0)
]
logger.info( logger.info(
f"{self.pair}: dropped {len(unfiltered_df) - len(filtered_df)} training points" f"{self.pair}: dropped {len(unfiltered_df) - len(filtered_df)} training points"
f" due to NaNs in populated dataset {len(unfiltered_df)}." f" due to NaNs in populated dataset {len(unfiltered_df)}."
@ -266,7 +279,6 @@ class FreqaiDataKitchen:
self.data["filter_drop_index_training"] = drop_index self.data["filter_drop_index_training"] = drop_index
else: else:
# we are backtesting so we need to preserve row number to send back to strategy, # we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN # so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_df).any(axis=1) drop_index = pd.isnull(filtered_df).any(axis=1)
@ -295,7 +307,6 @@ class FreqaiDataKitchen:
train_weights: Any, train_weights: Any,
test_weights: Any, test_weights: Any,
) -> Dict: ) -> Dict:
self.data_dictionary = { self.data_dictionary = {
"train_features": train_df, "train_features": train_df,
"test_features": test_df, "test_features": test_df,
@ -303,7 +314,7 @@ class FreqaiDataKitchen:
"test_labels": test_labels, "test_labels": test_labels,
"train_weights": train_weights, "train_weights": train_weights,
"test_weights": test_weights, "test_weights": test_weights,
"train_dates": self.train_dates "train_dates": self.train_dates,
} }
return self.data_dictionary return self.data_dictionary
@ -330,9 +341,7 @@ class FreqaiDataKitchen:
full_timerange = TimeRange.parse_timerange(tr) full_timerange = TimeRange.parse_timerange(tr)
config_timerange = TimeRange.parse_timerange(self.config["timerange"]) config_timerange = TimeRange.parse_timerange(self.config["timerange"])
if config_timerange.stopts == 0: if config_timerange.stopts == 0:
config_timerange.stopts = int( config_timerange.stopts = int(datetime.now(tz=timezone.utc).timestamp())
datetime.now(tz=timezone.utc).timestamp()
)
timerange_train = copy.deepcopy(full_timerange) timerange_train = copy.deepcopy(full_timerange)
timerange_backtest = copy.deepcopy(full_timerange) timerange_backtest = copy.deepcopy(full_timerange)
@ -412,9 +421,9 @@ class FreqaiDataKitchen:
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1] weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
return weights return weights
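The weights above decay exponentially from the newest candle backwards, with weight_factor controlling how fast older data is discounted. A quick numeric check of the formula (values illustrative):

import numpy as np

num_weights = 5
wfactor = 0.5
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
print(weights.round(3))  # oldest -> newest: [0.202 0.301 0.449 0.67  1.   ]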
def get_predictions_to_append(self, predictions: DataFrame, def get_predictions_to_append(
do_predict: npt.ArrayLike, self, predictions: DataFrame, do_predict: npt.ArrayLike, dataframe_backtest: DataFrame
dataframe_backtest: DataFrame) -> DataFrame: ) -> DataFrame:
""" """
Get backtest prediction from current backtest period Get backtest prediction from current backtest period
""" """
@ -459,18 +468,18 @@ class FreqaiDataKitchen:
Back fill values to before the backtesting range so that the dataframe matches size Back fill values to before the backtesting range so that the dataframe matches size
when it goes back to the strategy. These rows are not included in the backtest. when it goes back to the strategy. These rows are not included in the backtest.
""" """
to_keep = [col for col in dataframe.columns if to_keep = [
not col.startswith("&") and not col.startswith("%%")] col for col in dataframe.columns if not col.startswith("&") and not col.startswith("%%")
self.return_dataframe = pd.merge(dataframe[to_keep], ]
self.full_df, how='left', on='date') self.return_dataframe = pd.merge(dataframe[to_keep], self.full_df, how="left", on="date")
self.return_dataframe[self.full_df.columns] = ( self.return_dataframe[self.full_df.columns] = self.return_dataframe[
self.return_dataframe[self.full_df.columns].fillna(value=0)) self.full_df.columns
].fillna(value=0)
self.full_df = DataFrame() self.full_df = DataFrame()
return return
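Backfilling works by left-merging the prediction frame onto the strategy frame by date and zero-filling prediction columns for rows that predate the backtest window. A minimal pandas sketch (frames hypothetical):

import pandas as pd

strat_df = pd.DataFrame({"date": [1, 2, 3, 4], "close": [10, 11, 12, 13]})
full_df = pd.DataFrame({"date": [3, 4], "&-prediction": [0.5, 0.7]})

return_df = pd.merge(strat_df, full_df, how="left", on="date")
# rows before the backtest window get zeroed predictions
return_df[full_df.columns] = return_df[full_df.columns].fillna(value=0)
print(return_df)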
def create_fulltimerange(self, backtest_tr: str, backtest_period_days: int) -> str: def create_fulltimerange(self, backtest_tr: str, backtest_period_days: int) -> str:
if not isinstance(backtest_period_days, int): if not isinstance(backtest_period_days, int):
raise OperationalException("backtest_period_days must be an integer") raise OperationalException("backtest_period_days must be an integer")
@ -484,9 +493,11 @@ class FreqaiDataKitchen:
# it does not. accommodating these kinds of edge cases just to allow open-ended # it does not. accommodating these kinds of edge cases just to allow open-ended
# timerange is not high enough priority to warrant the effort. It is safer for now # timerange is not high enough priority to warrant the effort. It is safer for now
# to simply ask user to add their end date # to simply ask user to add their end date
raise OperationalException("FreqAI backtesting does not allow open ended timeranges. " raise OperationalException(
"FreqAI backtesting does not allow open ended timeranges. "
"Please indicate the end date of your desired backtesting. " "Please indicate the end date of your desired backtesting. "
"timerange.") "timerange."
)
# backtest_timerange.stopts = int( # backtest_timerange.stopts = int(
# datetime.now(tz=timezone.utc).timestamp() # datetime.now(tz=timezone.utc).timestamp()
# ) # )
@ -525,7 +536,6 @@ class FreqaiDataKitchen:
def check_if_new_training_required( def check_if_new_training_required(
self, trained_timestamp: int self, trained_timestamp: int
) -> Tuple[bool, TimeRange, TimeRange]: ) -> Tuple[bool, TimeRange, TimeRange]:
time = datetime.now(tz=timezone.utc).timestamp() time = datetime.now(tz=timezone.utc).timestamp()
trained_timerange = TimeRange() trained_timerange = TimeRange()
data_load_timerange = TimeRange() data_load_timerange = TimeRange()
@ -541,7 +551,7 @@ class FreqaiDataKitchen:
# We notice that users like to use exotic indicators where # We notice that users like to use exotic indicators where
# they do not know the required timeperiod. Here we include a factor # they do not know the required timeperiod. Here we include a factor
# of safety by multiplying the user-considered "max" by 2. # of safety by multiplying the user-considered "max" by 2.
max_period = self.config.get('startup_candle_count', 20) * 2 max_period = self.config.get("startup_candle_count", 20) * 2
additional_seconds = max_period * max_tf_seconds additional_seconds = max_period * max_tf_seconds
if trained_timestamp != 0: if trained_timestamp != 0:
@ -578,17 +588,12 @@ class FreqaiDataKitchen:
return retrain, trained_timerange, data_load_timerange return retrain, trained_timerange, data_load_timerange
def set_new_model_names(self, pair: str, timestamp_id: int): def set_new_model_names(self, pair: str, timestamp_id: int):
coin, _ = pair.split("/") coin, _ = pair.split("/")
self.data_path = Path( self.data_path = Path(self.full_path / f"sub-train-{pair.split('/')[0]}_{timestamp_id}")
self.full_path
/ f"sub-train-{pair.split('/')[0]}_{timestamp_id}"
)
self.model_filename = f"cb_{coin.lower()}_{timestamp_id}" self.model_filename = f"cb_{coin.lower()}_{timestamp_id}"
def set_all_pairs(self) -> None: def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy( self.all_pairs = copy.deepcopy(
self.freqai_config["feature_parameters"].get("include_corr_pairlist", []) self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
) )
@ -597,8 +602,7 @@ class FreqaiDataKitchen:
self.all_pairs.append(pair) self.all_pairs.append(pair)
def extract_corr_pair_columns_from_populated_indicators( def extract_corr_pair_columns_from_populated_indicators(
self, self, dataframe: DataFrame
dataframe: DataFrame
) -> Dict[str, DataFrame]: ) -> Dict[str, DataFrame]:
""" """
Find the columns of the dataframe corresponding to the corr_pairlist, save them Find the columns of the dataframe corresponding to the corr_pairlist, save them
@ -612,19 +616,20 @@ class FreqaiDataKitchen:
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", []) pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for pair in pairs: for pair in pairs:
pair = pair.replace(':', '') # lightgbm does not like colons pair = pair.replace(":", "") # lightgbm does not like colons
pair_cols = [col for col in dataframe.columns if col.startswith("%") pair_cols = [
and f"{pair}_" in col] col for col in dataframe.columns if col.startswith("%") and f"{pair}_" in col
]
if pair_cols: if pair_cols:
pair_cols.insert(0, 'date') pair_cols.insert(0, "date")
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1) corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
return corr_dataframes return corr_dataframes
def attach_corr_pair_columns(self, dataframe: DataFrame, def attach_corr_pair_columns(
corr_dataframes: Dict[str, DataFrame], self, dataframe: DataFrame, corr_dataframes: Dict[str, DataFrame], current_pair: str
current_pair: str) -> DataFrame: ) -> DataFrame:
""" """
Attach the existing corr_pair dataframes to the current pair dataframe before training Attach the existing corr_pair dataframes to the current pair dataframe before training
@ -636,21 +641,23 @@ class FreqaiDataKitchen:
ready for training ready for training
""" """
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", []) pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
current_pair = current_pair.replace(':', '') current_pair = current_pair.replace(":", "")
for pair in pairs: for pair in pairs:
pair = pair.replace(':', '') # lightgbm does not work with colons pair = pair.replace(":", "") # lightgbm does not work with colons
if current_pair != pair: if current_pair != pair:
dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date') dataframe = dataframe.merge(corr_dataframes[pair], how="left", on="date")
return dataframe return dataframe
def get_pair_data_for_features(self, def get_pair_data_for_features(
self,
pair: str, pair: str,
tf: str, tf: str,
strategy: IStrategy, strategy: IStrategy,
corr_dataframes: dict = {}, corr_dataframes: dict = {},
base_dataframes: dict = {}, base_dataframes: dict = {},
is_corr_pairs: bool = False) -> DataFrame: is_corr_pairs: bool = False,
) -> DataFrame:
""" """
Get the data for the pair. If it's not in the dictionary, get it from the data provider Get the data for the pair. If it's not in the dictionary, get it from the data provider
:param pair: str = pair to get data for :param pair: str = pair to get data for
@ -678,8 +685,9 @@ class FreqaiDataKitchen:
dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf) dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf)
return dataframe return dataframe
def merge_features(self, df_main: DataFrame, df_to_merge: DataFrame, def merge_features(
tf: str, timeframe_inf: str, suffix: str) -> DataFrame: self, df_main: DataFrame, df_to_merge: DataFrame, tf: str, timeframe_inf: str, suffix: str
) -> DataFrame:
""" """
Merge the features of the dataframe and remove HLCV and date added columns Merge the features of the dataframe and remove HLCV and date added columns
:param df_main: DataFrame = main dataframe :param df_main: DataFrame = main dataframe
@ -689,17 +697,30 @@ class FreqaiDataKitchen:
:param suffix: str = suffix to add to the columns of the dataframe to merge :param suffix: str = suffix to add to the columns of the dataframe to merge
:return: dataframe = merged dataframe :return: dataframe = merged dataframe
""" """
dataframe = merge_informative_pair(df_main, df_to_merge, tf, timeframe_inf=timeframe_inf, dataframe = merge_informative_pair(
append_timeframe=False, suffix=suffix, ffill=True) df_main,
df_to_merge,
tf,
timeframe_inf=timeframe_inf,
append_timeframe=False,
suffix=suffix,
ffill=True,
)
skip_columns = [ skip_columns = [
(f"{s}_{suffix}") for s in ["date", "open", "high", "low", "close", "volume"] (f"{s}_{suffix}") for s in ["date", "open", "high", "low", "close", "volume"]
] ]
dataframe = dataframe.drop(columns=skip_columns) dataframe = dataframe.drop(columns=skip_columns)
return dataframe return dataframe
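After the informative merge, the duplicated date/OHLCV columns picked up from the merged frame are dropped by suffix. A pandas-only sketch of that cleanup (suffix and columns hypothetical):

import pandas as pd

merged = pd.DataFrame(columns=["date", "%-rsi_5m", "close_5m", "date_5m"])
suffix = "5m"
skip_columns = [f"{s}_{suffix}" for s in ["date", "open", "high", "low", "close", "volume"]]
# drop only the suffixed columns that actually exist after the merge
merged = merged.drop(columns=[c for c in skip_columns if c in merged.columns])
print(list(merged.columns))  # ['date', '%-rsi_5m']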
def populate_features(self, dataframe: DataFrame, pair: str, strategy: IStrategy, def populate_features(
corr_dataframes: dict, base_dataframes: dict, self,
is_corr_pairs: bool = False) -> DataFrame: dataframe: DataFrame,
pair: str,
strategy: IStrategy,
corr_dataframes: dict,
base_dataframes: dict,
is_corr_pairs: bool = False,
) -> DataFrame:
""" """
Use the user defined strategy functions for populating features Use the user defined strategy functions for populating features
:param dataframe: DataFrame = dataframe to populate :param dataframe: DataFrame = dataframe to populate
@ -715,19 +736,22 @@ class FreqaiDataKitchen:
for tf in tfs: for tf in tfs:
metadata = {"pair": pair, "tf": tf} metadata = {"pair": pair, "tf": tf}
informative_df = self.get_pair_data_for_features( informative_df = self.get_pair_data_for_features(
pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs) pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs
)
informative_copy = informative_df.copy() informative_copy = informative_df.copy()
logger.debug(f"Populating features for {pair} {tf}") logger.debug(f"Populating features for {pair} {tf}")
for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]: for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]:
df_features = strategy.feature_engineering_expand_all( df_features = strategy.feature_engineering_expand_all(
informative_copy.copy(), t, metadata=metadata) informative_copy.copy(), t, metadata=metadata
)
suffix = f"{t}" suffix = f"{t}"
informative_df = self.merge_features(informative_df, df_features, tf, tf, suffix) informative_df = self.merge_features(informative_df, df_features, tf, tf, suffix)
generic_df = strategy.feature_engineering_expand_basic( generic_df = strategy.feature_engineering_expand_basic(
informative_copy.copy(), metadata=metadata) informative_copy.copy(), metadata=metadata
)
suffix = "gen" suffix = "gen"
informative_df = self.merge_features(informative_df, generic_df, tf, tf, suffix) informative_df = self.merge_features(informative_df, generic_df, tf, tf, suffix)
@ -740,8 +764,9 @@ class FreqaiDataKitchen:
df_shift = df_shift.add_suffix("_shift-" + str(n)) df_shift = df_shift.add_suffix("_shift-" + str(n))
informative_df = pd.concat((informative_df, df_shift), axis=1) informative_df = pd.concat((informative_df, df_shift), axis=1)
dataframe = self.merge_features(dataframe.copy(), informative_df, dataframe = self.merge_features(
self.config["timeframe"], tf, f'{pair}_{tf}') dataframe.copy(), informative_df, self.config["timeframe"], tf, f"{pair}_{tf}"
)
return dataframe return dataframe
@ -771,7 +796,8 @@ class FreqaiDataKitchen:
# check if the user is using the deprecated populate_any_indicators function # check if the user is using the deprecated populate_any_indicators function
new_version = inspect.getsource(strategy.populate_any_indicators) == ( new_version = inspect.getsource(strategy.populate_any_indicators) == (
inspect.getsource(IStrategy.populate_any_indicators)) inspect.getsource(IStrategy.populate_any_indicators)
)
if not new_version: if not new_version:
raise OperationalException( raise OperationalException(
@ -785,8 +811,7 @@ class FreqaiDataKitchen:
) )
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes") tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs: List[str] = self.freqai_config["feature_parameters"].get( pairs: List[str] = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
"include_corr_pairlist", [])
for tf in tfs: for tf in tfs:
if tf not in base_dataframes: if tf not in base_dataframes:
@ -804,9 +829,11 @@ class FreqaiDataKitchen:
dataframe = base_dataframes[self.config["timeframe"]].copy() dataframe = base_dataframes[self.config["timeframe"]].copy()
corr_pairs: List[str] = self.freqai_config["feature_parameters"].get( corr_pairs: List[str] = self.freqai_config["feature_parameters"].get(
"include_corr_pairlist", []) "include_corr_pairlist", []
dataframe = self.populate_features(dataframe.copy(), pair, strategy, )
corr_dataframes, base_dataframes) dataframe = self.populate_features(
dataframe.copy(), pair, strategy, corr_dataframes, base_dataframes
)
metadata = {"pair": pair} metadata = {"pair": pair}
dataframe = strategy.feature_engineering_standard(dataframe.copy(), metadata=metadata) dataframe = strategy.feature_engineering_standard(dataframe.copy(), metadata=metadata)
# ensure corr pairs are always last # ensure corr pairs are always last
@ -814,8 +841,9 @@ class FreqaiDataKitchen:
if pair == corr_pair: if pair == corr_pair:
continue # don't repeat anything from whitelist continue # don't repeat anything from whitelist
if corr_pairs and do_corr_pairs: if corr_pairs and do_corr_pairs:
dataframe = self.populate_features(dataframe.copy(), corr_pair, strategy, dataframe = self.populate_features(
corr_dataframes, base_dataframes, True) dataframe.copy(), corr_pair, strategy, corr_dataframes, base_dataframes, True
)
if self.live: if self.live:
dataframe = strategy.set_freqai_targets(dataframe.copy(), metadata=metadata) dataframe = strategy.set_freqai_targets(dataframe.copy(), metadata=metadata)
@ -823,7 +851,7 @@ class FreqaiDataKitchen:
self.get_unique_classes_from_labels(dataframe) self.get_unique_classes_from_labels(dataframe)
if self.config.get('reduce_df_footprint', False): if self.config.get("reduce_df_footprint", False):
dataframe = reduce_dataframe_footprint(dataframe) dataframe = reduce_dataframe_footprint(dataframe)
return dataframe return dataframe
@ -858,7 +886,6 @@ class FreqaiDataKitchen:
return dataframe[to_keep] return dataframe[to_keep]
def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None: def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
# self.find_features(dataframe) # self.find_features(dataframe)
self.find_labels(dataframe) self.find_labels(dataframe)
@ -870,9 +897,7 @@ class FreqaiDataKitchen:
for label in self.unique_classes: for label in self.unique_classes:
self.unique_class_list += list(self.unique_classes[label]) self.unique_class_list += list(self.unique_classes[label])
def save_backtesting_prediction( def save_backtesting_prediction(self, append_df: DataFrame) -> None:
self, append_df: DataFrame
) -> None:
""" """
Save prediction dataframe from backtesting to feather file format Save prediction dataframe from backtesting to feather file format
:param append_df: dataframe for backtesting period :param append_df: dataframe for backtesting period
@ -883,19 +908,14 @@ class FreqaiDataKitchen:
append_df.to_feather(self.backtesting_results_path) append_df.to_feather(self.backtesting_results_path)
def get_backtesting_prediction( def get_backtesting_prediction(self) -> DataFrame:
self
) -> DataFrame:
""" """
Get prediction dataframe from feather file format Get prediction dataframe from feather file format
""" """
append_df = pd.read_feather(self.backtesting_results_path) append_df = pd.read_feather(self.backtesting_results_path)
return append_df return append_df
def check_if_backtest_prediction_is_valid( def check_if_backtest_prediction_is_valid(self, len_backtest_df: int) -> bool:
self,
len_backtest_df: int
) -> bool:
""" """
Check if a backtesting prediction already exists and if the predictions Check if a backtesting prediction already exists and if the predictions
to append have the same size as the backtesting dataframe slice to append have the same size as the backtesting dataframe slice
@ -903,27 +923,29 @@ class FreqaiDataKitchen:
:return: :return:
:boolean: whether the prediction file is valid. :boolean: whether the prediction file is valid.
""" """
path_to_predictionfile = Path(self.full_path / path_to_predictionfile = Path(
self.backtest_predictions_folder / self.full_path
f"{self.model_filename}_prediction.feather") / self.backtest_predictions_folder
/ f"{self.model_filename}_prediction.feather"
)
self.backtesting_results_path = path_to_predictionfile self.backtesting_results_path = path_to_predictionfile
file_exists = path_to_predictionfile.is_file() file_exists = path_to_predictionfile.is_file()
if file_exists: if file_exists:
append_df = self.get_backtesting_prediction() append_df = self.get_backtesting_prediction()
if len(append_df) == len_backtest_df and 'date' in append_df: if len(append_df) == len_backtest_df and "date" in append_df:
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}") logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
return True return True
else: else:
logger.info("A new backtesting prediction file is required. " logger.info(
"A new backtesting prediction file is required. "
"(Number of predictions is different from dataframe length or " "(Number of predictions is different from dataframe length or "
"old prediction file version).") "old prediction file version)."
)
return False return False
else: else:
logger.info( logger.info(f"Could not find backtesting prediction file at {path_to_predictionfile}")
f"Could not find backtesting prediction file at {path_to_predictionfile}"
)
return False return False
def get_full_models_path(self, config: Config) -> Path: def get_full_models_path(self, config: Config) -> Path:
@ -932,9 +954,7 @@ class FreqaiDataKitchen:
:param config: Configuration dictionary :param config: Configuration dictionary
""" """
freqai_config: Dict[str, Any] = config["freqai"] freqai_config: Dict[str, Any] = config["freqai"]
return Path( return Path(config["user_data_dir"] / "models" / str(freqai_config.get("identifier")))
config["user_data_dir"] / "models" / str(freqai_config.get("identifier"))
)
def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame: def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
""" """
@ -943,7 +963,7 @@ class FreqaiDataKitchen:
:return: dataframe with cleaned feature names :return: dataframe with cleaned feature names
""" """
spec_chars = [':'] spec_chars = [":"]
for c in spec_chars: for c in spec_chars:
dataframe.columns = dataframe.columns.str.replace(c, "") dataframe.columns = dataframe.columns.str.replace(c, "")
@ -976,12 +996,14 @@ class FreqaiDataKitchen:
""" """
Deprecation warning, migration assistance Deprecation warning, migration assistance
""" """
logger.warning(f"Your custom IFreqaiModel relies on the deprecated" logger.warning(
f"Your custom IFreqaiModel relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline." " data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at " " This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline " f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline "
"We added a basic pipeline for you, but this will be removed " "We added a basic pipeline for you, but this will be removed "
"in a future version.") "in a future version."
)
return data_dictionary return data_dictionary
@ -989,12 +1011,14 @@ class FreqaiDataKitchen:
""" """
Deprecation warning, migration assistance Deprecation warning, migration assistance
""" """
logger.warning(f"Your custom IFreqaiModel relies on the deprecated" logger.warning(
f"Your custom IFreqaiModel relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline." " data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at " " This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline " f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline "
"We added a basic pipeline for you, but this will be removed " "We added a basic pipeline for you, but this will be removed "
"in a future version.") "in a future version."
)
pred_df, _, _ = self.label_pipeline.inverse_transform(df) pred_df, _, _ = self.label_pipeline.inverse_transform(df)
@ -57,21 +57,22 @@ class IFreqaiModel(ABC):
""" """
def __init__(self, config: Config) -> None: def __init__(self, config: Config) -> None:
self.config = config self.config = config
self.assert_config(self.config) self.assert_config(self.config)
self.freqai_info: Dict[str, Any] = config["freqai"] self.freqai_info: Dict[str, Any] = config["freqai"]
self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get( self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"data_split_parameters", {}) "data_split_parameters", {}
)
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get( self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"model_training_parameters", {}) "model_training_parameters", {}
)
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided") self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.retrain = False self.retrain = False
self.first = True self.first = True
self.set_full_path() self.set_full_path()
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True) self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models: if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.') logger.info("Backtesting module configured to save all models.")
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config) self.dd = FreqaiDataDrawer(Path(self.full_path), self.config)
# set current candle to arbitrary historical date # set current candle to arbitrary historical date
@ -85,7 +86,7 @@ class IFreqaiModel(ABC):
self.ft_params["DI_threshold"] = 0 self.ft_params["DI_threshold"] = 0
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.") logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
self.CONV_WIDTH = self.freqai_info.get('conv_width', 1) self.CONV_WIDTH = self.freqai_info.get("conv_width", 1)
self.class_names: List[str] = [] # used in classification subclasses self.class_names: List[str] = [] # used in classification subclasses
self.pair_it = 0 self.pair_it = 0
self.pair_it_train = 0 self.pair_it_train = 0
@ -95,8 +96,8 @@ class IFreqaiModel(ABC):
self.train_time: float = 0 self.train_time: float = 0
self.begin_time: float = 0 self.begin_time: float = 0
self.begin_time_train: float = 0 self.begin_time_train: float = 0
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe']) self.base_tf_seconds = timeframe_to_seconds(self.config["timeframe"])
self.continual_learning = self.freqai_info.get('continual_learning', False) self.continual_learning = self.freqai_info.get("continual_learning", False)
self.plot_features = self.ft_params.get("plot_feature_importances", 0) self.plot_features = self.ft_params.get("plot_feature_importances", 0)
self.corr_dataframes: Dict[str, DataFrame] = {} self.corr_dataframes: Dict[str, DataFrame] = {}
# get_corr_dataframes is controlling the caching of corr_dataframes # get_corr_dataframes is controlling the caching of corr_dataframes
@ -109,10 +110,10 @@ class IFreqaiModel(ABC):
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1) self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
self.can_short = True # overridden in start() with strategy.can_short self.can_short = True # overridden in start() with strategy.can_short
self.model: Any = None self.model: Any = None
if self.ft_params.get('principal_component_analysis', False) and self.continual_learning: if self.ft_params.get("principal_component_analysis", False) and self.continual_learning:
self.ft_params.update({'principal_component_analysis': False}) self.ft_params.update({"principal_component_analysis": False})
logger.warning('User tried to use PCA with continual learning. Deactivating PCA.') logger.warning("User tried to use PCA with continual learning. Deactivating PCA.")
self.activate_tensorboard: bool = self.freqai_info.get('activate_tensorboard', True) self.activate_tensorboard: bool = self.freqai_info.get("activate_tensorboard", True)
record_params(config, self.full_path) record_params(config, self.full_path)
@ -120,10 +121,9 @@ class IFreqaiModel(ABC):
""" """
Return an empty state to be pickled in hyperopt Return an empty state to be pickled in hyperopt
""" """
return ({}) return {}
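The empty `__getstate__` above keeps hyperopt's pickling cheap: the model object serializes as an empty shell instead of dragging its caches along. A minimal sketch of the effect, with a hypothetical `big_cache` attribute standing in for the real state:

import pickle

class Stateful:
    def __init__(self):
        self.big_cache = list(range(10_000))

    def __getstate__(self):
        # Return no state, so pickling stays cheap during hyperopt
        return {}

restored = pickle.loads(pickle.dumps(Stateful()))
assert not hasattr(restored, "big_cache")  # state was not carried over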
def assert_config(self, config: Config) -> None: def assert_config(self, config: Config) -> None:
if not config.get("freqai", {}): if not config.get("freqai", {}):
raise OperationalException("No freqai parameters found in configuration file.") raise OperationalException("No freqai parameters found in configuration file.")
@ -144,7 +144,7 @@ class IFreqaiModel(ABC):
self.can_short = strategy.can_short self.can_short = strategy.can_short
if self.live: if self.live:
self.inference_timer('start') self.inference_timer("start")
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"]) self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk) dk = self.start_live(dataframe, metadata, strategy, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe) dataframe = dk.remove_features_from_df(dk.return_dataframe)
@ -162,13 +162,12 @@ class IFreqaiModel(ABC):
dataframe = dk.remove_features_from_df(dk.return_dataframe) dataframe = dk.remove_features_from_df(dk.return_dataframe)
else: else:
logger.info("Backtesting using historic predictions (live models)") logger.info("Backtesting using historic predictions (live models)")
dk = self.start_backtesting_from_historic_predictions( dk = self.start_backtesting_from_historic_predictions(dataframe, metadata, self.dk)
dataframe, metadata, self.dk)
dataframe = dk.return_dataframe dataframe = dk.return_dataframe
self.clean_up() self.clean_up()
if self.live: if self.live:
self.inference_timer('stop', metadata["pair"]) self.inference_timer("stop", metadata["pair"])
return dataframe return dataframe
@ -225,7 +224,7 @@ class IFreqaiModel(ABC):
# ensure pair is available in dp # ensure pair is available in dp
if pair not in strategy.dp.current_whitelist(): if pair not in strategy.dp.current_whitelist():
self.train_queue.popleft() self.train_queue.popleft()
logger.warning(f'{pair} not in current whitelist, removing from train queue.') logger.warning(f"{pair} not in current whitelist, removing from train queue.")
continue continue
(_, trained_timestamp) = self.dd.get_pair_dict_info(pair) (_, trained_timestamp) = self.dd.get_pair_dict_info(pair)
@ -238,23 +237,25 @@ class IFreqaiModel(ABC):
) = dk.check_if_new_training_required(trained_timestamp) ) = dk.check_if_new_training_required(trained_timestamp)
if retrain: if retrain:
self.train_timer('start') self.train_timer("start")
dk.set_paths(pair, new_trained_timerange.stopts) dk.set_paths(pair, new_trained_timerange.stopts)
try: try:
self.extract_data_and_train_model( self.extract_data_and_train_model(
new_trained_timerange, pair, strategy, dk, data_load_timerange new_trained_timerange, pair, strategy, dk, data_load_timerange
) )
except Exception as msg: except Exception as msg:
logger.exception(f"Training {pair} raised exception {msg.__class__.__name__}. " logger.exception(
f"Message: {msg}, skipping.") f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping."
)
self.train_timer('stop', pair) self.train_timer("stop", pair)
# only rotate the queue after the first has been trained. # only rotate the queue after the first has been trained.
self.train_queue.rotate(-1) self.train_queue.rotate(-1)
self.dd.save_historic_predictions_to_disk() self.dd.save_historic_predictions_to_disk()
if self.freqai_info.get('write_metrics_to_disk', False): if self.freqai_info.get("write_metrics_to_disk", False):
self.dd.save_metric_tracker_to_disk() self.dd.save_metric_tracker_to_disk()
def start_backtesting( def start_backtesting(
@ -290,8 +291,13 @@ class IFreqaiModel(ABC):
train_it += 1 train_it += 1
total_trains = len(dk.backtesting_timeranges) total_trains = len(dk.backtesting_timeranges)
self.training_timerange = tr_train self.training_timerange = tr_train
len_backtest_df = len(dataframe.loc[(dataframe["date"] >= tr_backtest.startdt) & ( len_backtest_df = len(
dataframe["date"] < tr_backtest.stopdt), :]) dataframe.loc[
(dataframe["date"] >= tr_backtest.startdt)
& (dataframe["date"] < tr_backtest.stopdt),
:,
]
)
if not self.ensure_data_exists(len_backtest_df, tr_backtest, pair): if not self.ensure_data_exists(len_backtest_df, tr_backtest, pair):
continue continue
@ -327,10 +333,12 @@ class IFreqaiModel(ABC):
dataframe_base_train = dataframe.loc[dataframe["date"] < tr_train.stopdt, :] dataframe_base_train = dataframe.loc[dataframe["date"] < tr_train.stopdt, :]
dataframe_base_train = strategy.set_freqai_targets( dataframe_base_train = strategy.set_freqai_targets(
dataframe_base_train, metadata=metadata) dataframe_base_train, metadata=metadata
)
dataframe_base_backtest = dataframe.loc[dataframe["date"] < tr_backtest.stopdt, :] dataframe_base_backtest = dataframe.loc[dataframe["date"] < tr_backtest.stopdt, :]
dataframe_base_backtest = strategy.set_freqai_targets( dataframe_base_backtest = strategy.set_freqai_targets(
dataframe_base_backtest, metadata=metadata) dataframe_base_backtest, metadata=metadata
)
tr_train = dk.buffer_timerange(tr_train) tr_train = dk.buffer_timerange(tr_train)
@ -346,25 +354,27 @@ class IFreqaiModel(ABC):
dk.find_labels(dataframe_train) dk.find_labels(dataframe_train)
try: try:
self.tb_logger = get_tb_logger(self.dd.model_type, dk.data_path, self.tb_logger = get_tb_logger(
self.activate_tensorboard) self.dd.model_type, dk.data_path, self.activate_tensorboard
)
self.model = self.train(dataframe_train, pair, dk) self.model = self.train(dataframe_train, pair, dk)
self.tb_logger.close() self.tb_logger.close()
except Exception as msg: except Exception as msg:
logger.warning( logger.warning(
f"Training {pair} raised exception {msg.__class__.__name__}. " f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping.", exc_info=True) f"Message: {msg}, skipping.",
exc_info=True,
)
self.model = None self.model = None
self.dd.pair_dict[pair]["trained_timestamp"] = int( self.dd.pair_dict[pair]["trained_timestamp"] = int(tr_train.stopts)
tr_train.stopts)
if self.plot_features and self.model is not None: if self.plot_features and self.model is not None:
plot_feature_importance(self.model, pair, dk, self.plot_features) plot_feature_importance(self.model, pair, dk, self.plot_features)
if self.save_backtest_models and self.model is not None: if self.save_backtest_models and self.model is not None:
logger.info('Saving backtest model to disk.') logger.info("Saving backtest model to disk.")
self.dd.save_data(self.model, pair, dk) self.dd.save_data(self.model, pair, dk)
else: else:
logger.info('Saving metadata to disk.') logger.info("Saving metadata to disk.")
self.dd.save_metadata(dk) self.dd.save_metadata(dk)
else: else:
self.model = self.dd.load_data(pair, dk) self.model = self.dd.load_data(pair, dk)
@ -394,9 +404,11 @@ class IFreqaiModel(ABC):
""" """
if not strategy.process_only_new_candles: if not strategy.process_only_new_candles:
raise OperationalException("You are trying to use a FreqAI strategy with " raise OperationalException(
"You are trying to use a FreqAI strategy with "
"process_only_new_candles = False. This is not supported " "process_only_new_candles = False. This is not supported "
"by FreqAI, and it is therefore aborting.") "by FreqAI, and it is therefore aborting."
)
# get the model metadata associated with the current pair # get the model metadata associated with the current pair
(_, trained_timestamp) = self.dd.get_pair_dict_info(metadata["pair"]) (_, trained_timestamp) = self.dd.get_pair_dict_info(metadata["pair"])
@ -424,8 +436,10 @@ class IFreqaiModel(ABC):
self.model = self.dd.load_data(metadata["pair"], dk) self.model = self.dd.load_data(metadata["pair"], dk)
dataframe = dk.use_strategy_to_populate_indicators( dataframe = dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"], strategy,
do_corr_pairs=self.get_corr_dataframes prediction_dataframe=dataframe,
pair=metadata["pair"],
do_corr_pairs=self.get_corr_dataframes,
) )
if not self.model: if not self.model:
@ -447,7 +461,6 @@ class IFreqaiModel(ABC):
def build_strategy_return_arrays( def build_strategy_return_arrays(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
) -> None: ) -> None:
# hold the historical predictions in memory so we send back the # hold the historical predictions in memory so we send back the
# correct array to the strategy # correct array to the strategy
@ -475,16 +488,14 @@ class IFreqaiModel(ABC):
# historical accuracy reasons. # historical accuracy reasons.
pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH :], dk, first=False) pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH :], dk, first=False)
if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live: if self.freqai_info.get("fit_live_predictions_candles", 0) and self.live:
self.fit_live_predictions(dk, pair) self.fit_live_predictions(dk, pair)
self.dd.append_model_predictions(pair, pred_df, do_preds, dk, dataframe) self.dd.append_model_predictions(pair, pred_df, do_preds, dk, dataframe)
dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe) dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
return return
def check_if_feature_list_matches_strategy( def check_if_feature_list_matches_strategy(self, dk: FreqaiDataKitchen) -> None:
self, dk: FreqaiDataKitchen
) -> None:
""" """
Ensure the user is passing the proper feature set if they are reusing an `identifier` pointing Ensure the user is passing the proper feature set if they are reusing an `identifier` pointing
to a folder holding existing models. to a folder holding existing models.
@ -496,7 +507,7 @@ class IFreqaiModel(ABC):
if "training_features_list_raw" in dk.data: if "training_features_list_raw" in dk.data:
feature_list = dk.data["training_features_list_raw"] feature_list = dk.data["training_features_list_raw"]
else: else:
feature_list = dk.data['training_features_list'] feature_list = dk.data["training_features_list"]
if dk.training_features_list != feature_list: if dk.training_features_list != feature_list:
raise OperationalException( raise OperationalException(
@ -512,38 +523,35 @@ class IFreqaiModel(ABC):
def define_data_pipeline(self, threads=-1) -> Pipeline: def define_data_pipeline(self, threads=-1) -> Pipeline:
ft_params = self.freqai_info["feature_parameters"] ft_params = self.freqai_info["feature_parameters"]
pipe_steps = [ pipe_steps = [
('const', ds.VarianceThreshold(threshold=0)), ("const", ds.VarianceThreshold(threshold=0)),
('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))) ("scaler", SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
] ]
if ft_params.get("principal_component_analysis", False): if ft_params.get("principal_component_analysis", False):
pipe_steps.append(('pca', ds.PCA(n_components=0.999))) pipe_steps.append(("pca", ds.PCA(n_components=0.999)))
pipe_steps.append(('post-pca-scaler', pipe_steps.append(
SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))) ("post-pca-scaler", SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
)
if ft_params.get("use_SVM_to_remove_outliers", False): if ft_params.get("use_SVM_to_remove_outliers", False):
svm_params = ft_params.get( svm_params = ft_params.get("svm_params", {"shuffle": False, "nu": 0.01})
"svm_params", {"shuffle": False, "nu": 0.01}) pipe_steps.append(("svm", ds.SVMOutlierExtractor(**svm_params)))
pipe_steps.append(('svm', ds.SVMOutlierExtractor(**svm_params)))
di = ft_params.get("DI_threshold", 0) di = ft_params.get("DI_threshold", 0)
if di: if di:
pipe_steps.append(('di', ds.DissimilarityIndex(di_threshold=di, n_jobs=threads))) pipe_steps.append(("di", ds.DissimilarityIndex(di_threshold=di, n_jobs=threads)))
if ft_params.get("use_DBSCAN_to_remove_outliers", False): if ft_params.get("use_DBSCAN_to_remove_outliers", False):
pipe_steps.append(('dbscan', ds.DBSCAN(n_jobs=threads))) pipe_steps.append(("dbscan", ds.DBSCAN(n_jobs=threads)))
sigma = self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0) sigma = self.freqai_info["feature_parameters"].get("noise_standard_deviation", 0)
if sigma: if sigma:
pipe_steps.append(('noise', ds.Noise(sigma=sigma))) pipe_steps.append(("noise", ds.Noise(sigma=sigma)))
return Pipeline(pipe_steps) return Pipeline(pipe_steps)
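The `define_data_pipeline` hunk above assembles the feature pipeline conditionally from `feature_parameters`. A minimal sketch of the same assembly pattern, assuming plain scikit-learn steps in place of the datasieve wrappers (`ds.*`, `SKLearnWrapper`) used by FreqAI:

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.decomposition import PCA

def build_feature_pipeline(ft_params: dict) -> Pipeline:
    # Always drop constant columns and scale features into [-1, 1]
    steps = [
        ("const", VarianceThreshold(threshold=0)),
        ("scaler", MinMaxScaler(feature_range=(-1, 1))),
    ]
    # Optional steps are appended only when the user enables them
    if ft_params.get("principal_component_analysis", False):
        steps.append(("pca", PCA(n_components=0.999)))
        # re-scale after PCA, since component values are no longer bounded
        steps.append(("post-pca-scaler", MinMaxScaler(feature_range=(-1, 1))))
    return Pipeline(steps)

pipe = build_feature_pipeline({"principal_component_analysis": True})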
def define_label_pipeline(self, threads=-1) -> Pipeline: def define_label_pipeline(self, threads=-1) -> Pipeline:
label_pipeline = Pipeline([("scaler", SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))])
label_pipeline = Pipeline([
('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
])
return label_pipeline return label_pipeline
@ -555,7 +563,7 @@ class IFreqaiModel(ABC):
:return: :return:
:boolean: whether the model file exists or not. :boolean: whether the model file exists or not.
""" """
if self.dd.model_type == 'joblib': if self.dd.model_type == "joblib":
file_type = ".joblib" file_type = ".joblib"
elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]: elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
file_type = ".zip" file_type = ".zip"
@ -572,9 +580,7 @@ class IFreqaiModel(ABC):
""" """
Creates and sets the full path for the identifier Creates and sets the full path for the identifier
""" """
self.full_path = Path( self.full_path = Path(self.config["user_data_dir"] / "models" / f"{self.identifier}")
self.config["user_data_dir"] / "models" / f"{self.identifier}"
)
self.full_path.mkdir(parents=True, exist_ok=True) self.full_path.mkdir(parents=True, exist_ok=True)
def extract_data_and_train_model( def extract_data_and_train_model(
@ -615,8 +621,7 @@ class IFreqaiModel(ABC):
dk.find_features(unfiltered_dataframe) dk.find_features(unfiltered_dataframe)
dk.find_labels(unfiltered_dataframe) dk.find_labels(unfiltered_dataframe)
self.tb_logger = get_tb_logger(self.dd.model_type, dk.data_path, self.tb_logger = get_tb_logger(self.dd.model_type, dk.data_path, self.activate_tensorboard)
self.activate_tensorboard)
model = self.train(unfiltered_dataframe, pair, dk) model = self.train(unfiltered_dataframe, pair, dk)
self.tb_logger.close() self.tb_logger.close()
@ -664,21 +669,21 @@ class IFreqaiModel(ABC):
for label in hist_preds_df.columns: for label in hist_preds_df.columns:
if hist_preds_df[label].dtype == object: if hist_preds_df[label].dtype == object:
continue continue
hist_preds_df[f'{label}_mean'] = 0 hist_preds_df[f"{label}_mean"] = 0
hist_preds_df[f'{label}_std'] = 0 hist_preds_df[f"{label}_std"] = 0
hist_preds_df['do_predict'] = 0 hist_preds_df["do_predict"] = 0
if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
hist_preds_df['DI_values'] = 0 hist_preds_df["DI_values"] = 0
for return_str in dk.data['extra_returns_per_train']: for return_str in dk.data["extra_returns_per_train"]:
hist_preds_df[return_str] = dk.data['extra_returns_per_train'][return_str] hist_preds_df[return_str] = dk.data["extra_returns_per_train"][return_str]
hist_preds_df['high_price'] = strat_df['high'] hist_preds_df["high_price"] = strat_df["high"]
hist_preds_df['low_price'] = strat_df['low'] hist_preds_df["low_price"] = strat_df["low"]
hist_preds_df['close_price'] = strat_df['close'] hist_preds_df["close_price"] = strat_df["close"]
hist_preds_df['date_pred'] = strat_df['date'] hist_preds_df["date_pred"] = strat_df["date"]
def fit_live_predictions(self, dk: FreqaiDataKitchen, pair: str) -> None: def fit_live_predictions(self, dk: FreqaiDataKitchen, pair: str) -> None:
""" """
@ -694,52 +699,51 @@ class IFreqaiModel(ABC):
for label in full_labels: for label in full_labels:
if self.dd.historic_predictions[dk.pair][label].dtype == object: if self.dd.historic_predictions[dk.pair][label].dtype == object:
continue continue
f = spy.stats.norm.fit( f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles))
self.dd.historic_predictions[dk.pair][label].tail(num_candles))
dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1] dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
return return
def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''): def inference_timer(self, do: Literal["start", "stop"] = "start", pair: str = ""):
""" """
Timer designed to track the cumulative time spent in FreqAI for one pass through Timer designed to track the cumulative time spent in FreqAI for one pass through
the whitelist. This will check if the time spent is more than 1/4 the time the whitelist. This will check if the time spent is more than 1/4 the time
of a single candle, and if so, it will warn the user of degraded performance of a single candle, and if so, it will warn the user of degraded performance
""" """
if do == 'start': if do == "start":
self.pair_it += 1 self.pair_it += 1
self.begin_time = time.time() self.begin_time = time.time()
elif do == 'stop': elif do == "stop":
end = time.time() end = time.time()
time_spent = (end - self.begin_time) time_spent = end - self.begin_time
if self.freqai_info.get('write_metrics_to_disk', False): if self.freqai_info.get("write_metrics_to_disk", False):
self.dd.update_metric_tracker('inference_time', time_spent, pair) self.dd.update_metric_tracker("inference_time", time_spent, pair)
self.inference_time += time_spent self.inference_time += time_spent
if self.pair_it == self.total_pairs: if self.pair_it == self.total_pairs:
logger.info( logger.info(
f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds') f"Total time spent inferencing pairlist {self.inference_time:.2f} seconds"
)
self.pair_it = 0 self.pair_it = 0
self.inference_time = 0 self.inference_time = 0
return return
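The docstring above describes the degraded-performance check: warn when the cumulative inference time exceeds a quarter of one candle. A hypothetical sketch of that threshold logic (the actual warning lives elsewhere in FreqAI; the helper name here is illustrative):

def check_inference_budget(inference_time: float, base_tf_seconds: int) -> bool:
    # Budget: 1/4 of a single candle's duration
    budget = 0.25 * base_tf_seconds
    if inference_time > budget:
        print(
            f"Inference took {inference_time:.2f}s, over 1/4 of a "
            f"{base_tf_seconds}s candle; performance may be degraded."
        )
        return False
    return True

check_inference_budget(80.0, base_tf_seconds=300)  # 5m candle -> 75s budget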
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''): def train_timer(self, do: Literal["start", "stop"] = "start", pair: str = ""):
""" """
Timer designed to track the cumulative time spent training the full pairlist in Timer designed to track the cumulative time spent training the full pairlist in
FreqAI. FreqAI.
""" """
if do == 'start': if do == "start":
self.pair_it_train += 1 self.pair_it_train += 1
self.begin_time_train = time.time() self.begin_time_train = time.time()
elif do == 'stop': elif do == "stop":
end = time.time() end = time.time()
time_spent = (end - self.begin_time_train) time_spent = end - self.begin_time_train
if self.freqai_info.get('write_metrics_to_disk', False): if self.freqai_info.get("write_metrics_to_disk", False):
self.dd.collect_metrics(time_spent, pair) self.dd.collect_metrics(time_spent, pair)
self.train_time += time_spent self.train_time += time_spent
if self.pair_it_train == self.total_pairs: if self.pair_it_train == self.total_pairs:
logger.info( logger.info(f"Total time spent training pairlist {self.train_time:.2f} seconds")
f'Total time spent training pairlist {self.train_time:.2f} seconds')
self.pair_it_train = 0 self.pair_it_train = 0
self.train_time = 0 self.train_time = 0
return return
@ -759,14 +763,14 @@ class IFreqaiModel(ABC):
""" """
current_pairlist = self.config.get("exchange", {}).get("pair_whitelist") current_pairlist = self.config.get("exchange", {}).get("pair_whitelist")
if not self.dd.pair_dict: if not self.dd.pair_dict:
logger.info('Set fresh train queue from whitelist. ' logger.info("Set fresh train queue from whitelist. " f"Queue: {current_pairlist}")
f'Queue: {current_pairlist}')
return deque(current_pairlist) return deque(current_pairlist)
best_queue = deque() best_queue = deque()
pair_dict_sorted = sorted(self.dd.pair_dict.items(), pair_dict_sorted = sorted(
key=lambda k: k[1]['trained_timestamp']) self.dd.pair_dict.items(), key=lambda k: k[1]["trained_timestamp"]
)
for pair in pair_dict_sorted: for pair in pair_dict_sorted:
if pair[0] in current_pairlist: if pair[0] in current_pairlist:
best_queue.append(pair[0]) best_queue.append(pair[0])
@ -774,8 +778,9 @@ class IFreqaiModel(ABC):
if pair not in best_queue: if pair not in best_queue:
best_queue.appendleft(pair) best_queue.appendleft(pair)
logger.info('Set existing queue from trained timestamps. ' logger.info(
f'Best approximation queue: {best_queue}') "Set existing queue from trained timestamps. " f"Best approximation queue: {best_queue}"
)
return best_queue return best_queue
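`set_train_queue` orders retraining so the stalest models go first: known pairs are sorted by `trained_timestamp`, and whitelisted pairs with no training record are pushed to the front. A minimal sketch with toy data:

from collections import deque

def build_train_queue(pair_dict: dict, whitelist: list) -> deque:
    # Oldest trained_timestamp first, so stale models retrain soonest
    by_age = sorted(pair_dict.items(), key=lambda k: k[1]["trained_timestamp"])
    queue = deque(p for p, _ in by_age if p in whitelist)
    # Never-trained whitelist pairs jump to the head of the queue
    for pair in whitelist:
        if pair not in queue:
            queue.appendleft(pair)
    return queue

q = build_train_queue(
    {"BTC/USDT": {"trained_timestamp": 200}, "ETH/USDT": {"trained_timestamp": 100}},
    ["BTC/USDT", "ETH/USDT", "XRP/USDT"],
)
# deque(['XRP/USDT', 'ETH/USDT', 'BTC/USDT'])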
def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
@ -790,14 +795,15 @@ class IFreqaiModel(ABC):
if self.get_corr_dataframes: if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe) self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
if not self.corr_dataframes: if not self.corr_dataframes:
logger.warning("Couldn't cache corr_pair dataframes for improved performance. " logger.warning(
"Couldn't cache corr_pair dataframes for improved performance. "
"Consider ensuring that the full coin/stake, e.g. XYZ/USD, " "Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
"is included in the column names when you are creating features " "is included in the column names when you are creating features "
"in `feature_engineering_*` functions.") "in `feature_engineering_*` functions."
)
self.get_corr_dataframes = not bool(self.corr_dataframes) self.get_corr_dataframes = not bool(self.corr_dataframes)
elif self.corr_dataframes: elif self.corr_dataframes:
dataframe = dk.attach_corr_pair_columns( dataframe = dk.attach_corr_pair_columns(dataframe, self.corr_dataframes, dk.pair)
dataframe, self.corr_dataframes, dk.pair)
return dataframe return dataframe
@ -813,8 +819,9 @@ class IFreqaiModel(ABC):
self.pair_it = 1 self.pair_it = 1
self.current_candle = self.dd.current_candle self.current_candle = self.dd.current_candle
def ensure_data_exists(self, len_dataframe_backtest: int, def ensure_data_exists(
tr_backtest: TimeRange, pair: str) -> bool: self, len_dataframe_backtest: int, tr_backtest: TimeRange, pair: str
) -> bool:
""" """
Check if the dataframe is empty; if it is, report useful information to the user. Check if the dataframe is empty; if it is, report useful information to the user.
:param len_dataframe_backtest: the length of the backtesting dataframe :param len_dataframe_backtest: the length of the backtesting dataframe
@ -823,14 +830,17 @@ class IFreqaiModel(ABC):
:return: if the data exists or not :return: if the data exists or not
""" """
if self.config.get("freqai_backtest_live_models", False) and len_dataframe_backtest == 0: if self.config.get("freqai_backtest_live_models", False) and len_dataframe_backtest == 0:
logger.info(f"No data found for pair {pair} from " logger.info(
f"No data found for pair {pair} from "
f"from {tr_backtest.start_fmt} to {tr_backtest.stop_fmt}. " f"from {tr_backtest.start_fmt} to {tr_backtest.stop_fmt}. "
"Probably more than one training within the same candle period.") "Probably more than one training within the same candle period."
)
return False return False
return True return True
def log_backtesting_progress(self, tr_train: TimeRange, pair: str, def log_backtesting_progress(
train_it: int, total_trains: int): self, tr_train: TimeRange, pair: str, train_it: int, total_trains: int
):
""" """
Log the backtesting progress so the user knows how many pairs have been trained and Log the backtesting progress so the user knows how many pairs have been trained and
how many more pairs/trains remain. how many more pairs/trains remain.
@ -857,30 +867,37 @@ class IFreqaiModel(ABC):
fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0) fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0)
if fit_live_predictions_candles: if fit_live_predictions_candles:
logger.info("Applying fit_live_predictions in backtesting") logger.info("Applying fit_live_predictions in backtesting")
label_columns = [col for col in dk.full_df.columns if ( label_columns = [
col.startswith("&") and col
not (col.startswith("&") and col.endswith("_mean")) and for col in dk.full_df.columns
not (col.startswith("&") and col.endswith("_std")) and if (
col not in self.dk.data["extra_returns_per_train"]) col.startswith("&")
and not (col.startswith("&") and col.endswith("_mean"))
and not (col.startswith("&") and col.endswith("_std"))
and col not in self.dk.data["extra_returns_per_train"]
)
] ]
for index in range(len(dk.full_df)): for index in range(len(dk.full_df)):
if index >= fit_live_predictions_candles: if index >= fit_live_predictions_candles:
self.dd.historic_predictions[self.dk.pair] = ( self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[
dk.full_df.iloc[index - fit_live_predictions_candles:index]) index - fit_live_predictions_candles : index
]
self.fit_live_predictions(self.dk, self.dk.pair) self.fit_live_predictions(self.dk, self.dk.pair)
for label in label_columns: for label in label_columns:
if dk.full_df[label].dtype == object: if dk.full_df[label].dtype == object:
continue continue
if "labels_mean" in self.dk.data: if "labels_mean" in self.dk.data:
dk.full_df.at[index, f"{label}_mean"] = ( dk.full_df.at[index, f"{label}_mean"] = self.dk.data["labels_mean"][
self.dk.data["labels_mean"][label]) label
]
if "labels_std" in self.dk.data: if "labels_std" in self.dk.data:
dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label] dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label]
for extra_col in self.dk.data["extra_returns_per_train"]: for extra_col in self.dk.data["extra_returns_per_train"]:
dk.full_df.at[index, f"{extra_col}"] = ( dk.full_df.at[index, f"{extra_col}"] = self.dk.data[
self.dk.data["extra_returns_per_train"][extra_col]) "extra_returns_per_train"
][extra_col]
return return
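The backtesting loop above re-applies `fit_live_predictions` over a sliding window: for each index past `fit_live_predictions_candles`, it fits a normal distribution to the trailing predictions and writes the resulting mean/std back into the dataframe. A minimal sketch of that fit, with a toy label column:

import numpy as np
import pandas as pd
from scipy.stats import norm

# Toy prediction history; "&-target" stands in for a real label column
hist = pd.DataFrame({"&-target": np.random.default_rng(0).normal(0.0, 0.02, 500)})
window = 100  # corresponds to fit_live_predictions_candles

# Fit a normal distribution to the trailing `window` predictions and
# record its mean/std for the strategy to use
mean, std = norm.fit(hist["&-target"].tail(window))
print(f"labels_mean={mean:.4f} labels_std={std:.4f}")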
@ -897,7 +914,8 @@ class IFreqaiModel(ABC):
if key_name not in self.metadata: if key_name not in self.metadata:
metadata = self.metadata metadata = self.metadata
metadata[key_name] = int( metadata[key_name] = int(
pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp()) pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp()
)
self.update_metadata(metadata) self.update_metadata(metadata)
def start_backtesting_from_historic_predictions( def start_backtesting_from_historic_predictions(
@ -913,19 +931,20 @@ class IFreqaiModel(ABC):
pair = metadata["pair"] pair = metadata["pair"]
dk.return_dataframe = dataframe dk.return_dataframe = dataframe
saved_dataframe = self.dd.historic_predictions[pair] saved_dataframe = self.dd.historic_predictions[pair]
columns_to_drop = list(set(saved_dataframe.columns).intersection( columns_to_drop = list(
dk.return_dataframe.columns)) set(saved_dataframe.columns).intersection(dk.return_dataframe.columns)
)
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop)) dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge( dk.return_dataframe = pd.merge(
dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred") dk.return_dataframe, saved_dataframe, how="left", left_on="date", right_on="date_pred"
)
return dk return dk
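`start_backtesting_from_historic_predictions` re-attaches saved predictions by dropping any columns that would collide and left-merging candle date against prediction date. A small pandas sketch of the same pattern (both frames are toy data):

import pandas as pd

candles = pd.DataFrame({"date": pd.date_range("2024-01-01", periods=3, freq="1h")})
saved = pd.DataFrame(
    {
        "date_pred": pd.date_range("2024-01-01", periods=3, freq="1h"),
        "&-target_mean": [0.1, 0.2, 0.3],
    }
)

# Drop columns that would collide, then align predictions to candle dates
overlap = set(saved.columns).intersection(candles.columns)
merged = candles.drop(columns=list(overlap)).merge(
    saved, how="left", left_on="date", right_on="date_pred"
)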
# Following methods which are overridden by user made prediction models. # Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example. # See freqai/prediction_models/CatboostPredictionModel.py for an example.
@abstractmethod @abstractmethod
def train(self, unfiltered_df: DataFrame, pair: str, def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
dk: FreqaiDataKitchen, **kwargs) -> Any:
""" """
Filter the training data and train a model to it. Train makes heavy use of the datahandler Filter the training data and train a model to it. Train makes heavy use of the datahandler
for storing, saving, loading, and analyzing the data. for storing, saving, loading, and analyzing the data.
@ -966,23 +985,25 @@ class IFreqaiModel(ABC):
""" """
Throw a deprecation warning if this function is called. Throw a deprecation warning if this function is called.
""" """
logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" logger.warning(
f"Your model {self.__class__.__name__} relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline." " data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at " " This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline") f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline"
)
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count) dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
dd = dk.data_dictionary dd = dk.data_dictionary
(dd["train_features"], (dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dd["train_labels"], dk.feature_pipeline.fit_transform(
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"], dd["train_features"], dd["train_labels"], dd["train_weights"]
dd["train_labels"], )
dd["train_weights"]) )
(dd["test_features"], (dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dd["test_labels"], dk.feature_pipeline.transform(
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"], dd["test_features"], dd["test_labels"], dd["test_weights"]
dd["test_labels"], )
dd["test_weights"]) )
dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count) dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count)
@ -994,13 +1015,16 @@ class IFreqaiModel(ABC):
""" """
Throw a deprecation warning if this function is called. Throw a deprecation warning if this function is called.
""" """
logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated" logger.warning(
f"Your model {self.__class__.__name__} relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline." " data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at " " This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline") f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline"
)
dd = dk.data_dictionary dd = dk.data_dictionary
dd["predict_features"], outliers, _ = dk.feature_pipeline.transform( dd["predict_features"], outliers, _ = dk.feature_pipeline.transform(
dd["predict_features"], outlier_check=True) dd["predict_features"], outlier_check=True
)
if self.freqai_info.get("DI_threshold", 0) > 0: if self.freqai_info.get("DI_threshold", 0) > 0:
dk.DI_values = dk.feature_pipeline["di"].di_values dk.DI_values = dk.feature_pipeline["di"].di_values
else: else:

View File

@ -46,14 +46,19 @@ class CatboostClassifier(BaseClassifierModel):
cbr = CatBoostClassifier( cbr = CatBoostClassifier(
allow_writing_files=True, allow_writing_files=True,
loss_function='MultiClass', loss_function="MultiClass",
train_dir=Path(dk.data_path), train_dir=Path(dk.data_path),
**self.model_training_parameters, **self.model_training_parameters,
) )
init_model = self.get_init_model(dk.pair) init_model = self.get_init_model(dk.pair)
cbr.fit(X=train_data, eval_set=test_data, init_model=init_model, cbr.fit(
log_cout=sys.stdout, log_cerr=sys.stderr) X=train_data,
eval_set=test_data,
init_model=init_model,
log_cout=sys.stdout,
log_cerr=sys.stderr,
)
return cbr return cbr

View File

@ -33,7 +33,7 @@ class CatboostClassifierMultiTarget(BaseClassifierModel):
cbc = CatBoostClassifier( cbc = CatBoostClassifier(
allow_writing_files=True, allow_writing_files=True,
loss_function='MultiClass', loss_function="MultiClass",
train_dir=Path(dk.data_path), train_dir=Path(dk.data_path),
**self.model_training_parameters, **self.model_training_parameters,
) )
@ -45,10 +45,10 @@ class CatboostClassifierMultiTarget(BaseClassifierModel):
eval_sets = [None] * y.shape[1] eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_sets = [None] * data_dictionary['test_labels'].shape[1] eval_sets = [None] * data_dictionary["test_labels"].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]): for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = Pool( eval_sets[i] = Pool(
data=data_dictionary["test_features"], data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i], label=data_dictionary["test_labels"].iloc[:, i],
@ -64,13 +64,17 @@ class CatboostClassifierMultiTarget(BaseClassifierModel):
fit_params = [] fit_params = []
for i in range(len(eval_sets)): for i in range(len(eval_sets)):
fit_params.append({ fit_params.append(
'eval_set': eval_sets[i], 'init_model': init_models[i], {
'log_cout': sys.stdout, 'log_cerr': sys.stderr, "eval_set": eval_sets[i],
}) "init_model": init_models[i],
"log_cout": sys.stdout,
"log_cerr": sys.stderr,
}
)
model = FreqaiMultiOutputClassifier(estimator=cbc) model = FreqaiMultiOutputClassifier(estimator=cbc)
thread_training = self.freqai_info.get('multitarget_parallel_training', False) thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training: if thread_training:
model.n_jobs = y.shape[1] model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -35,7 +35,7 @@ class CatboostRegressor(BaseRegressionModel):
label=data_dictionary["train_labels"], label=data_dictionary["train_labels"],
weight=data_dictionary["train_weights"], weight=data_dictionary["train_weights"],
) )
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
test_data = None test_data = None
else: else:
test_data = Pool( test_data = Pool(
@ -52,7 +52,12 @@ class CatboostRegressor(BaseRegressionModel):
**self.model_training_parameters, **self.model_training_parameters,
) )
model.fit(X=train_data, eval_set=test_data, init_model=init_model, model.fit(
log_cout=sys.stdout, log_cerr=sys.stderr) X=train_data,
eval_set=test_data,
init_model=init_model,
log_cout=sys.stdout,
log_cerr=sys.stderr,
)
return model return model

View File

@ -44,10 +44,10 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
eval_sets = [None] * y.shape[1] eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_sets = [None] * data_dictionary['test_labels'].shape[1] eval_sets = [None] * data_dictionary["test_labels"].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]): for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = Pool( eval_sets[i] = Pool(
data=data_dictionary["test_features"], data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i], label=data_dictionary["test_labels"].iloc[:, i],
@ -63,13 +63,17 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
fit_params = [] fit_params = []
for i in range(len(eval_sets)): for i in range(len(eval_sets)):
fit_params.append({ fit_params.append(
'eval_set': eval_sets[i], 'init_model': init_models[i], {
'log_cout': sys.stdout, 'log_cerr': sys.stderr, "eval_set": eval_sets[i],
}) "init_model": init_models[i],
"log_cout": sys.stdout,
"log_cerr": sys.stderr,
}
)
model = FreqaiMultiOutputRegressor(estimator=cbr) model = FreqaiMultiOutputRegressor(estimator=cbr)
thread_training = self.freqai_info.get('multitarget_parallel_training', False) thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training: if thread_training:
model.n_jobs = y.shape[1] model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -28,12 +28,16 @@ class LightGBMClassifier(BaseClassifierModel):
:param dk: The datakitchen object for the current coin/model :param dk: The datakitchen object for the current coin/model
""" """
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None eval_set = None
test_weights = None test_weights = None
else: else:
eval_set = [(data_dictionary["test_features"].to_numpy(), eval_set = [
data_dictionary["test_labels"].to_numpy()[:, 0])] (
data_dictionary["test_features"].to_numpy(),
data_dictionary["test_labels"].to_numpy()[:, 0],
)
]
test_weights = data_dictionary["test_weights"] test_weights = data_dictionary["test_weights"]
X = data_dictionary["train_features"].to_numpy() X = data_dictionary["train_features"].to_numpy()
y = data_dictionary["train_labels"].to_numpy()[:, 0] y = data_dictionary["train_labels"].to_numpy()[:, 0]
@ -42,7 +46,13 @@ class LightGBMClassifier(BaseClassifierModel):
init_model = self.get_init_model(dk.pair) init_model = self.get_init_model(dk.pair)
model = LGBMClassifier(**self.model_training_parameters) model = LGBMClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, model.fit(
eval_sample_weight=[test_weights], init_model=init_model) X=X,
y=y,
eval_set=eval_set,
sample_weight=train_weights,
eval_sample_weight=[test_weights],
init_model=init_model,
)
return model return model

View File

@ -38,13 +38,13 @@ class LightGBMClassifierMultiTarget(BaseClassifierModel):
eval_weights = None eval_weights = None
eval_sets = [None] * y.shape[1] eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]] eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore eval_sets = [(None, None)] * data_dictionary["test_labels"].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]): for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = ( # type: ignore eval_sets[i] = ( # type: ignore
data_dictionary["test_features"], data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i] data_dictionary["test_labels"].iloc[:, i],
) )
init_model = self.get_init_model(dk.pair) init_model = self.get_init_model(dk.pair)
@ -56,11 +56,15 @@ class LightGBMClassifierMultiTarget(BaseClassifierModel):
fit_params = [] fit_params = []
for i in range(len(eval_sets)): for i in range(len(eval_sets)):
fit_params.append( fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights, {
'init_model': init_models[i]}) "eval_set": eval_sets[i],
"eval_sample_weight": eval_weights,
"init_model": init_models[i],
}
)
model = FreqaiMultiOutputClassifier(estimator=lgb) model = FreqaiMultiOutputClassifier(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False) thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training: if thread_training:
model.n_jobs = y.shape[1] model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -28,7 +28,7 @@ class LightGBMRegressor(BaseRegressionModel):
:param dk: The datakitchen object for the current coin/model :param dk: The datakitchen object for the current coin/model
""" """
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None eval_set = None
eval_weights = None eval_weights = None
else: else:
@ -42,7 +42,13 @@ class LightGBMRegressor(BaseRegressionModel):
model = LGBMRegressor(**self.model_training_parameters) model = LGBMRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, model.fit(
eval_sample_weight=[eval_weights], init_model=init_model) X=X,
y=y,
eval_set=eval_set,
sample_weight=train_weights,
eval_sample_weight=[eval_weights],
init_model=init_model,
)
return model return model

View File

@ -38,14 +38,16 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
eval_weights = None eval_weights = None
eval_sets = [None] * y.shape[1] eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]] eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore eval_sets = [(None, None)] * data_dictionary["test_labels"].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]): for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = [( # type: ignore eval_sets[i] = [
( # type: ignore
data_dictionary["test_features"], data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i] data_dictionary["test_labels"].iloc[:, i],
)] )
]
init_model = self.get_init_model(dk.pair) init_model = self.get_init_model(dk.pair)
if init_model: if init_model:
@ -56,11 +58,15 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
fit_params = [] fit_params = []
for i in range(len(eval_sets)): for i in range(len(eval_sets)):
fit_params.append( fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights, {
'init_model': init_models[i]}) "eval_set": eval_sets[i],
"eval_sample_weight": eval_weights,
"init_model": init_models[i],
}
)
model = FreqaiMultiOutputRegressor(estimator=lgb) model = FreqaiMultiOutputRegressor(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False) thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training: if thread_training:
model.n_jobs = y.shape[1] model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -45,8 +45,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
@property @property
def data_convertor(self) -> PyTorchDataConvertor: def data_convertor(self) -> PyTorchDataConvertor:
return DefaultPyTorchDataConvertor( return DefaultPyTorchDataConvertor(
target_tensor_type=torch.long, target_tensor_type=torch.long, squeeze_target_tensor=True
squeeze_target_tensor=True
) )
def __init__(self, **kwargs) -> None: def __init__(self, **kwargs) -> None:
@ -69,9 +68,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
self.convert_label_column_to_int(data_dictionary, dk, class_names) self.convert_label_column_to_int(data_dictionary, dk, class_names)
n_features = data_dictionary["train_features"].shape[-1] n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel( model = PyTorchMLPModel(
input_dim=n_features, input_dim=n_features, output_dim=len(class_names), **self.model_kwargs
output_dim=len(class_names),
**self.model_kwargs
) )
model.to(self.device) model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate) optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)

View File

@ -63,11 +63,7 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
""" """
n_features = data_dictionary["train_features"].shape[-1] n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel( model = PyTorchMLPModel(input_dim=n_features, output_dim=1, **self.model_kwargs)
input_dim=n_features,
output_dim=1,
**self.model_kwargs
)
model.to(self.device) model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate) optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
criterion = torch.nn.MSELoss() criterion = torch.nn.MSELoss()

View File

@ -77,7 +77,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
input_dim=n_features, input_dim=n_features,
output_dim=n_labels, output_dim=n_labels,
time_window=self.window_size, time_window=self.window_size,
**self.model_kwargs **self.model_kwargs,
) )
model.to(self.device) model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate) optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
@ -116,11 +116,11 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
) )
dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform( dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True) dk.data_dictionary["prediction_features"], outlier_check=True
)
x = self.data_convertor.convert_x( x = self.data_convertor.convert_x(
dk.data_dictionary["prediction_features"], dk.data_dictionary["prediction_features"], device=self.device
device=self.device
) )
# if user is asking for multiple predictions, slide the window # if user is asking for multiple predictions, slide the window
# along the tensor # along the tensor
@ -148,7 +148,8 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
dk.do_predict = outliers dk.do_predict = outliers
if x.shape[1] > 1: if x.shape[1] > 1:
zeros_df = pd.DataFrame(np.zeros((x.shape[1] - len(pred_df), len(pred_df.columns))), zeros_df = pd.DataFrame(
columns=pred_df.columns) np.zeros((x.shape[1] - len(pred_df), len(pred_df.columns))), columns=pred_df.columns
)
pred_df = pd.concat([zeros_df, pred_df], axis=0, ignore_index=True) pred_df = pd.concat([zeros_df, pred_df], axis=0, ignore_index=True)
return (pred_df, dk.do_predict) return (pred_df, dk.do_predict)

View File

@ -56,27 +56,30 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
train_df = data_dictionary["train_features"] train_df = data_dictionary["train_features"]
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df) total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
policy_kwargs = dict(activation_fn=th.nn.ReLU, policy_kwargs = dict(activation_fn=th.nn.ReLU, net_arch=self.net_arch)
net_arch=self.net_arch)
if self.activate_tensorboard: if self.activate_tensorboard:
tb_path = Path(dk.full_path / "tensorboard" / dk.pair.split('/')[0]) tb_path = Path(dk.full_path / "tensorboard" / dk.pair.split("/")[0])
else: else:
tb_path = None tb_path = None
if dk.pair not in self.dd.model_dictionary or not self.continual_learning: if dk.pair not in self.dd.model_dictionary or not self.continual_learning:
model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs, model = self.MODELCLASS(
self.policy_type,
self.train_env,
policy_kwargs=policy_kwargs,
tensorboard_log=tb_path, tensorboard_log=tb_path,
**self.freqai_info.get('model_training_parameters', {}) **self.freqai_info.get("model_training_parameters", {}),
) )
else: else:
logger.info('Continual training activated - starting training from previously ' logger.info(
'trained agent.') "Continual training activated - starting training from previously " "trained agent."
)
model = self.dd.model_dictionary[dk.pair] model = self.dd.model_dictionary[dk.pair]
model.set_env(self.train_env) model.set_env(self.train_env)
callbacks: List[Any] = [self.eval_callback, self.tensorboard_callback] callbacks: List[Any] = [self.eval_callback, self.tensorboard_callback]
progressbar_callback: Optional[ProgressBarCallback] = None progressbar_callback: Optional[ProgressBarCallback] = None
if self.rl_config.get('progress_bar', False): if self.rl_config.get("progress_bar", False):
progressbar_callback = ProgressBarCallback() progressbar_callback = ProgressBarCallback()
callbacks.insert(0, progressbar_callback) callbacks.insert(0, progressbar_callback)
@ -90,7 +93,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
progressbar_callback.on_training_end() progressbar_callback.on_training_end()
if Path(dk.data_path / "best_model.zip").is_file(): if Path(dk.data_path / "best_model.zip").is_file():
logger.info('Callback found a best model.') logger.info("Callback found a best model.")
best_model = self.MODELCLASS.load(dk.data_path / "best_model") best_model = self.MODELCLASS.load(dk.data_path / "best_model")
return best_model return best_model
@ -127,20 +130,18 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
return -2 return -2
pnl = self.get_unrealized_profit() pnl = self.get_unrealized_profit()
factor = 100. factor = 100.0
# reward agent for entering trades # reward agent for entering trades
if (action == Actions.Long_enter.value if action == Actions.Long_enter.value and self._position == Positions.Neutral:
and self._position == Positions.Neutral):
return 25 return 25
if (action == Actions.Short_enter.value if action == Actions.Short_enter.value and self._position == Positions.Neutral:
and self._position == Positions.Neutral):
return 25 return 25
# discourage agent from not entering trades # discourage agent from not entering trades
if action == Actions.Neutral.value and self._position == Positions.Neutral: if action == Actions.Neutral.value and self._position == Positions.Neutral:
return -1 return -1
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300) max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
trade_duration = self._current_tick - self._last_trade_tick # type: ignore trade_duration = self._current_tick - self._last_trade_tick # type: ignore
if trade_duration <= max_trade_duration: if trade_duration <= max_trade_duration:
@ -149,20 +150,22 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
factor *= 0.5 factor *= 0.5
# discourage sitting in position # discourage sitting in position
if (self._position in (Positions.Short, Positions.Long) and if (
action == Actions.Neutral.value): self._position in (Positions.Short, Positions.Long)
and action == Actions.Neutral.value
):
return -1 * trade_duration / max_trade_duration return -1 * trade_duration / max_trade_duration
# close long # close long
if action == Actions.Long_exit.value and self._position == Positions.Long: if action == Actions.Long_exit.value and self._position == Positions.Long:
if pnl > self.profit_aim * self.rr: if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2) factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor) return float(pnl * factor)
# close short # close short
if action == Actions.Short_exit.value and self._position == Positions.Short: if action == Actions.Short_exit.value and self._position == Positions.Short:
if pnl > self.profit_aim * self.rr: if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2) factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor) return float(pnl * factor)
return 0. return 0.0
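The reward function above starts from a base factor of 100, reduces it (`factor *= 0.5`) when the trade runs past `max_trade_duration_candles`, and multiplies by `win_reward_factor` when pnl beats the `profit_aim * rr` target. A small worked sketch of the visible exit-branch arithmetic (helper name and defaults are illustrative):

def exit_reward(pnl: float, profit_aim: float, rr: float,
                trade_duration: int, max_trade_duration: int = 300,
                win_reward_factor: float = 2.0) -> float:
    factor = 100.0
    if trade_duration > max_trade_duration:
        factor *= 0.5  # penalize overstaying the position
    if pnl > profit_aim * rr:
        factor *= win_reward_factor  # bonus for beating the profit target
    return pnl * factor

exit_reward(0.03, profit_aim=0.02, rr=1.0, trade_duration=400)
# 0.03 * (100 * 0.5 * 2) = 3.0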

View File

@ -20,9 +20,13 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
Demonstration of how to build vectorized environments Demonstration of how to build vectorized environments
""" """
def set_train_and_eval_environments(self, data_dictionary: Dict[str, Any], def set_train_and_eval_environments(
prices_train: DataFrame, prices_test: DataFrame, self,
dk: FreqaiDataKitchen): data_dictionary: Dict[str, Any],
prices_train: DataFrame,
prices_test: DataFrame,
dk: FreqaiDataKitchen,
):
""" """
User can override this if they are using a custom MyRLEnv User can override this if they are using a custom MyRLEnv
:param data_dictionary: dict = common data dictionary containing train and test :param data_dictionary: dict = common data dictionary containing train and test
@ -45,22 +49,35 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
eval_freq = len(train_df) // self.max_threads eval_freq = len(train_df) // self.max_threads
env_id = "train_env" env_id = "train_env"
self.train_env = VecMonitor(SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1, self.train_env = VecMonitor(
train_df, prices_train, SubprocVecEnv(
env_info=env_info) for i [
in range(self.max_threads)])) make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train, env_info=env_info)
for i in range(self.max_threads)
]
)
)
eval_env_id = 'eval_env' eval_env_id = "eval_env"
self.eval_env = VecMonitor(SubprocVecEnv([make_env(self.MyRLEnv, eval_env_id, i, 1, self.eval_env = VecMonitor(
test_df, prices_test, SubprocVecEnv(
env_info=env_info) for i [
in range(self.max_threads)])) make_env(
self.MyRLEnv, eval_env_id, i, 1, test_df, prices_test, env_info=env_info
)
for i in range(self.max_threads)
]
)
)
self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True, self.eval_callback = MaskableEvalCallback(
render=False, eval_freq=eval_freq, self.eval_env,
deterministic=True,
render=False,
eval_freq=eval_freq,
best_model_save_path=str(dk.data_path), best_model_save_path=str(dk.data_path),
use_masking=(self.model_type == 'MaskablePPO' and use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
is_masking_supported(self.eval_env))) )
# TENSORBOARD CALLBACK IS NOT RECOMMENDED FOR USE WITH MULTIPLE ENVS, # TENSORBOARD CALLBACK IS NOT RECOMMENDED FOR USE WITH MULTIPLE ENVS,
# IT WILL RETURN FALSE INFORMATION; MOREOVER, IT IS NOT THREAD SAFE WITH SB3!!! # IT WILL RETURN FALSE INFORMATION; MOREOVER, IT IS NOT THREAD SAFE WITH SB3!!!
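The multiproc hunk above builds `VecMonitor(SubprocVecEnv([...]))` from one `make_env` callable per thread. A minimal sketch of that vectorized-environment pattern with a stock gymnasium env (the real code wraps FreqAI's `MyRLEnv` with dataframes and `env_info`):

import gymnasium as gym
from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

def make_env(env_id: str, rank: int):
    # SubprocVecEnv expects zero-arg callables, one per worker process
    def _init():
        env = gym.make(env_id)
        env.reset(seed=rank)  # distinct seed per worker
        return env
    return _init

if __name__ == "__main__":
    # SubprocVecEnv forks worker processes, so guard construction on import
    n_procs = 4
    train_env = VecMonitor(
        SubprocVecEnv([make_env("CartPole-v1", i) for i in range(n_procs)])
    )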

View File

@ -35,7 +35,7 @@ class SKLearnRandomForestClassifier(BaseClassifierModel):
X = data_dictionary["train_features"].to_numpy() X = data_dictionary["train_features"].to_numpy()
y = data_dictionary["train_labels"].to_numpy()[:, 0] y = data_dictionary["train_labels"].to_numpy()[:, 0]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None eval_set = None
else: else:
test_features = data_dictionary["test_features"].to_numpy() test_features = data_dictionary["test_features"].to_numpy()
@ -44,8 +44,10 @@ class SKLearnRandomForestClassifier(BaseClassifierModel):
eval_set = (test_features, test_labels) eval_set = (test_features, test_labels)
if self.freqai_info.get("continual_learning", False): if self.freqai_info.get("continual_learning", False):
logger.warning("Continual learning is not supported for " logger.warning(
"SKLearnRandomForestClassifier, ignoring.") "Continual learning is not supported for "
"SKLearnRandomForestClassifier, ignoring."
)
train_weights = data_dictionary["train_weights"] train_weights = data_dictionary["train_weights"]
@ -73,10 +75,11 @@ class SKLearnRandomForestClassifier(BaseClassifierModel):
le = LabelEncoder() le = LabelEncoder()
label = dk.label_list[0] label = dk.label_list[0]
labels_before = list(dk.data['labels_std'].keys()) labels_before = list(dk.data["labels_std"].keys())
labels_after = le.fit_transform(labels_before).tolist() labels_after = le.fit_transform(labels_before).tolist()
pred_df[label] = le.inverse_transform(pred_df[label]) pred_df[label] = le.inverse_transform(pred_df[label])
pred_df = pred_df.rename( pred_df = pred_df.rename(
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}) columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
)
return (pred_df, dk.do_predict) return (pred_df, dk.do_predict)
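This predict hunk (and the identical ones in the XGBoost classifiers that follow) maps encoded predictions back to class names: `inverse_transform` restores the label column, and the integer probability columns are renamed to the original classes. A minimal sketch with toy class names:

import pandas as pd
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
labels_before = ["down", "up"]  # original class names
labels_after = le.fit_transform(labels_before).tolist()  # [0, 1]

# Toy prediction frame: encoded class in "&s-class", per-class probabilities
pred_df = pd.DataFrame({"&s-class": [0, 1, 1], 0: [0.7, 0.2, 0.4], 1: [0.3, 0.8, 0.6]})
pred_df["&s-class"] = le.inverse_transform(pred_df["&s-class"])
pred_df = pred_df.rename(
    columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
)
# columns are now ["&s-class", "down", "up"] with string class labels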

View File

@@ -41,7 +41,7 @@ class XGBoostClassifier(BaseClassifierModel):
         if not is_integer_dtype(y):
             y = pd.Series(le.fit_transform(y), dtype="int64")
 
-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
             eval_set = None
         else:
             test_features = data_dictionary["test_features"].to_numpy()
@@ -58,8 +58,7 @@ class XGBoostClassifier(BaseClassifierModel):
         model = XGBClassifier(**self.model_training_parameters)
 
-        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
-                  xgb_model=init_model)
+        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, xgb_model=init_model)
 
         return model
@@ -79,10 +78,11 @@ class XGBoostClassifier(BaseClassifierModel):
         le = LabelEncoder()
         label = dk.label_list[0]
-        labels_before = list(dk.data['labels_std'].keys())
+        labels_before = list(dk.data["labels_std"].keys())
         labels_after = le.fit_transform(labels_before).tolist()
         pred_df[label] = le.inverse_transform(pred_df[label])
         pred_df = pred_df.rename(
-            columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
+            columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
+        )
 
         return (pred_df, dk.do_predict)
View File
@@ -41,7 +41,7 @@ class XGBoostRFClassifier(BaseClassifierModel):
         if not is_integer_dtype(y):
             y = pd.Series(le.fit_transform(y), dtype="int64")
 
-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
             eval_set = None
         else:
             test_features = data_dictionary["test_features"].to_numpy()
@@ -58,8 +58,7 @@ class XGBoostRFClassifier(BaseClassifierModel):
         model = XGBRFClassifier(**self.model_training_parameters)
 
-        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
-                  xgb_model=init_model)
+        model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, xgb_model=init_model)
 
         return model
@@ -79,10 +78,11 @@ class XGBoostRFClassifier(BaseClassifierModel):
         le = LabelEncoder()
         label = dk.label_list[0]
-        labels_before = list(dk.data['labels_std'].keys())
+        labels_before = list(dk.data["labels_std"].keys())
         labels_after = le.fit_transform(labels_before).tolist()
         pred_df[label] = le.inverse_transform(pred_df[label])
         pred_df = pred_df.rename(
-            columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
+            columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
+        )
 
         return (pred_df, dk.do_predict)
View File
@@ -37,7 +37,7 @@ class XGBoostRFRegressor(BaseRegressionModel):
             eval_weights = None
         else:
             eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
-            eval_weights = [data_dictionary['test_weights']]
+            eval_weights = [data_dictionary["test_weights"]]
 
         sample_weight = data_dictionary["train_weights"]
@@ -46,8 +46,14 @@ class XGBoostRFRegressor(BaseRegressionModel):
         model = XGBRFRegressor(**self.model_training_parameters)
         model.set_params(callbacks=[TBCallback(dk.data_path)])
-        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
-                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+        model.fit(
+            X=X,
+            y=y,
+            sample_weight=sample_weight,
+            eval_set=eval_set,
+            sample_weight_eval_set=eval_weights,
+            xgb_model=xgb_model,
+        )
         # set the callbacks to empty so that we can serialize to disk later
         model.set_params(callbacks=[])
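The `set_params(callbacks=[])` step exists because callback objects (here the TensorBoard writer) can hold non-picklable state. A hedged sketch of the idea on synthetic data (freqtrade's actual persistence layer is omitted):

import pickle

import numpy as np
from xgboost import XGBRFRegressor

model = XGBRFRegressor(n_estimators=10)
model.fit(np.random.rand(50, 3), np.random.rand(50))

model.set_params(callbacks=[])   # drop stateful callbacks before persisting
blob = pickle.dumps(model)       # now serializes cleanly for later reuse
print(len(blob) > 0)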
View File
@@ -36,15 +36,8 @@ class XGBoostRegressor(BaseRegressionModel):
             eval_set = None
             eval_weights = None
         else:
-            eval_set = [
-                (data_dictionary["test_features"],
-                 data_dictionary["test_labels"]),
-                (X, y)
-            ]
-            eval_weights = [
-                data_dictionary['test_weights'],
-                data_dictionary['train_weights']
-            ]
+            eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"]), (X, y)]
+            eval_weights = [data_dictionary["test_weights"], data_dictionary["train_weights"]]
 
         sample_weight = data_dictionary["train_weights"]
@@ -53,8 +46,14 @@ class XGBoostRegressor(BaseRegressionModel):
         model = XGBRegressor(**self.model_training_parameters)
         model.set_params(callbacks=[TBCallback(dk.data_path)])
-        model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
-                  sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
+        model.fit(
+            X=X,
+            y=y,
+            sample_weight=sample_weight,
+            eval_set=eval_set,
+            sample_weight_eval_set=eval_weights,
+            xgb_model=xgb_model,
+        )
         # set the callbacks to empty so that we can serialize to disk later
         model.set_params(callbacks=[])
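Note that the second entry in eval_set above is the training pair (X, y) itself, which makes XGBoost report a training-set learning curve alongside the validation curve. A minimal sketch with synthetic data:

import numpy as np
from xgboost import XGBRegressor

X, y = np.random.rand(100, 5), np.random.rand(100)
X_val, y_val = np.random.rand(20, 5), np.random.rand(20)

model = XGBRegressor(n_estimators=25)
model.fit(X, y, eval_set=[(X_val, y_val), (X, y)], verbose=False)
# Two curves: validation_0 (hold-out) and validation_1 (training data).
print(list(model.evals_result()))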
View File
@@ -38,13 +38,15 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel):
         eval_weights = None
         eval_sets = [None] * y.shape[1]
 
-        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
+        if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
             eval_weights = [data_dictionary["test_weights"]]
-            for i in range(data_dictionary['test_labels'].shape[1]):
-                eval_sets[i] = [(  # type: ignore
-                    data_dictionary["test_features"],
-                    data_dictionary["test_labels"].iloc[:, i]
-                )]
+            for i in range(data_dictionary["test_labels"].shape[1]):
+                eval_sets[i] = [
+                    (  # type: ignore
+                        data_dictionary["test_features"],
+                        data_dictionary["test_labels"].iloc[:, i],
+                    )
+                ]
 
         init_model = self.get_init_model(dk.pair)
         if init_model:
@@ -55,11 +57,15 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel):
         fit_params = []
         for i in range(len(eval_sets)):
             fit_params.append(
-                {'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
-                 'xgb_model': init_models[i]})
+                {
+                    "eval_set": eval_sets[i],
+                    "sample_weight_eval_set": eval_weights,
+                    "xgb_model": init_models[i],
+                }
+            )
 
         model = FreqaiMultiOutputRegressor(estimator=xgb)
-        thread_training = self.freqai_info.get('multitarget_parallel_training', False)
+        thread_training = self.freqai_info.get("multitarget_parallel_training", False)
         if thread_training:
             model.n_jobs = y.shape[1]
         model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
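FreqaiMultiOutputRegressor is freqtrade's own wrapper; the underlying pattern is simply one estimator per label column, each receiving its own fit kwargs. A stand-alone sketch of that pattern (synthetic data, plain loop instead of the wrapper):

import numpy as np
from xgboost import XGBRegressor

X = np.random.rand(200, 8)
Y = np.random.rand(200, 3)  # three regression targets

models = []
for i in range(Y.shape[1]):
    est = XGBRegressor(n_estimators=25)
    # freqtrade threads the per-target eval_set/xgb_model kwargs through here
    est.fit(X, Y[:, i])
    models.append(est)

preds = np.column_stack([m.predict(X) for m in models])
print(preds.shape)  # (200, 3)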
View File
@@ -12,6 +12,7 @@ class TensorboardCallback(BaseCallback):
     Custom callback for plotting additional values in tensorboard and
     episodic summary reports.
     """
+
     def __init__(self, verbose=1, actions: Type[Enum] = BaseActions):
         super().__init__(verbose)
         self.model: Any = None
@@ -40,10 +41,9 @@ class TensorboardCallback(BaseCallback):
         )
 
     def _on_step(self) -> bool:
         local_info = self.locals["infos"][0]
 
-        if hasattr(self.training_env, 'envs'):
+        if hasattr(self.training_env, "envs"):
             tensorboard_metrics = self.training_env.envs[0].unwrapped.tensorboard_metrics
         else:
View File
@@ -1,6 +1,7 @@
 # ensure users can still use a non-torch freqai version
 try:
     from freqtrade.freqai.tensorboard.tensorboard import TensorBoardCallback, TensorboardLogger
+
     TBLogger = TensorboardLogger
     TBCallback = TensorBoardCallback
 except ModuleNotFoundError:
@@ -8,10 +9,8 @@ except ModuleNotFoundError:
         BaseTensorBoardCallback,
         BaseTensorboardLogger,
     )
+
     TBLogger = BaseTensorboardLogger  # type: ignore
     TBCallback = BaseTensorBoardCallback  # type: ignore
 
-__all__ = (
-    "TBLogger",
-    "TBCallback"
-)
+__all__ = ("TBLogger", "TBCallback")
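This module is an instance of the optional-dependency pattern: try the torch-backed implementation, fall back to a no-op base class when torch is absent. A generic sketch of the same pattern (module and names illustrative, not freqtrade code):

# Prefer a fast optional dependency, degrade gracefully without it.
try:
    import numpy as np

    def mean(values):
        return float(np.mean(values))
except ModuleNotFoundError:
    def mean(values):  # type: ignore  # pure-stdlib fallback
        return sum(values) / len(values)

__all__ = ("mean",)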
View File
@@ -20,13 +20,10 @@ class BaseTensorboardLogger:
 
 class BaseTensorBoardCallback(TrainingCallback):
-
     def __init__(self, logdir: Path, activate: bool = True):
         pass
 
-    def after_iteration(
-        self, model, epoch: int, evals_log: TrainingCallback.EvalsLog
-    ) -> bool:
+    def after_iteration(self, model, epoch: int, evals_log: TrainingCallback.EvalsLog) -> bool:
         return False
 
     def after_training(self, model):
View File
@@ -31,7 +31,6 @@ class TensorboardLogger(BaseTensorboardLogger):
 
 class TensorBoardCallback(BaseTensorBoardCallback):
-
     def __init__(self, logdir: Path, activate: bool = True):
         self.activate = activate
         if self.activate:
View File
@@ -28,7 +28,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         model_meta_data: Dict[str, Any] = {},
         window_size: int = 1,
         tb_logger: Any = None,
-        **kwargs
+        **kwargs,
     ):
         """
         :param model: The PyTorch model to be trained.
@@ -119,9 +119,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
             self.model.train()
 
     def create_data_loaders_dictionary(
-        self,
-        data_dictionary: Dict[str, pd.DataFrame],
-        splits: List[str]
+        self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]
     ) -> Dict[str, DataLoader]:
         """
         Converts the input data to PyTorch tensors using a data loader.
@@ -168,12 +166,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
         user needs to store. e.g. class_names for classification models.
         """
-        torch.save({
-            "model_state_dict": self.model.state_dict(),
-            "optimizer_state_dict": self.optimizer.state_dict(),
-            "model_meta_data": self.model_meta_data,
-            "pytrainer": self
-        }, path)
+        torch.save(
+            {
+                "model_state_dict": self.model.state_dict(),
+                "optimizer_state_dict": self.optimizer.state_dict(),
+                "model_meta_data": self.model_meta_data,
+                "pytrainer": self,
+            },
+            path,
+        )
 
     def load(self, path: Path):
         checkpoint = torch.load(path)
@@ -198,9 +199,7 @@ class PyTorchTransformerTrainer(PyTorchModelTrainer):
     """
 
     def create_data_loaders_dictionary(
-        self,
-        data_dictionary: Dict[str, pd.DataFrame],
-        splits: List[str]
+        self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]
     ) -> Dict[str, DataLoader]:
         """
         Converts the input data to PyTorch tensors using a data loader.
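For orientation, the save/load contract above round-trips like this (a minimal sketch assuming only torch; the "pytrainer" self-reference and freqtrade-specific metadata are omitted):

import torch
from torch import nn

model = nn.Linear(4, 1)
optimizer = torch.optim.AdamW(model.parameters())

torch.save(
    {
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "model_meta_data": {"class_names": None},
    },
    "checkpoint.pt",
)

# Restore both model weights and optimizer state for continual learning.
checkpoint = torch.load("checkpoint.pt")
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])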
View File
@@ -8,7 +8,6 @@ from torch import nn
 
 class PyTorchTrainerInterface(ABC):
-
     @abstractmethod
     def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]) -> None:
         """
View File
@@ -19,8 +19,16 @@ class PyTorchTransformerModel(nn.Module):
     Lukasz Kaiser, and Illia Polosukhin. 2017.
     """
 
-    def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
-                 n_layer=2, dropout_percent=0.1, time_window=10, nhead=8):
+    def __init__(
+        self,
+        input_dim: int = 7,
+        output_dim: int = 7,
+        hidden_dim=1024,
+        n_layer=2,
+        dropout_percent=0.1,
+        time_window=10,
+        nhead=8,
+    ):
         super().__init__()
         self.time_window = time_window
         # ensure the input dimension to the transformer is divisible by nhead
@@ -34,7 +42,8 @@ class PyTorchTransformerModel(nn.Module):
         # Define the encoder block of the Transformer
         self.encoder_layer = nn.TransformerEncoderLayer(
-            d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True)
+            d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True
+        )
         self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)
         # the pseudo decoding FC
@@ -48,7 +57,7 @@ class PyTorchTransformerModel(nn.Module):
             nn.Linear(int(hidden_dim / 2), int(hidden_dim / 4)),
             nn.ReLU(),
             nn.Dropout(dropout_percent),
-            nn.Linear(int(hidden_dim / 4), output_dim)
+            nn.Linear(int(hidden_dim / 4), output_dim),
         )
 
     def forward(self, x, mask=None, add_positional_encoding=True):
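The "divisible by nhead" comment above reflects a hard constraint of multi-head attention: d_model is split evenly across heads. A small sketch of padding the feature dimension up to the next multiple (the exact padding freqtrade applies may differ):

import torch
from torch import nn

nhead = 8
input_dim = 7  # raw feature count
dim_val = input_dim + (nhead - input_dim % nhead) % nhead  # pad 7 -> 8
assert dim_val % nhead == 0

layer = nn.TransformerEncoderLayer(d_model=dim_val, nhead=nhead, batch_first=True)
out = layer(torch.randn(2, 10, dim_val))  # (batch, time_window, dim_val)
print(out.shape)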
View File
@@ -31,10 +31,11 @@ def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
     """
     if dp._exchange is None:
-        raise OperationalException('No exchange object found.')
+        raise OperationalException("No exchange object found.")
     markets = [
-        p for p in dp._exchange.get_markets(
-            tradable_only=True, active_only=not config.get('include_inactive')
+        p
+        for p in dp._exchange.get_markets(
+            tradable_only=True, active_only=not config.get("include_inactive")
         ).keys()
     ]
@@ -73,29 +74,26 @@ def get_required_data_timerange(config: Config) -> TimeRange:
         if secs > max_tf_seconds:
             max_tf_seconds = secs
 
-    startup_candles = config.get('startup_candle_count', 0)
+    startup_candles = config.get("startup_candle_count", 0)
     indicator_periods = config["freqai"]["feature_parameters"]["indicator_periods_candles"]
 
     # factor the max_period as a factor of safety.
     max_period = int(max(startup_candles, max(indicator_periods)) * 1.5)
-    config['startup_candle_count'] = max_period
-    logger.info(f'FreqAI auto-downloader using {max_period} startup candles.')
+    config["startup_candle_count"] = max_period
+    logger.info(f"FreqAI auto-downloader using {max_period} startup candles.")
 
     additional_seconds = max_period * max_tf_seconds
 
-    startts = int(
-        time
-        - config["freqai"].get("train_period_days", 0) * 86400
-        - additional_seconds
-    )
+    startts = int(time - config["freqai"].get("train_period_days", 0) * 86400 - additional_seconds)
     stopts = int(time)
-    data_load_timerange = TimeRange('date', 'date', startts, stopts)
+    data_load_timerange = TimeRange("date", "date", startts, stopts)
 
     return data_load_timerange
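As a worked example of the window arithmetic above (all numbers illustrative):

time = 1_700_000_000        # "now", epoch seconds
train_period_days = 30
max_period = 100            # startup candles after the 1.5x safety factor
max_tf_seconds = 3600       # largest timeframe in use: 1h

additional_seconds = max_period * max_tf_seconds  # 360_000 s (~4.17 days)
startts = int(time - train_period_days * 86400 - additional_seconds)
print((time - startts) / 86400)  # ~34.17 days of history get downloaded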
-def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
-                            count_max: int = 25) -> None:
+def plot_feature_importance(
+    model: Any, pair: str, dk: FreqaiDataKitchen, count_max: int = 25
+) -> None:
     """
     Plot Best and worst features by importance for a single sub-train.
     :param model: Any = A model which was `fit` using a common library
@@ -108,7 +106,7 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
     # Extract feature importance from model
     models = {}
-    if 'FreqaiMultiOutputRegressor' in str(model.__class__):
+    if "FreqaiMultiOutputRegressor" in str(model.__class__):
         for estimator, label in zip(model.estimators_, dk.label_list):
             models[label] = estimator
     else:
@@ -123,14 +121,16 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
         elif "xgb" in str(mdl.__class__):
             feature_importance = mdl.feature_importances_
         else:
-            logger.info('Model type does not support generating feature importances.')
+            logger.info("Model type does not support generating feature importances.")
             return
 
         # Data preparation
-        fi_df = pd.DataFrame({
-            "feature_names": np.array(dk.data_dictionary['train_features'].columns),
-            "feature_importance": np.array(feature_importance)
-        })
+        fi_df = pd.DataFrame(
+            {
+                "feature_names": np.array(dk.data_dictionary["train_features"].columns),
+                "feature_importance": np.array(feature_importance),
+            }
+        )
         fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
         fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]
@@ -140,14 +140,18 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
                 go.Bar(
                     x=fi_df["feature_importance"],
                     y=fi_df["feature_names"],
-                    orientation='h', showlegend=False
-                ), row=1, col=col
+                    orientation="h",
+                    showlegend=False,
+                ),
+                row=1,
+                col=col,
             )
 
         fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
         fig = add_feature_trace(fig, fi_df_top, 1)
         fig = add_feature_trace(fig, fi_df_worst, 2)
         fig.update_layout(title_text=f"Best and worst features by importance {pair}")
-        label = label.replace('&', '').replace('%', '')  # escape two FreqAI specific characters
+        label = label.replace("&", "").replace("%", "")  # escape two FreqAI specific characters
         store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)
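The nlargest/nsmallest selection is worth seeing on toy data; the [::-1] reversal exists so the horizontal bar chart plots the strongest feature at the top (feature names illustrative):

import pandas as pd

fi_df = pd.DataFrame(
    {
        "feature_names": ["rsi", "ema", "volume", "obv"],
        "feature_importance": [0.40, 0.35, 0.20, 0.05],
    }
)
print(fi_df.nlargest(2, "feature_importance")[::-1])   # rows: ema, then rsi
print(fi_df.nsmallest(2, "feature_importance")[::-1])  # rows: volume, then obv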
@@ -158,12 +162,12 @@ def record_params(config: Dict[str, Any], full_path: Path) -> None:
     params_record_path = full_path / "run_params.json"
     run_params = {
-        "freqai": config.get('freqai', {}),
-        "timeframe": config.get('timeframe'),
-        "stake_amount": config.get('stake_amount'),
-        "stake_currency": config.get('stake_currency'),
-        "max_open_trades": config.get('max_open_trades'),
-        "pairs": config.get('exchange', {}).get('pair_whitelist')
+        "freqai": config.get("freqai", {}),
+        "timeframe": config.get("timeframe"),
+        "stake_amount": config.get("stake_amount"),
+        "stake_currency": config.get("stake_currency"),
+        "max_open_trades": config.get("max_open_trades"),
+        "pairs": config.get("exchange", {}).get("pair_whitelist"),
     }
 
     with params_record_path.open("w") as handle:
@@ -172,7 +176,7 @@ def record_params(config: Dict[str, Any], full_path: Path) -> None:
             handle,
             indent=4,
             default=str,
-            number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN
+            number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN,
        )
@@ -191,10 +195,11 @@ def get_timerange_backtest_live_models(config: Config) -> str:
 def get_tb_logger(model_type: str, path: Path, activate: bool) -> Any:
     if model_type == "pytorch" and activate:
         from freqtrade.freqai.tensorboard import TBLogger
+
         return TBLogger(path, activate)
     else:
         from freqtrade.freqai.tensorboard.base_tensorboard import BaseTensorboardLogger
+
         return BaseTensorboardLogger(path, activate)