ruff format: freqai

Matthias 2024-05-12 17:12:20 +02:00
parent e4e8c3967c
commit d1db43dee0
44 changed files with 1111 additions and 900 deletions
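
Every hunk below is the mechanical output of ruff's formatter rather than a behavioral change: single-quoted strings become double-quoted, long signatures and call sites are split one argument per line with a trailing comma, long conditions and implicit string concatenations are wrapped in parentheses, and bare float literals such as 0. are written as 0.0. A minimal illustrative sketch of the quote/float normalization (generic code, not taken from the changed files):

price = 101.5
# before formatting
trade = {'price': price, 'profit': 0.}
# the same line after running ruff format (double quotes, explicit trailing zero)
trade = {"price": price, "profit": 0.0}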

View File

@ -19,6 +19,7 @@ class Base3ActionRLEnv(BaseEnvironment):
"""
Base class for a 3 action environment
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.actions = Actions
@ -73,11 +74,18 @@ class Base3ActionRLEnv(BaseEnvironment):
if trade_type is not None:
self.trade_history.append(
{'price': self.current_price(), 'index': self._current_tick,
'type': trade_type, 'profit': self.get_unrealized_profit()})
{
"price": self.current_price(),
"index": self._current_tick,
"type": trade_type,
"profit": self.get_unrealized_profit(),
}
)
if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
if (
self._total_profit < self.max_drawdown
or self._total_unrealized_profit < self.max_drawdown
):
self._done = True
self._position_history.append(self._position)
@ -89,7 +97,7 @@ class Base3ActionRLEnv(BaseEnvironment):
total_profit=self._total_profit,
position=self._position.value,
trade_duration=self.get_trade_duration(),
current_profit_pct=self.get_unrealized_profit()
current_profit_pct=self.get_unrealized_profit(),
)
observation = self._get_observation()
@ -109,10 +117,14 @@ class Base3ActionRLEnv(BaseEnvironment):
return (
(action == Actions.Buy.value and self._position == Positions.Neutral)
or (action == Actions.Sell.value and self._position == Positions.Long)
or (action == Actions.Sell.value and self._position == Positions.Neutral
and self.can_short)
or (action == Actions.Buy.value and self._position == Positions.Short
and self.can_short)
or (
action == Actions.Sell.value
and self._position == Positions.Neutral
and self.can_short
)
or (
action == Actions.Buy.value and self._position == Positions.Short and self.can_short
)
)
def _is_valid(self, action: int) -> bool:

View File

@ -20,6 +20,7 @@ class Base4ActionRLEnv(BaseEnvironment):
"""
Base class for a 4 action environment
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.actions = Actions
@ -52,7 +53,6 @@ class Base4ActionRLEnv(BaseEnvironment):
trade_type = None
if self.is_tradesignal(action):
if action == Actions.Neutral.value:
self._position = Positions.Neutral
trade_type = "neutral"
@ -75,11 +75,18 @@ class Base4ActionRLEnv(BaseEnvironment):
if trade_type is not None:
self.trade_history.append(
{'price': self.current_price(), 'index': self._current_tick,
'type': trade_type, 'profit': self.get_unrealized_profit()})
{
"price": self.current_price(),
"index": self._current_tick,
"type": trade_type,
"profit": self.get_unrealized_profit(),
}
)
if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
if (
self._total_profit < self.max_drawdown
or self._total_unrealized_profit < self.max_drawdown
):
self._done = True
self._position_history.append(self._position)
@ -91,7 +98,7 @@ class Base4ActionRLEnv(BaseEnvironment):
total_profit=self._total_profit,
position=self._position.value,
trade_duration=self.get_trade_duration(),
current_profit_pct=self.get_unrealized_profit()
current_profit_pct=self.get_unrealized_profit(),
)
observation = self._get_observation()
@ -108,14 +115,16 @@ class Base4ActionRLEnv(BaseEnvironment):
Determine if the signal is a trade signal
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
"""
return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
(action == Actions.Neutral.value and self._position == Positions.Short) or
(action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Short_enter.value and self._position == Positions.Short) or
(action == Actions.Short_enter.value and self._position == Positions.Long) or
(action == Actions.Exit.value and self._position == Positions.Neutral) or
(action == Actions.Long_enter.value and self._position == Positions.Long) or
(action == Actions.Long_enter.value and self._position == Positions.Short))
return not (
(action == Actions.Neutral.value and self._position == Positions.Neutral)
or (action == Actions.Neutral.value and self._position == Positions.Short)
or (action == Actions.Neutral.value and self._position == Positions.Long)
or (action == Actions.Short_enter.value and self._position == Positions.Short)
or (action == Actions.Short_enter.value and self._position == Positions.Long)
or (action == Actions.Exit.value and self._position == Positions.Neutral)
or (action == Actions.Long_enter.value and self._position == Positions.Long)
or (action == Actions.Long_enter.value and self._position == Positions.Short)
)
def _is_valid(self, action: int) -> bool:
"""

View File

@ -21,6 +21,7 @@ class Base5ActionRLEnv(BaseEnvironment):
"""
Base class for a 5 action environment
"""
def __init__(self, **kwargs):
super().__init__(**kwargs)
self.actions = Actions
@ -53,7 +54,6 @@ class Base5ActionRLEnv(BaseEnvironment):
trade_type = None
if self.is_tradesignal(action):
if action == Actions.Neutral.value:
self._position = Positions.Neutral
trade_type = "neutral"
@ -81,11 +81,18 @@ class Base5ActionRLEnv(BaseEnvironment):
if trade_type is not None:
self.trade_history.append(
{'price': self.current_price(), 'index': self._current_tick,
'type': trade_type, 'profit': self.get_unrealized_profit()})
{
"price": self.current_price(),
"index": self._current_tick,
"type": trade_type,
"profit": self.get_unrealized_profit(),
}
)
if (self._total_profit < self.max_drawdown or
self._total_unrealized_profit < self.max_drawdown):
if (
self._total_profit < self.max_drawdown
or self._total_unrealized_profit < self.max_drawdown
):
self._done = True
self._position_history.append(self._position)
@ -97,7 +104,7 @@ class Base5ActionRLEnv(BaseEnvironment):
total_profit=self._total_profit,
position=self._position.value,
trade_duration=self.get_trade_duration(),
current_profit_pct=self.get_unrealized_profit()
current_profit_pct=self.get_unrealized_profit(),
)
observation = self._get_observation()
@ -113,17 +120,19 @@ class Base5ActionRLEnv(BaseEnvironment):
Determine if the signal is a trade signal
e.g.: agent wants a Actions.Long_exit while it is in a Positions.short
"""
return not ((action == Actions.Neutral.value and self._position == Positions.Neutral) or
(action == Actions.Neutral.value and self._position == Positions.Short) or
(action == Actions.Neutral.value and self._position == Positions.Long) or
(action == Actions.Short_enter.value and self._position == Positions.Short) or
(action == Actions.Short_enter.value and self._position == Positions.Long) or
(action == Actions.Short_exit.value and self._position == Positions.Long) or
(action == Actions.Short_exit.value and self._position == Positions.Neutral) or
(action == Actions.Long_enter.value and self._position == Positions.Long) or
(action == Actions.Long_enter.value and self._position == Positions.Short) or
(action == Actions.Long_exit.value and self._position == Positions.Short) or
(action == Actions.Long_exit.value and self._position == Positions.Neutral))
return not (
(action == Actions.Neutral.value and self._position == Positions.Neutral)
or (action == Actions.Neutral.value and self._position == Positions.Short)
or (action == Actions.Neutral.value and self._position == Positions.Long)
or (action == Actions.Short_enter.value and self._position == Positions.Short)
or (action == Actions.Short_enter.value and self._position == Positions.Long)
or (action == Actions.Short_exit.value and self._position == Positions.Long)
or (action == Actions.Short_exit.value and self._position == Positions.Neutral)
or (action == Actions.Long_enter.value and self._position == Positions.Long)
or (action == Actions.Long_enter.value and self._position == Positions.Short)
or (action == Actions.Long_exit.value and self._position == Positions.Short)
or (action == Actions.Long_exit.value and self._position == Positions.Neutral)
)
def _is_valid(self, action: int) -> bool:
# trade signal
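
The docstring above ("agent wants a Actions.Long_exit while it is in a Positions.short") describes exactly the pairs that the reformatted boolean filters out, in both the 4- and 5-action environments. A standalone sketch of the same rule for the 5-action space; the Short_enter/Short_exit values and the Positions values are assumptions, since only Neutral/Long_enter/Long_exit appear in the hunks above:

from enum import Enum

class Actions(Enum):  # Neutral/Long_* values as in BaseActions above; Short_* assumed
    Neutral = 0
    Long_enter = 1
    Long_exit = 2
    Short_enter = 3
    Short_exit = 4

class Positions(Enum):  # illustrative values only
    Short = 0
    Long = 1
    Neutral = 0.5

# the no-op combinations spelled out by the reformatted boolean above
_NOOP = {
    (Actions.Neutral, Positions.Neutral),
    (Actions.Neutral, Positions.Short),
    (Actions.Neutral, Positions.Long),
    (Actions.Short_enter, Positions.Short),
    (Actions.Short_enter, Positions.Long),
    (Actions.Short_exit, Positions.Long),
    (Actions.Short_exit, Positions.Neutral),
    (Actions.Long_enter, Positions.Long),
    (Actions.Long_enter, Positions.Short),
    (Actions.Long_exit, Positions.Short),
    (Actions.Long_exit, Positions.Neutral),
}

def is_tradesignal(action: Actions, position: Positions) -> bool:
    # everything is a trade signal except the listed no-op combinations
    return (action, position) not in _NOOP

# a Long_exit while short is ignored; a Long_exit while long is a trade signal
assert not is_tradesignal(Actions.Long_exit, Positions.Short)
assert is_tradesignal(Actions.Long_exit, Positions.Long)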

View File

@ -21,6 +21,7 @@ class BaseActions(Enum):
"""
Default action space, mostly used for type handling.
"""
Neutral = 0
Long_enter = 1
Long_exit = 2
@ -44,11 +45,22 @@ class BaseEnvironment(gym.Env):
See RL/Base5ActionRLEnv.py and RL/Base4ActionRLEnv.py
"""
def __init__(self, df: DataFrame = DataFrame(), prices: DataFrame = DataFrame(),
reward_kwargs: dict = {}, window_size=10, starting_point=True,
id: str = 'baseenv-1', seed: int = 1, config: dict = {}, live: bool = False,
fee: float = 0.0015, can_short: bool = False, pair: str = "",
df_raw: DataFrame = DataFrame()):
def __init__(
self,
df: DataFrame = DataFrame(),
prices: DataFrame = DataFrame(),
reward_kwargs: dict = {},
window_size=10,
starting_point=True,
id: str = "baseenv-1",
seed: int = 1,
config: dict = {},
live: bool = False,
fee: float = 0.0015,
can_short: bool = False,
pair: str = "",
df_raw: DataFrame = DataFrame(),
):
"""
Initializes the training/eval environment.
:param df: dataframe of features
@ -64,15 +76,15 @@ class BaseEnvironment(gym.Env):
:param can_short: Whether or not the environment can short
"""
self.config: dict = config
self.rl_config: dict = config['freqai']['rl_config']
self.add_state_info: bool = self.rl_config.get('add_state_info', False)
self.rl_config: dict = config["freqai"]["rl_config"]
self.add_state_info: bool = self.rl_config.get("add_state_info", False)
self.id: str = id
self.max_drawdown: float = 1 - self.rl_config.get('max_training_drawdown_pct', 0.8)
self.compound_trades: bool = config['stake_amount'] == 'unlimited'
self.max_drawdown: float = 1 - self.rl_config.get("max_training_drawdown_pct", 0.8)
self.compound_trades: bool = config["stake_amount"] == "unlimited"
self.pair: str = pair
self.raw_features: DataFrame = df_raw
if self.config.get('fee', None) is not None:
self.fee = self.config['fee']
if self.config.get("fee", None) is not None:
self.fee = self.config["fee"]
else:
self.fee = fee
@ -82,14 +94,22 @@ class BaseEnvironment(gym.Env):
self.can_short: bool = can_short
self.live: bool = live
if not self.live and self.add_state_info:
raise OperationalException("`add_state_info` is not available in backtesting. Change "
"parameter to false in your rl_config. See `add_state_info` "
"docs for more info.")
raise OperationalException(
"`add_state_info` is not available in backtesting. Change "
"parameter to false in your rl_config. See `add_state_info` "
"docs for more info."
)
self.seed(seed)
self.reset_env(df, prices, window_size, reward_kwargs, starting_point)
def reset_env(self, df: DataFrame, prices: DataFrame, window_size: int,
reward_kwargs: dict, starting_point=True):
def reset_env(
self,
df: DataFrame,
prices: DataFrame,
window_size: int,
reward_kwargs: dict,
starting_point=True,
):
"""
Resets the environment when the agent fails (in our case, if the drawdown
exceeds the user set max_training_drawdown_pct)
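
The settings read in these hunks are all user-facing: add_state_info (rejected outside live/dry runs by the OperationalException above), max_training_drawdown_pct (stored as self.max_drawdown = 1 - value and used as the termination floor for a total profit that starts at 1.0), the optional fee override, and stake_amount == "unlimited" for compounding. An illustrative sketch of the corresponding slice of configuration, with placeholder values:

config = {
    "stake_amount": "unlimited",  # only this value enables compound_trades
    "fee": 0.0015,                # optional; used instead of the fee argument when set
    "freqai": {
        "rl_config": {
            "add_state_info": False,           # must stay False for backtesting
            "max_training_drawdown_pct": 0.8,  # episode ends once total profit < 0.2
            "randomize_starting_position": False,
        },
    },
}
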
@ -113,8 +133,7 @@ class BaseEnvironment(gym.Env):
self.total_features = self.signal_features.shape[1]
self.shape = (window_size, self.total_features)
self.set_action_space()
self.observation_space = spaces.Box(
low=-1, high=1, shape=self.shape, dtype=np.float32)
self.observation_space = spaces.Box(low=-1, high=1, shape=self.shape, dtype=np.float32)
# episode
self._start_tick: int = self.window_size
@ -151,8 +170,13 @@ class BaseEnvironment(gym.Env):
self.np_random, seed = seeding.np_random(seed)
return [seed]
def tensorboard_log(self, metric: str, value: Optional[Union[int, float]] = None,
inc: Optional[bool] = None, category: str = "custom"):
def tensorboard_log(
self,
metric: str,
value: Optional[Union[int, float]] = None,
inc: Optional[bool] = None,
category: str = "custom",
):
"""
Function builds the tensorboard_metrics dictionary
to be parsed by the TensorboardCallback. This
@ -195,7 +219,7 @@ class BaseEnvironment(gym.Env):
self._done = False
if self.starting_point is True:
if self.rl_config.get('randomize_starting_position', False):
if self.rl_config.get("randomize_starting_position", False):
length_of_data = int(self._end_tick / 4)
start_tick = random.randint(self.window_size + 1, length_of_data)
self._start_tick = start_tick
@ -207,8 +231,8 @@ class BaseEnvironment(gym.Env):
self._last_trade_tick = None
self._position = Positions.Neutral
self.total_reward = 0.
self._total_profit = 1. # unit
self.total_reward = 0.0
self._total_profit = 1.0 # unit
self.history = {}
self.trade_history = []
self.portfolio_log_returns = np.zeros(len(self.prices))
@ -231,18 +255,19 @@ class BaseEnvironment(gym.Env):
This may or may not be independent of action types, user can inherit
this in their custom "MyRLEnv"
"""
features_window = self.signal_features[(
self._current_tick - self.window_size):self._current_tick]
features_window = self.signal_features[
(self._current_tick - self.window_size) : self._current_tick
]
if self.add_state_info:
features_and_state = DataFrame(np.zeros((len(features_window), 3)),
columns=['current_profit_pct',
'position',
'trade_duration'],
index=features_window.index)
features_and_state = DataFrame(
np.zeros((len(features_window), 3)),
columns=["current_profit_pct", "position", "trade_duration"],
index=features_window.index,
)
features_and_state['current_profit_pct'] = self.get_unrealized_profit()
features_and_state['position'] = self._position.value
features_and_state['trade_duration'] = self.get_trade_duration()
features_and_state["current_profit_pct"] = self.get_unrealized_profit()
features_and_state["position"] = self._position.value
features_and_state["trade_duration"] = self.get_trade_duration()
features_and_state = pd.concat([features_window, features_and_state], axis=1)
return features_and_state
else:
@ -262,10 +287,10 @@ class BaseEnvironment(gym.Env):
Get the unrealized profit if the agent is in a trade
"""
if self._last_trade_tick is None:
return 0.
return 0.0
if self._position == Positions.Neutral:
return 0.
return 0.0
elif self._position == Positions.Short:
current_price = self.add_entry_fee(self.prices.iloc[self._current_tick].open)
last_trade_price = self.add_exit_fee(self.prices.iloc[self._last_trade_tick].open)
@ -275,7 +300,7 @@ class BaseEnvironment(gym.Env):
last_trade_price = self.add_entry_fee(self.prices.iloc[self._last_trade_tick].open)
return (current_price - last_trade_price) / last_trade_price
else:
return 0.
return 0.0
@abstractmethod
def is_tradesignal(self, action: int) -> bool:
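
The long-side branch above reduces to a fee-adjusted percentage move of the open price since the entry tick. A worked numeric sketch, assuming add_entry_fee/add_exit_fee (defined outside these hunks) scale the price up and down by the configured fee:

fee = 0.0015
entry_open, current_open = 100.0, 105.0
last_trade_price = entry_open * (1 + fee)   # assumed behavior of add_entry_fee
current_price = current_open / (1 + fee)    # assumed behavior of add_exit_fee
unrealized = (current_price - last_trade_price) / last_trade_price
print(round(unrealized, 4))  # ~0.0469, roughly a 4.7% open profit on the long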

View File

@ -30,10 +30,10 @@ from freqtrade.persistence import Trade
logger = logging.getLogger(__name__)
torch.multiprocessing.set_sharing_strategy('file_system')
torch.multiprocessing.set_sharing_strategy("file_system")
SB3_MODELS = ['PPO', 'A2C', 'DQN']
SB3_CONTRIB_MODELS = ['TRPO', 'ARS', 'RecurrentPPO', 'MaskablePPO', 'QRDQN']
SB3_MODELS = ["PPO", "A2C", "DQN"]
SB3_CONTRIB_MODELS = ["TRPO", "ARS", "RecurrentPPO", "MaskablePPO", "QRDQN"]
class BaseReinforcementLearningModel(IFreqaiModel):
@ -42,57 +42,60 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
def __init__(self, **kwargs) -> None:
super().__init__(config=kwargs['config'])
self.max_threads = min(self.freqai_info['rl_config'].get(
'cpu_count', 1), max(int(self.max_system_threads / 2), 1))
super().__init__(config=kwargs["config"])
self.max_threads = min(
self.freqai_info["rl_config"].get("cpu_count", 1),
max(int(self.max_system_threads / 2), 1),
)
th.set_num_threads(self.max_threads)
self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']
self.reward_params = self.freqai_info["rl_config"]["model_reward_parameters"]
self.train_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
self.eval_env: Union[VecMonitor, SubprocVecEnv, gym.Env] = gym.Env()
self.eval_callback: Optional[MaskableEvalCallback] = None
self.model_type = self.freqai_info['rl_config']['model_type']
self.rl_config = self.freqai_info['rl_config']
self.model_type = self.freqai_info["rl_config"]["model_type"]
self.rl_config = self.freqai_info["rl_config"]
self.df_raw: DataFrame = DataFrame()
self.continual_learning = self.freqai_info.get('continual_learning', False)
self.continual_learning = self.freqai_info.get("continual_learning", False)
if self.model_type in SB3_MODELS:
import_str = 'stable_baselines3'
import_str = "stable_baselines3"
elif self.model_type in SB3_CONTRIB_MODELS:
import_str = 'sb3_contrib'
import_str = "sb3_contrib"
else:
raise OperationalException(f'{self.model_type} not available in stable_baselines3 or '
f'sb3_contrib. please choose one of {SB3_MODELS} or '
f'{SB3_CONTRIB_MODELS}')
raise OperationalException(
f"{self.model_type} not available in stable_baselines3 or "
f"sb3_contrib. please choose one of {SB3_MODELS} or "
f"{SB3_CONTRIB_MODELS}"
)
mod = importlib.import_module(import_str, self.model_type)
self.MODELCLASS = getattr(mod, self.model_type)
self.policy_type = self.freqai_info['rl_config']['policy_type']
self.policy_type = self.freqai_info["rl_config"]["policy_type"]
self.unset_outlier_removal()
self.net_arch = self.rl_config.get('net_arch', [128, 128])
self.net_arch = self.rl_config.get("net_arch", [128, 128])
self.dd.model_type = import_str
self.tensorboard_callback: TensorboardCallback = \
TensorboardCallback(verbose=1, actions=BaseActions)
self.tensorboard_callback: TensorboardCallback = TensorboardCallback(
verbose=1, actions=BaseActions
)
def unset_outlier_removal(self):
"""
If user has activated any function that may remove training points, this
function will set them to false and warn them
"""
if self.ft_params.get('use_SVM_to_remove_outliers', False):
self.ft_params.update({'use_SVM_to_remove_outliers': False})
logger.warning('User tried to use SVM with RL. Deactivating SVM.')
if self.ft_params.get('use_DBSCAN_to_remove_outliers', False):
self.ft_params.update({'use_DBSCAN_to_remove_outliers': False})
logger.warning('User tried to use DBSCAN with RL. Deactivating DBSCAN.')
if self.ft_params.get('DI_threshold', False):
self.ft_params.update({'DI_threshold': False})
logger.warning('User tried to use DI_threshold with RL. Deactivating DI_threshold.')
if self.freqai_info['data_split_parameters'].get('shuffle', False):
self.freqai_info['data_split_parameters'].update({'shuffle': False})
logger.warning('User tried to shuffle training data. Setting shuffle to False')
if self.ft_params.get("use_SVM_to_remove_outliers", False):
self.ft_params.update({"use_SVM_to_remove_outliers": False})
logger.warning("User tried to use SVM with RL. Deactivating SVM.")
if self.ft_params.get("use_DBSCAN_to_remove_outliers", False):
self.ft_params.update({"use_DBSCAN_to_remove_outliers": False})
logger.warning("User tried to use DBSCAN with RL. Deactivating DBSCAN.")
if self.ft_params.get("DI_threshold", False):
self.ft_params.update({"DI_threshold": False})
logger.warning("User tried to use DI_threshold with RL. Deactivating DI_threshold.")
if self.freqai_info["data_split_parameters"].get("shuffle", False):
self.freqai_info["data_split_parameters"].update({"shuffle": False})
logger.warning("User tried to shuffle training data. Setting shuffle to False")
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@ -111,8 +114,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
training_filter=True,
)
dd: Dict[str, Any] = dk.make_train_test_datasets(
features_filtered, labels_filtered)
dd: Dict[str, Any] = dk.make_train_test_datasets(features_filtered, labels_filtered)
self.df_raw = copy.deepcopy(dd["train_features"])
dk.fit_labels() # FIXME useless for now, but just satiating append methods
@ -121,18 +123,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
(dd["train_features"],
dd["train_labels"],
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
dd["train_labels"],
dd["train_weights"])
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dk.feature_pipeline.fit_transform(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
(dd["test_features"],
dd["test_labels"],
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
dd["test_labels"],
dd["test_weights"])
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dk.feature_pipeline.transform(
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
logger.info(
f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
@ -147,9 +149,13 @@ class BaseReinforcementLearningModel(IFreqaiModel):
return model
def set_train_and_eval_environments(self, data_dictionary: Dict[str, DataFrame],
prices_train: DataFrame, prices_test: DataFrame,
dk: FreqaiDataKitchen):
def set_train_and_eval_environments(
self,
data_dictionary: Dict[str, DataFrame],
prices_train: DataFrame,
prices_test: DataFrame,
dk: FreqaiDataKitchen,
):
"""
User can override this if they are using a custom MyRLEnv
:param data_dictionary: dict = common data dictionary containing train and test
@ -165,11 +171,14 @@ class BaseReinforcementLearningModel(IFreqaiModel):
self.train_env = self.MyRLEnv(df=train_df, prices=prices_train, **env_info)
self.eval_env = Monitor(self.MyRLEnv(df=test_df, prices=prices_test, **env_info))
self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
render=False, eval_freq=len(train_df),
best_model_save_path=str(dk.data_path),
use_masking=(self.model_type == 'MaskablePPO' and
is_masking_supported(self.eval_env)))
self.eval_callback = MaskableEvalCallback(
self.eval_env,
deterministic=True,
render=False,
eval_freq=len(train_df),
best_model_save_path=str(dk.data_path),
use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
)
actions = self.train_env.get_actions()
self.tensorboard_callback = TensorboardCallback(verbose=1, actions=actions)
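
Per the docstring above, set_train_and_eval_environments builds both environments from self.MyRLEnv, which is the intended override point for user models; calculate_reward (shown further down in this file) is the method normally customized. A minimal sketch of such an override; the class name, import paths, and the reward logic itself are placeholders to check against your freqtrade version:

from freqtrade.freqai.prediction_models.ReinforcementLearner import ReinforcementLearner
from freqtrade.freqai.RL.Base5ActionRLEnv import Actions, Base5ActionRLEnv
from freqtrade.freqai.RL.BaseEnvironment import Positions


class MyCoolRLModel(ReinforcementLearner):
    """Hypothetical user model: only the nested environment is customized."""

    class MyRLEnv(Base5ActionRLEnv):
        def calculate_reward(self, action: int) -> float:
            # placeholder reward: penalize invalid actions, otherwise pay out
            # the unrealized profit when an open position is closed
            if not self._is_valid(action):
                return -2.0
            pnl = self.get_unrealized_profit()
            if (
                action in (Actions.Long_exit.value, Actions.Short_exit.value)
                and self._position in (Positions.Long, Positions.Short)
            ):
                return float(pnl * 100)
            return 0.0
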
@ -178,16 +187,19 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
Create dictionary of environment arguments
"""
env_info = {"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw}
env_info = {
"window_size": self.CONV_WIDTH,
"reward_kwargs": self.reward_params,
"config": self.config,
"live": self.live,
"can_short": self.can_short,
"pair": pair,
"df_raw": self.df_raw,
}
if self.data_provider:
env_info["fee"] = self.data_provider._exchange \
.get_fee(symbol=self.data_provider.current_whitelist()[0]) # type: ignore
env_info["fee"] = self.data_provider._exchange.get_fee(
symbol=self.data_provider.current_whitelist()[0]
) # type: ignore
return env_info
@ -219,11 +231,12 @@ class BaseReinforcementLearningModel(IFreqaiModel):
for trade in open_trades:
if trade.pair == pair:
if self.data_provider._exchange is None: # type: ignore
logger.error('No exchange available.')
logger.error("No exchange available.")
return 0, 0, 0
else:
current_rate = self.data_provider._exchange.get_rate( # type: ignore
pair, refresh=False, side="exit", is_short=trade.is_short)
pair, refresh=False, side="exit", is_short=trade.is_short
)
now = datetime.now(timezone.utc).timestamp()
trade_duration = int((now - trade.open_date_utc.timestamp()) / self.base_tf_seconds)
@ -255,16 +268,17 @@ class BaseReinforcementLearningModel(IFreqaiModel):
dk.data_dictionary["prediction_features"] = self.drop_ohlc_from_df(filtered_dataframe, dk)
dk.data_dictionary["prediction_features"], _, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True)
dk.data_dictionary["prediction_features"], outlier_check=True
)
pred_df = self.rl_model_predict(
dk.data_dictionary["prediction_features"], dk, self.model)
pred_df = self.rl_model_predict(dk.data_dictionary["prediction_features"], dk, self.model)
pred_df.fillna(0, inplace=True)
return (pred_df, dk.do_predict)
def rl_model_predict(self, dataframe: DataFrame,
dk: FreqaiDataKitchen, model: Any) -> DataFrame:
def rl_model_predict(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, model: Any
) -> DataFrame:
"""
A helper function to make predictions in the Reinforcement learning module.
:param dataframe: DataFrame = the dataframe of features to make the predictions on
@ -275,11 +289,11 @@ class BaseReinforcementLearningModel(IFreqaiModel):
def _predict(window):
observations = dataframe.iloc[window.index]
if self.live and self.rl_config.get('add_state_info', False):
if self.live and self.rl_config.get("add_state_info", False):
market_side, current_profit, trade_duration = self.get_state_info(dk.pair)
observations['current_profit_pct'] = current_profit
observations['position'] = market_side
observations['trade_duration'] = trade_duration
observations["current_profit_pct"] = current_profit
observations["position"] = market_side
observations["trade_duration"] = trade_duration
res, _ = model.predict(observations, deterministic=True)
return res
@ -287,23 +301,31 @@ class BaseReinforcementLearningModel(IFreqaiModel):
return output
def build_ohlc_price_dataframes(self, data_dictionary: dict,
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
DataFrame]:
def build_ohlc_price_dataframes(
self, data_dictionary: dict, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
"""
Builds the train prices and test prices for the environment.
"""
pair = pair.replace(':', '')
pair = pair.replace(":", "")
train_df = data_dictionary["train_features"]
test_df = data_dictionary["test_features"]
# price data for model training and evaluation
tf = self.config['timeframe']
rename_dict = {'%-raw_open': 'open', '%-raw_low': 'low',
'%-raw_high': ' high', '%-raw_close': 'close'}
rename_dict_old = {f'%-{pair}raw_open_{tf}': 'open', f'%-{pair}raw_low_{tf}': 'low',
f'%-{pair}raw_high_{tf}': ' high', f'%-{pair}raw_close_{tf}': 'close'}
tf = self.config["timeframe"]
rename_dict = {
"%-raw_open": "open",
"%-raw_low": "low",
"%-raw_high": " high",
"%-raw_close": "close",
}
rename_dict_old = {
f"%-{pair}raw_open_{tf}": "open",
f"%-{pair}raw_low_{tf}": "low",
f"%-{pair}raw_high_{tf}": " high",
f"%-{pair}raw_close_{tf}": "close",
}
prices_train = train_df.filter(rename_dict.keys(), axis=1)
prices_train_old = train_df.filter(rename_dict_old.keys(), axis=1)
@ -311,17 +333,21 @@ class BaseReinforcementLearningModel(IFreqaiModel):
if not prices_train_old.empty:
prices_train = prices_train_old
rename_dict = rename_dict_old
logger.warning('Reinforcement learning module didn\'t find the correct raw prices '
'assigned in feature_engineering_standard(). '
'Please assign them with:\n'
'dataframe["%-raw_close"] = dataframe["close"]\n'
'dataframe["%-raw_open"] = dataframe["open"]\n'
'dataframe["%-raw_high"] = dataframe["high"]\n'
'dataframe["%-raw_low"] = dataframe["low"]\n'
'inside `feature_engineering_standard()')
logger.warning(
"Reinforcement learning module didn't find the correct raw prices "
"assigned in feature_engineering_standard(). "
"Please assign them with:\n"
'dataframe["%-raw_close"] = dataframe["close"]\n'
'dataframe["%-raw_open"] = dataframe["open"]\n'
'dataframe["%-raw_high"] = dataframe["high"]\n'
'dataframe["%-raw_low"] = dataframe["low"]\n'
"inside `feature_engineering_standard()"
)
elif prices_train.empty:
raise OperationalException("No prices found, please follow log warning "
"instructions to correct the strategy.")
raise OperationalException(
"No prices found, please follow log warning "
"instructions to correct the strategy."
)
prices_train.rename(columns=rename_dict, inplace=True)
prices_train.reset_index(drop=True)
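
The warning above is directly actionable: the strategy has to expose raw candles under the %-raw_* names inside feature_engineering_standard(). A minimal sketch; the strategy name is hypothetical and the exact hook signature should be checked against your freqtrade strategy interface:

from pandas import DataFrame

from freqtrade.strategy.interface import IStrategy


class MyRLStrategy(IStrategy):  # hypothetical strategy; only the relevant hook shown
    def feature_engineering_standard(self, dataframe: DataFrame, **kwargs) -> DataFrame:
        # the exact assignments requested by the warning above
        dataframe["%-raw_close"] = dataframe["close"]
        dataframe["%-raw_open"] = dataframe["open"]
        dataframe["%-raw_high"] = dataframe["high"]
        dataframe["%-raw_low"] = dataframe["low"]
        return dataframe
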
@ -339,7 +365,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
"""
Given a dataframe, drop the ohlc data
"""
drop_list = ['%-raw_open', '%-raw_low', '%-raw_high', '%-raw_close']
drop_list = ["%-raw_open", "%-raw_low", "%-raw_high", "%-raw_close"]
if self.rl_config["drop_ohlc_from_features"]:
df.drop(drop_list, axis=1, inplace=True)
@ -358,7 +384,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
if exists:
model = self.MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
else:
logger.info('No model file on disk to continue learning from.')
logger.info("No model file on disk to continue learning from.")
return model
@ -400,15 +426,18 @@ class BaseReinforcementLearningModel(IFreqaiModel):
return -2
pnl = self.get_unrealized_profit()
factor = 100.
factor = 100.0
# you can use feature values from dataframe
rsi_now = self.raw_features[f"%-rsi-period-10_shift-1_{self.pair}_"
f"{self.config['timeframe']}"].iloc[self._current_tick]
rsi_now = self.raw_features[
f"%-rsi-period-10_shift-1_{self.pair}_" f"{self.config['timeframe']}"
].iloc[self._current_tick]
# reward agent for entering trades
if (action in (Actions.Long_enter.value, Actions.Short_enter.value)
and self._position == Positions.Neutral):
if (
action in (Actions.Long_enter.value, Actions.Short_enter.value)
and self._position == Positions.Neutral
):
if rsi_now < 40:
factor = 40 / rsi_now
else:
@ -419,7 +448,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
if action == Actions.Neutral.value and self._position == Positions.Neutral:
return -1
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
if self._last_trade_tick:
trade_duration = self._current_tick - self._last_trade_tick
else:
@ -431,28 +460,36 @@ class BaseReinforcementLearningModel(IFreqaiModel):
factor *= 0.5
# discourage sitting in position
if (self._position in (Positions.Short, Positions.Long) and
action == Actions.Neutral.value):
if (
self._position in (Positions.Short, Positions.Long)
and action == Actions.Neutral.value
):
return -1 * trade_duration / max_trade_duration
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor)
return 0.
return 0.0
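
The reward above and the model construction earlier in this file read their knobs from rl_config: model_type/policy_type, cpu_count, net_arch, max_trade_duration_candles, drop_ohlc_from_features, and model_reward_parameters (win_reward_factor; rr and profit_aim are assumed to live there as well, since the reward compares pnl against self.profit_aim * self.rr). An illustrative sketch with placeholder values:

rl_config = {
    "model_type": "PPO",         # must be one of SB3_MODELS or SB3_CONTRIB_MODELS
    "policy_type": "MlpPolicy",  # example SB3 policy name
    "cpu_count": 4,
    "net_arch": [128, 128],      # the default used above when the key is absent
    "max_trade_duration_candles": 300,
    "drop_ohlc_from_features": False,
    "model_reward_parameters": {
        "win_reward_factor": 2,
        "rr": 1,              # assumed location: read as self.rr in the reward
        "profit_aim": 0.025,  # assumed location: read as self.profit_aim
    },
}
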
def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
seed: int, train_df: DataFrame, price: DataFrame,
env_info: Dict[str, Any] = {}) -> Callable:
def make_env(
MyRLEnv: Type[BaseEnvironment],
env_id: str,
rank: int,
seed: int,
train_df: DataFrame,
price: DataFrame,
env_info: Dict[str, Any] = {},
) -> Callable:
"""
Utility function for multiprocessed env.
@ -465,10 +502,9 @@ def make_env(MyRLEnv: Type[BaseEnvironment], env_id: str, rank: int,
"""
def _init() -> gym.Env:
env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank,
**env_info)
env = MyRLEnv(df=train_df, prices=price, id=env_id, seed=seed + rank, **env_info)
return env
set_random_seed(seed)
return _init
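
make_env returns a picklable factory, which is the shape that stable-baselines3 vectorized environments expect. A minimal sketch of how such factories are typically consumed (SubprocVecEnv and VecMonitor are already referenced as types earlier in this file; the multiprocessing learner itself is outside this diff, and the import path for make_env follows freqtrade's RL package layout):

from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor

from freqtrade.freqai.RL.BaseReinforcementLearningModel import make_env


def build_train_env(MyRLEnv, train_df, prices_train, env_info, num_cpu=4, seed=42):
    # one factory per worker process; the rank keeps the seeds distinct
    return VecMonitor(
        SubprocVecEnv(
            [
                make_env(MyRLEnv, "train_env", i, seed, train_df, prices_train, env_info)
                for i in range(num_cpu)
            ]
        )
    )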

View File

@ -21,9 +21,7 @@ class BaseClassifierModel(IFreqaiModel):
such as prediction_models/CatboostClassifier.py for guidance.
"""
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@ -47,26 +45,28 @@ class BaseClassifierModel(IFreqaiModel):
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date} --------------------")
logger.info(
f"-------------------- Training on data from {start_date} to "
f"{end_date} --------------------"
)
# split data into train/test data.
dd = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
(dd["train_features"],
dd["train_labels"],
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
dd["train_labels"],
dd["train_weights"])
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dk.feature_pipeline.fit_transform(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
(dd["test_features"],
dd["test_labels"],
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
dd["test_labels"],
dd["test_weights"])
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dk.feature_pipeline.transform(
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
logger.info(
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
@ -77,8 +77,10 @@ class BaseClassifierModel(IFreqaiModel):
end_time = time()
logger.info(f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------")
logger.info(
f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------"
)
return model
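
The test_size check above (repeated in every train() implementation touched by this commit) is driven by data_split_parameters; an illustrative sketch of that block, with shuffle shown being forced to False for RL models earlier in the commit:

freqai_config = {
    "freqai": {
        "data_split_parameters": {
            "test_size": 0.1,  # 0 skips the test-set transform entirely
            "shuffle": False,
        },
    },
}
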
@ -102,7 +104,8 @@ class BaseClassifierModel(IFreqaiModel):
dk.data_dictionary["prediction_features"] = filtered_df
dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True)
dk.data_dictionary["prediction_features"], outlier_check=True
)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
if self.CONV_WIDTH == 1:

View File

@ -59,8 +59,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
class_names = self.model.model_meta_data.get("class_names", None)
if not class_names:
raise ValueError(
"Missing class names. "
"self.model.model_meta_data['class_names'] is None."
"Missing class names. " "self.model.model_meta_data['class_names'] is None."
)
if not self.class_name_to_index:
@ -74,11 +73,11 @@ class BasePyTorchClassifier(BasePyTorchModel):
dk.data_dictionary["prediction_features"] = filtered_df
dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True)
dk.data_dictionary["prediction_features"], outlier_check=True
)
x = self.data_convertor.convert_x(
dk.data_dictionary["prediction_features"],
device=self.device
dk.data_dictionary["prediction_features"], device=self.device
)
self.model.model.eval()
logits = self.model.model(x)
@ -100,10 +99,10 @@ class BasePyTorchClassifier(BasePyTorchModel):
return (pred_df, dk.do_predict)
def encode_class_names(
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str],
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str],
):
"""
encode class name, str -> int
@ -120,15 +119,12 @@ class BasePyTorchClassifier(BasePyTorchModel):
)
@staticmethod
def assert_valid_class_names(
target_column: pd.Series,
class_names: List[str]
):
def assert_valid_class_names(target_column: pd.Series, class_names: List[str]):
non_defined_labels = set(target_column) - set(class_names)
if len(non_defined_labels) != 0:
raise OperationalException(
f"Found non defined labels: {non_defined_labels}, ",
f"expecting labels: {class_names}"
f"expecting labels: {class_names}",
)
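
assert_valid_class_names above simply verifies that every label present in the target column is covered by the declared class names. A minimal illustration; the labels are hypothetical and the import path follows freqtrade's base_models package:

import pandas as pd

from freqtrade.freqai.base_models.BasePyTorchClassifier import BasePyTorchClassifier

targets = pd.Series(["up", "down", "up"])
BasePyTorchClassifier.assert_valid_class_names(targets, class_names=["up", "down"])  # passes
# class_names=["up"] would raise: "Found non defined labels: {'down'} ..."
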
def decode_class_names(self, class_ints: torch.Tensor) -> List[str]:
@ -144,10 +140,10 @@ class BasePyTorchClassifier(BasePyTorchModel):
logger.info(f"encoded class name to index: {self.class_name_to_index}")
def convert_label_column_to_int(
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str]
self,
data_dictionary: Dict[str, pd.DataFrame],
dk: FreqaiDataKitchen,
class_names: List[str],
):
self.init_class_names_to_index_mapping(class_names)
self.encode_class_names(data_dictionary, dk, class_names)
@ -162,9 +158,7 @@ class BasePyTorchClassifier(BasePyTorchModel):
return self.class_names
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@ -191,18 +185,18 @@ class BasePyTorchClassifier(BasePyTorchModel):
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
(dd["train_features"],
dd["train_labels"],
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
dd["train_labels"],
dd["train_weights"])
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dk.feature_pipeline.fit_transform(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
(dd["test_features"],
dd["test_labels"],
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
dd["test_labels"],
dd["test_weights"])
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dk.feature_pipeline.transform(
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
logger.info(
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
@ -212,7 +206,9 @@ class BasePyTorchClassifier(BasePyTorchModel):
model = self.fit(dd, dk)
end_time = time()
logger.info(f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------")
logger.info(
f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------"
)
return model

View File

@ -21,7 +21,7 @@ class BasePyTorchModel(IFreqaiModel, ABC):
super().__init__(config=kwargs["config"])
self.dd.model_type = "pytorch"
self.device = "cuda" if torch.cuda.is_available() else "cpu"
test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size')
test_size = self.freqai_info.get("data_split_parameters", {}).get("test_size")
self.splits = ["train", "test"] if test_size != 0 else ["train"]
self.window_size = self.freqai_info.get("conv_width", 1)

View File

@ -41,11 +41,11 @@ class BasePyTorchRegressor(BasePyTorchModel):
dk.data_dictionary["prediction_features"] = filtered_df
dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True)
dk.data_dictionary["prediction_features"], outlier_check=True
)
x = self.data_convertor.convert_x(
dk.data_dictionary["prediction_features"],
device=self.device
dk.data_dictionary["prediction_features"], device=self.device
)
self.model.model.eval()
y = self.model.model(x)
@ -59,9 +59,7 @@ class BasePyTorchRegressor(BasePyTorchModel):
dk.do_predict = outliers
return (pred_df, dk.do_predict)
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@ -91,19 +89,19 @@ class BasePyTorchRegressor(BasePyTorchModel):
dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
(dd["train_features"],
dd["train_labels"],
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
dd["train_labels"],
dd["train_weights"])
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dk.feature_pipeline.fit_transform(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
(dd["test_features"],
dd["test_labels"],
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
dd["test_labels"],
dd["test_weights"])
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dk.feature_pipeline.transform(
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
logger.info(
@ -114,7 +112,9 @@ class BasePyTorchRegressor(BasePyTorchModel):
model = self.fit(dd, dk)
end_time = time()
logger.info(f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------")
logger.info(
f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------"
)
return model

View File

@ -20,9 +20,7 @@ class BaseRegressionModel(IFreqaiModel):
such as prediction_models/CatboostRegressor.py for guidance.
"""
def train(
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
@ -46,8 +44,10 @@ class BaseRegressionModel(IFreqaiModel):
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date} --------------------")
logger.info(
f"-------------------- Training on data from {start_date} to "
f"{end_date} --------------------"
)
# split data into train/test data.
dd = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
@ -55,19 +55,19 @@ class BaseRegressionModel(IFreqaiModel):
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count)
(dd["train_features"],
dd["train_labels"],
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
dd["train_labels"],
dd["train_weights"])
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dk.feature_pipeline.fit_transform(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
dd["train_labels"], _, _ = dk.label_pipeline.fit_transform(dd["train_labels"])
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
(dd["test_features"],
dd["test_labels"],
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
dd["test_labels"],
dd["test_weights"])
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dk.feature_pipeline.transform(
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
dd["test_labels"], _, _ = dk.label_pipeline.transform(dd["test_labels"])
logger.info(
@ -79,8 +79,10 @@ class BaseRegressionModel(IFreqaiModel):
end_time = time()
logger.info(f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------")
logger.info(
f"-------------------- Done training {pair} "
f"({end_time - start_time:.2f} secs) --------------------"
)
return model
@ -102,7 +104,8 @@ class BaseRegressionModel(IFreqaiModel):
)
dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True)
dk.data_dictionary["prediction_features"], outlier_check=True
)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
if self.CONV_WIDTH == 1:

View File

@ -9,7 +9,6 @@ from freqtrade.exceptions import OperationalException
class FreqaiMultiOutputClassifier(MultiOutputClassifier):
def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable.
Parameters
@ -48,18 +47,14 @@ class FreqaiMultiOutputClassifier(MultiOutputClassifier):
"multi-output regression but has only one."
)
if sample_weight is not None and not has_fit_parameter(
self.estimator, "sample_weight"
):
if sample_weight is not None and not has_fit_parameter(self.estimator, "sample_weight"):
raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params:
fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)(
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
delayed(_fit_estimator)(self.estimator, X, y[:, i], sample_weight, **fit_params[i])
for i in range(y.shape[1])
)
@ -67,8 +62,9 @@ class FreqaiMultiOutputClassifier(MultiOutputClassifier):
for estimator in self.estimators_:
self.classes_.extend(estimator.classes_)
if len(set(self.classes_)) != len(self.classes_):
raise OperationalException(f"Class labels must be unique across targets: "
f"{self.classes_}")
raise OperationalException(
f"Class labels must be unique across targets: " f"{self.classes_}"
)
if hasattr(self.estimators_[0], "n_features_in_"):
self.n_features_in_ = self.estimators_[0].n_features_in_

View File

@ -4,7 +4,6 @@ from sklearn.utils.validation import has_fit_parameter
class FreqaiMultiOutputRegressor(MultiOutputRegressor):
def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable.
Parameters
@ -40,18 +39,14 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor):
"multi-output regression but has only one."
)
if sample_weight is not None and not has_fit_parameter(
self.estimator, "sample_weight"
):
if sample_weight is not None and not has_fit_parameter(self.estimator, "sample_weight"):
raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params:
fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)(
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
delayed(_fit_estimator)(self.estimator, X, y[:, i], sample_weight, **fit_params[i])
for i in range(y.shape[1])
)
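
fit() above expects, when provided, one dict of extra fit parameters per output column, and forwards sample weights only if the wrapped estimator supports them. A minimal usage sketch with an sklearn estimator; the import path for the wrapper follows freqtrade's base_models package:

import numpy as np
from sklearn.linear_model import SGDRegressor

from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor

X = np.random.rand(100, 5)
y = np.random.rand(100, 2)  # two targets -> two cloned estimators
weights = np.ones(100)

model = FreqaiMultiOutputRegressor(estimator=SGDRegressor())
# one (possibly empty) dict of extra fit parameters per target column
model.fit(X, y, sample_weight=weights, fit_params=[{}, {}])
preds = model.predict(X)  # shape (100, 2)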

View File

@ -66,7 +66,6 @@ class FreqaiDataDrawer:
"""
def __init__(self, full_path: Path, config: Config):
self.config = config
self.freqai_info = config.get("freqai", {})
# dictionary holding all pair metadata necessary to load in from disk
@ -81,7 +80,8 @@ class FreqaiDataDrawer:
self.full_path = full_path
self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.full_path / "historic_predictions.backup.pkl"
)
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.global_metadata_path = Path(self.full_path / "global_metadata.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
@ -96,9 +96,12 @@ class FreqaiDataDrawer:
self.metric_tracker_lock = threading.Lock()
self.old_DBSCAN_eps: Dict[str, float] = {}
self.empty_pair_dict: pair_info = {
"model_filename": "", "trained_timestamp": 0,
"data_path": "", "extras": {}}
self.model_type = self.freqai_info.get('model_save_type', 'joblib')
"model_filename": "",
"trained_timestamp": 0,
"data_path": "",
"extras": {},
}
self.model_type = self.freqai_info.get("model_save_type", "joblib")
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
"""
@ -109,11 +112,11 @@ class FreqaiDataDrawer:
if pair not in self.metric_tracker:
self.metric_tracker[pair] = {}
if metric not in self.metric_tracker[pair]:
self.metric_tracker[pair][metric] = {'timestamp': [], 'value': []}
self.metric_tracker[pair][metric] = {"timestamp": [], "value": []}
timestamp = int(datetime.now(timezone.utc).timestamp())
self.metric_tracker[pair][metric]['value'].append(value)
self.metric_tracker[pair][metric]['timestamp'].append(timestamp)
self.metric_tracker[pair][metric]["value"].append(value)
self.metric_tracker[pair][metric]["timestamp"].append(timestamp)
def collect_metrics(self, time_spent: float, pair: str):
"""
@ -121,10 +124,10 @@ class FreqaiDataDrawer:
"""
load1, load5, load15 = psutil.getloadavg()
cpus = psutil.cpu_count()
self.update_metric_tracker('train_time', time_spent, pair)
self.update_metric_tracker('cpu_load1min', load1 / cpus, pair)
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
self.update_metric_tracker("train_time", time_spent, pair)
self.update_metric_tracker("cpu_load1min", load1 / cpus, pair)
self.update_metric_tracker("cpu_load5min", load5 / cpus, pair)
self.update_metric_tracker("cpu_load15min", load15 / cpus, pair)
def load_global_metadata_from_disk(self):
"""
@ -155,7 +158,7 @@ class FreqaiDataDrawer:
Tries to load an existing metrics dictionary if the user
wants to collect metrics.
"""
if self.freqai_info.get('write_metrics_to_disk', False):
if self.freqai_info.get("write_metrics_to_disk", False):
exists = self.metric_tracker_path.is_file()
if exists:
with self.metric_tracker_path.open("r") as fp:
@ -181,10 +184,11 @@ class FreqaiDataDrawer:
)
except EOFError:
logger.warning(
'Historical prediction file was corrupted. Trying to load backup file.')
"Historical prediction file was corrupted. Trying to load backup file."
)
with self.historic_predictions_bkp_path.open("rb") as fp:
self.historic_predictions = cloudpickle.load(fp)
logger.warning('FreqAI successfully loaded the backup historical predictions file.')
logger.warning("FreqAI successfully loaded the backup historical predictions file.")
else:
logger.info("Could not find existing historic_predictions, starting from scratch")
@ -206,27 +210,33 @@ class FreqaiDataDrawer:
Save metric tracker of all pair metrics collected.
"""
with self.save_lock:
with self.metric_tracker_path.open('w') as fp:
rapidjson.dump(self.metric_tracker, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
with self.metric_tracker_path.open("w") as fp:
rapidjson.dump(
self.metric_tracker,
fp,
default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE,
)
def save_drawer_to_disk(self) -> None:
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with self.save_lock:
with self.pair_dictionary_path.open('w') as fp:
rapidjson.dump(self.pair_dict, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
with self.pair_dictionary_path.open("w") as fp:
rapidjson.dump(
self.pair_dict, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE
)
def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
"""
Save global metadata json to disk
"""
with self.save_lock:
with self.global_metadata_path.open('w') as fp:
rapidjson.dump(metadata, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
with self.global_metadata_path.open("w") as fp:
rapidjson.dump(
metadata, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE
)
def np_encoder(self, object):
if isinstance(object, np.generic):
@ -264,9 +274,7 @@ class FreqaiDataDrawer:
return
def set_initial_return_values(
self, pair: str,
pred_df: DataFrame,
dataframe: DataFrame
self, pair: str, pred_df: DataFrame, dataframe: DataFrame
) -> None:
"""
Set the initial return values to the historical predictions dataframe. This avoids needing
@ -285,7 +293,7 @@ class FreqaiDataDrawer:
new_pred["date_pred"] = dataframe["date"]
# set everything to nan except date_pred
columns_to_nan = new_pred.columns.difference(['date_pred', 'date'])
columns_to_nan = new_pred.columns.difference(["date_pred", "date"])
new_pred[columns_to_nan] = None
hist_preds = self.historic_predictions[pair].copy()
@ -296,14 +304,15 @@ class FreqaiDataDrawer:
# find the closest common date between new_pred and historic predictions
# and cut off the new_pred dataframe at that date
common_dates = pd.merge(new_pred, hist_preds,
on="date_pred", how="inner")
common_dates = pd.merge(new_pred, hist_preds, on="date_pred", how="inner")
if len(common_dates.index) > 0:
new_pred = new_pred.iloc[len(common_dates):]
new_pred = new_pred.iloc[len(common_dates) :]
else:
logger.warning("No common dates found between new predictions and historic "
"predictions. You likely left your FreqAI instance offline "
f"for more than {len(dataframe.index)} candles.")
logger.warning(
"No common dates found between new predictions and historic "
"predictions. You likely left your FreqAI instance offline "
f"for more than {len(dataframe.index)} candles."
)
# Pandas warns that its keeping dtypes of non NaN columns...
# yea we know and we already want that behavior. Ignoring.
@ -311,21 +320,22 @@ class FreqaiDataDrawer:
warnings.filterwarnings("ignore", category=FutureWarning)
# reindex new_pred columns to match the historic predictions dataframe
new_pred_reindexed = new_pred.reindex(columns=hist_preds.columns)
df_concat = pd.concat(
[hist_preds, new_pred_reindexed],
ignore_index=True
)
df_concat = pd.concat([hist_preds, new_pred_reindexed], ignore_index=True)
# any missing values will get zeroed out so users can see the exact
# downtime in FreqUI
df_concat = df_concat.fillna(0)
self.historic_predictions[pair] = df_concat
self.model_return_values[pair] = df_concat.tail(
len(dataframe.index)).reset_index(drop=True)
self.model_return_values[pair] = df_concat.tail(len(dataframe.index)).reset_index(drop=True)
def append_model_predictions(self, pair: str, predictions: DataFrame,
do_preds: NDArray[np.int_],
dk: FreqaiDataKitchen, strat_df: DataFrame) -> None:
def append_model_predictions(
self,
pair: str,
predictions: DataFrame,
do_preds: NDArray[np.int_],
dk: FreqaiDataKitchen,
strat_df: DataFrame,
) -> None:
"""
Append model predictions to historic predictions dataframe, then set the
strategy return dataframe to the tail of the historic predictions. The length of
@ -338,15 +348,9 @@ class FreqaiDataDrawer:
index = self.historic_predictions[pair].index[-1:]
columns = self.historic_predictions[pair].columns
zeros_df = pd.DataFrame(
np.zeros((1, len(columns))),
index=index,
columns=columns
)
zeros_df = pd.DataFrame(np.zeros((1, len(columns))), index=index, columns=columns)
self.historic_predictions[pair] = pd.concat(
[self.historic_predictions[pair], zeros_df],
ignore_index=True,
axis=0
[self.historic_predictions[pair], zeros_df], ignore_index=True, axis=0
)
df = self.historic_predictions[pair]
@ -370,8 +374,8 @@ class FreqaiDataDrawer:
df.iloc[-1, DI_values_loc] = dk.DI_values[-1]
# extra values the user added within custom prediction model
if dk.data['extra_returns_per_train']:
rets = dk.data['extra_returns_per_train']
if dk.data["extra_returns_per_train"]:
rets = dk.data["extra_returns_per_train"]
for return_str in rets:
return_loc = df.columns.get_loc(return_str)
df.iloc[-1, return_loc] = rets[return_str]
@ -392,7 +396,8 @@ class FreqaiDataDrawer:
self.model_return_values[pair] = df.tail(len_df).reset_index(drop=True)
def attach_return_values_to_return_dataframe(
self, pair: str, dataframe: DataFrame) -> DataFrame:
self, pair: str, dataframe: DataFrame
) -> DataFrame:
"""
Attach the return values to the strat dataframe
:param dataframe: DataFrame = strategy dataframe
@ -423,15 +428,14 @@ class FreqaiDataDrawer:
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
dataframe["DI_values"] = 0
if dk.data['extra_returns_per_train']:
rets = dk.data['extra_returns_per_train']
if dk.data["extra_returns_per_train"]:
rets = dk.data["extra_returns_per_train"]
for return_str in rets:
dataframe[return_str] = 0
dk.return_dataframe = dataframe
def purge_old_models(self) -> None:
num_keep = self.freqai_info["purge_old_models"]
if not num_keep:
return
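
purge_old_models above, like write_metrics_to_disk and model_save_type read earlier in this class, is a plain freqai-level setting (model_save_type is usually set by the prediction model itself via dd.model_type). An illustrative sketch of that slice of the configuration, with placeholder values:

freqai_settings = {
    "freqai": {
        "purge_old_models": 2,          # how many recent models to keep on disk
        "write_metrics_to_disk": True,  # enables the metric_tracker saved above
    },
}
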
@ -508,10 +512,10 @@ class FreqaiDataDrawer:
save_path = Path(dk.data_path)
# Save the trained model
if self.model_type == 'joblib':
if self.model_type == "joblib":
with (save_path / f"{dk.model_filename}_model.joblib").open("wb") as fp:
cloudpickle.dump(model, fp)
elif self.model_type == 'keras':
elif self.model_type == "keras":
model.save(save_path / f"{dk.model_filename}_model.h5")
elif self.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
model.save(save_path / f"{dk.model_filename}_model.zip")
@ -596,16 +600,18 @@ class FreqaiDataDrawer:
# try to access model in memory instead of loading object from disk to save time
if dk.live and coin in self.model_dictionary:
model = self.model_dictionary[coin]
elif self.model_type == 'joblib':
elif self.model_type == "joblib":
with (dk.data_path / f"{dk.model_filename}_model.joblib").open("rb") as fp:
model = cloudpickle.load(fp)
elif 'stable_baselines' in self.model_type or 'sb3_contrib' == self.model_type:
elif "stable_baselines" in self.model_type or "sb3_contrib" == self.model_type:
mod = importlib.import_module(
self.model_type, self.freqai_info['rl_config']['model_type'])
MODELCLASS = getattr(mod, self.freqai_info['rl_config']['model_type'])
self.model_type, self.freqai_info["rl_config"]["model_type"]
)
MODELCLASS = getattr(mod, self.freqai_info["rl_config"]["model_type"])
model = MODELCLASS.load(dk.data_path / f"{dk.model_filename}_model")
elif self.model_type == 'pytorch':
elif self.model_type == "pytorch":
import torch
zip = torch.load(dk.data_path / f"{dk.model_filename}_model.zip")
model = zip["pytrainer"]
model = model.load_from_checkpoint(zip)
@ -639,23 +645,18 @@ class FreqaiDataDrawer:
df_dp = strategy.dp.get_pair_dataframe(pair, tf)
if len(df_dp.index) == 0:
continue
if str(hist_df.iloc[-1]["date"]) == str(
df_dp.iloc[-1:]["date"].iloc[-1]
):
if str(hist_df.iloc[-1]["date"]) == str(df_dp.iloc[-1:]["date"].iloc[-1]):
continue
try:
index = (
df_dp.loc[
df_dp["date"] == hist_df.iloc[-1]["date"]
].index[0]
+ 1
)
index = df_dp.loc[df_dp["date"] == hist_df.iloc[-1]["date"]].index[0] + 1
except IndexError:
if hist_df.iloc[-1]['date'] < df_dp['date'].iloc[0]:
raise OperationalException("In memory historical data is older than "
f"oldest DataProvider candle for {pair} on "
f"timeframe {tf}")
if hist_df.iloc[-1]["date"] < df_dp["date"].iloc[0]:
raise OperationalException(
"In memory historical data is older than "
f"oldest DataProvider candle for {pair} on "
f"timeframe {tf}"
)
else:
index = -1
logger.warning(
@ -677,7 +678,7 @@ class FreqaiDataDrawer:
axis=0,
)
self.current_candle = history_data[dk.pair][self.config['timeframe']].iloc[-1]['date']
self.current_candle = history_data[dk.pair][self.config["timeframe"]].iloc[-1]["date"]
def load_all_pair_histories(self, timerange: TimeRange, dk: FreqaiDataKitchen) -> None:
"""
@ -715,13 +716,12 @@ class FreqaiDataDrawer:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.historic_data
pairs = self.freqai_info["feature_parameters"].get(
"include_corr_pairlist", []
)
pairs = self.freqai_info["feature_parameters"].get("include_corr_pairlist", [])
for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
base_dataframes[tf] = dk.slice_dataframe(
timerange, historic_data[pair][tf]).reset_index(drop=True)
timerange, historic_data[pair][tf]
).reset_index(drop=True)
if pairs:
for p in pairs:
if pair in p:
@ -741,8 +741,8 @@ class FreqaiDataDrawer:
"""
if not self.historic_predictions_path.is_file():
raise OperationalException(
'Historic predictions not found. Historic predictions data is required '
'to run backtest with the freqai-backtest-live-models option '
"Historic predictions not found. Historic predictions data is required "
"to run backtest with the freqai-backtest-live-models option "
)
self.load_historic_predictions_from_disk()
@ -758,6 +758,6 @@ class FreqaiDataDrawer:
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date = end_date + timedelta(days=1)
backtesting_timerange = TimeRange(
'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
"date", "date", int(start_date.timestamp()), int(end_date.timestamp())
)
return backtesting_timerange

View File

@ -24,7 +24,7 @@ from freqtrade.strategy import merge_informative_pair
from freqtrade.strategy.interface import IStrategy
pd.set_option('future.no_silent_downcasting', True)
pd.set_option("future.no_silent_downcasting", True)
SECONDS_IN_DAY = 86400
SECONDS_IN_HOUR = 3600
@ -98,7 +98,7 @@ class FreqaiDataKitchen:
config["freqai"]["backtest_period_days"],
)
self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
self.data["extra_returns_per_train"] = self.freqai_config.get("extra_returns_per_train", {})
if not self.freqai_config.get("data_kitchen_thread_count", 0):
self.thread_count = max(int(psutil.cpu_count() * 2 - 2), 1)
else:
@ -120,8 +120,7 @@ class FreqaiDataKitchen:
"""
self.full_path = self.get_full_models_path(self.config)
self.data_path = Path(
self.full_path
/ f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
self.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
)
return
@ -138,8 +137,8 @@ class FreqaiDataKitchen:
"""
feat_dict = self.freqai_config["feature_parameters"]
if 'shuffle' not in self.freqai_config['data_split_parameters']:
self.freqai_config["data_split_parameters"].update({'shuffle': False})
if "shuffle" not in self.freqai_config["data_split_parameters"]:
self.freqai_config["data_split_parameters"].update({"shuffle": False})
weights: npt.ArrayLike
if feat_dict.get("weight_factor", 0) > 0:
@ -147,7 +146,7 @@ class FreqaiDataKitchen:
else:
weights = np.ones(len(filtered_dataframe))
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
if self.freqai_config.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
(
train_features,
test_features,
@ -172,26 +171,43 @@ class FreqaiDataKitchen:
if feat_dict["shuffle_after_split"]:
rint1 = random.randint(0, 100)
rint2 = random.randint(0, 100)
train_features = train_features.sample(
frac=1, random_state=rint1).reset_index(drop=True)
train_features = train_features.sample(frac=1, random_state=rint1).reset_index(
drop=True
)
train_labels = train_labels.sample(frac=1, random_state=rint1).reset_index(drop=True)
train_weights = pd.DataFrame(train_weights).sample(
frac=1, random_state=rint1).reset_index(drop=True).to_numpy()[:, 0]
train_weights = (
pd.DataFrame(train_weights)
.sample(frac=1, random_state=rint1)
.reset_index(drop=True)
.to_numpy()[:, 0]
)
test_features = test_features.sample(frac=1, random_state=rint2).reset_index(drop=True)
test_labels = test_labels.sample(frac=1, random_state=rint2).reset_index(drop=True)
test_weights = pd.DataFrame(test_weights).sample(
frac=1, random_state=rint2).reset_index(drop=True).to_numpy()[:, 0]
test_weights = (
pd.DataFrame(test_weights)
.sample(frac=1, random_state=rint2)
.reset_index(drop=True)
.to_numpy()[:, 0]
)
# Simplest way to reverse the order of training and test data:
if self.freqai_config['feature_parameters'].get('reverse_train_test_order', False):
if self.freqai_config["feature_parameters"].get("reverse_train_test_order", False):
return self.build_data_dictionary(
test_features, train_features, test_labels,
train_labels, test_weights, train_weights
)
test_features,
train_features,
test_labels,
train_labels,
test_weights,
train_weights,
)
else:
return self.build_data_dictionary(
train_features, test_features, train_labels,
test_labels, train_weights, test_weights
train_features,
test_features,
train_labels,
test_labels,
train_weights,
test_weights,
)
def filter_features(
@ -224,26 +240,23 @@ class FreqaiDataKitchen:
drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs,
drop_index = drop_index.replace(True, 1).replace(False, 0).infer_objects(copy=False)
if (training_filter):
if training_filter:
# we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs
# if labels has multiple columns (user wants to train multiple models), we detect here
labels = unfiltered_df.filter(label_list, axis=1)
drop_index_labels = pd.isnull(labels).any(axis=1)
drop_index_labels = drop_index_labels.replace(
True, 1
).replace(False, 0).infer_objects(copy=False)
dates = unfiltered_df['date']
drop_index_labels = (
drop_index_labels.replace(True, 1).replace(False, 0).infer_objects(copy=False)
)
dates = unfiltered_df["date"]
filtered_df = filtered_df[
(drop_index == 0) & (drop_index_labels == 0)
] # dropping values
labels = labels[
(drop_index == 0) & (drop_index_labels == 0)
] # assuming the labels depend entirely on the dataframe here.
self.train_dates = dates[
(drop_index == 0) & (drop_index_labels == 0)
]
self.train_dates = dates[(drop_index == 0) & (drop_index_labels == 0)]
logger.info(
f"{self.pair}: dropped {len(unfiltered_df) - len(filtered_df)} training points"
f" due to NaNs in populated dataset {len(unfiltered_df)}."
@ -266,7 +279,6 @@ class FreqaiDataKitchen:
self.data["filter_drop_index_training"] = drop_index
else:
# we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_df).any(axis=1)
@ -295,7 +307,6 @@ class FreqaiDataKitchen:
train_weights: Any,
test_weights: Any,
) -> Dict:
self.data_dictionary = {
"train_features": train_df,
"test_features": test_df,
@ -303,7 +314,7 @@ class FreqaiDataKitchen:
"test_labels": test_labels,
"train_weights": train_weights,
"test_weights": test_weights,
"train_dates": self.train_dates
"train_dates": self.train_dates,
}
return self.data_dictionary
@ -330,9 +341,7 @@ class FreqaiDataKitchen:
full_timerange = TimeRange.parse_timerange(tr)
config_timerange = TimeRange.parse_timerange(self.config["timerange"])
if config_timerange.stopts == 0:
config_timerange.stopts = int(
datetime.now(tz=timezone.utc).timestamp()
)
config_timerange.stopts = int(datetime.now(tz=timezone.utc).timestamp())
timerange_train = copy.deepcopy(full_timerange)
timerange_backtest = copy.deepcopy(full_timerange)
@ -412,9 +421,9 @@ class FreqaiDataKitchen:
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
return weights
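The weighting hunk above builds exponentially decaying sample weights so recent candles dominate training. A standalone sketch of the same formula, using numpy only; after the [::-1] reversal the newest row gets weight 1.0 and the oldest roughly exp(-1 / weight_factor):

import numpy as np

def recency_weights(num_weights: int, weight_factor: float) -> np.ndarray:
    # identical formula to the hunk above: oldest candle first, newest last
    return np.exp(-np.arange(num_weights) / (weight_factor * num_weights))[::-1]

print(recency_weights(5, 1.0))
# -> [0.449 0.549 0.670 0.819 1.000] (approximately)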
def get_predictions_to_append(self, predictions: DataFrame,
do_predict: npt.ArrayLike,
dataframe_backtest: DataFrame) -> DataFrame:
def get_predictions_to_append(
self, predictions: DataFrame, do_predict: npt.ArrayLike, dataframe_backtest: DataFrame
) -> DataFrame:
"""
Get backtest prediction from current backtest period
"""
@ -459,18 +468,18 @@ class FreqaiDataKitchen:
Back fill values to before the backtesting range so that the dataframe matches size
when it goes back to the strategy. These rows are not included in the backtest.
"""
to_keep = [col for col in dataframe.columns if
not col.startswith("&") and not col.startswith("%%")]
self.return_dataframe = pd.merge(dataframe[to_keep],
self.full_df, how='left', on='date')
self.return_dataframe[self.full_df.columns] = (
self.return_dataframe[self.full_df.columns].fillna(value=0))
to_keep = [
col for col in dataframe.columns if not col.startswith("&") and not col.startswith("%%")
]
self.return_dataframe = pd.merge(dataframe[to_keep], self.full_df, how="left", on="date")
self.return_dataframe[self.full_df.columns] = self.return_dataframe[
self.full_df.columns
].fillna(value=0)
self.full_df = DataFrame()
return
def create_fulltimerange(self, backtest_tr: str, backtest_period_days: int) -> str:
if not isinstance(backtest_period_days, int):
raise OperationalException("backtest_period_days must be an integer")
@ -484,9 +493,11 @@ class FreqaiDataKitchen:
# it does not. accommodating these kinds of edge cases just to allow open-ended
# timerange is not high enough priority to warrant the effort. It is safer for now
# to simply ask user to add their end date
raise OperationalException("FreqAI backtesting does not allow open ended timeranges. "
"Please indicate the end date of your desired backtesting. "
"timerange.")
raise OperationalException(
"FreqAI backtesting does not allow open ended timeranges. "
"Please indicate the end date of your desired backtesting. "
"timerange."
)
# backtest_timerange.stopts = int(
# datetime.now(tz=timezone.utc).timestamp()
# )
@ -525,7 +536,6 @@ class FreqaiDataKitchen:
def check_if_new_training_required(
self, trained_timestamp: int
) -> Tuple[bool, TimeRange, TimeRange]:
time = datetime.now(tz=timezone.utc).timestamp()
trained_timerange = TimeRange()
data_load_timerange = TimeRange()
@ -541,7 +551,7 @@ class FreqaiDataKitchen:
# We notice that users like to use exotic indicators where
# they do not know the required timeperiod. Here we include a factor
# of safety by multiplying the user considered "max" by 2.
max_period = self.config.get('startup_candle_count', 20) * 2
max_period = self.config.get("startup_candle_count", 20) * 2
additional_seconds = max_period * max_tf_seconds
if trained_timestamp != 0:
@ -578,17 +588,12 @@ class FreqaiDataKitchen:
return retrain, trained_timerange, data_load_timerange
def set_new_model_names(self, pair: str, timestamp_id: int):
coin, _ = pair.split("/")
self.data_path = Path(
self.full_path
/ f"sub-train-{pair.split('/')[0]}_{timestamp_id}"
)
self.data_path = Path(self.full_path / f"sub-train-{pair.split('/')[0]}_{timestamp_id}")
self.model_filename = f"cb_{coin.lower()}_{timestamp_id}"
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(
self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
)
@ -597,8 +602,7 @@ class FreqaiDataKitchen:
self.all_pairs.append(pair)
def extract_corr_pair_columns_from_populated_indicators(
self,
dataframe: DataFrame
self, dataframe: DataFrame
) -> Dict[str, DataFrame]:
"""
Find the columns of the dataframe corresponding to the corr_pairlist, save them
@ -612,19 +616,20 @@ class FreqaiDataKitchen:
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for pair in pairs:
pair = pair.replace(':', '') # lightgbm does not like colons
pair_cols = [col for col in dataframe.columns if col.startswith("%")
and f"{pair}_" in col]
pair = pair.replace(":", "") # lightgbm does not like colons
pair_cols = [
col for col in dataframe.columns if col.startswith("%") and f"{pair}_" in col
]
if pair_cols:
pair_cols.insert(0, 'date')
pair_cols.insert(0, "date")
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
return corr_dataframes
def attach_corr_pair_columns(self, dataframe: DataFrame,
corr_dataframes: Dict[str, DataFrame],
current_pair: str) -> DataFrame:
def attach_corr_pair_columns(
self, dataframe: DataFrame, corr_dataframes: Dict[str, DataFrame], current_pair: str
) -> DataFrame:
"""
Attach the existing corr_pair dataframes to the current pair dataframe before training
@ -636,21 +641,23 @@ class FreqaiDataKitchen:
ready for training
"""
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
current_pair = current_pair.replace(':', '')
current_pair = current_pair.replace(":", "")
for pair in pairs:
pair = pair.replace(':', '') # lightgbm does not work with colons
pair = pair.replace(":", "") # lightgbm does not work with colons
if current_pair != pair:
dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date')
dataframe = dataframe.merge(corr_dataframes[pair], how="left", on="date")
return dataframe
def get_pair_data_for_features(self,
pair: str,
tf: str,
strategy: IStrategy,
corr_dataframes: dict = {},
base_dataframes: dict = {},
is_corr_pairs: bool = False) -> DataFrame:
def get_pair_data_for_features(
self,
pair: str,
tf: str,
strategy: IStrategy,
corr_dataframes: dict = {},
base_dataframes: dict = {},
is_corr_pairs: bool = False,
) -> DataFrame:
"""
Get the data for the pair. If it's not in the dictionary, get it from the data provider
:param pair: str = pair to get data for
@ -678,8 +685,9 @@ class FreqaiDataKitchen:
dataframe = strategy.dp.get_pair_dataframe(pair=pair, timeframe=tf)
return dataframe
def merge_features(self, df_main: DataFrame, df_to_merge: DataFrame,
tf: str, timeframe_inf: str, suffix: str) -> DataFrame:
def merge_features(
self, df_main: DataFrame, df_to_merge: DataFrame, tf: str, timeframe_inf: str, suffix: str
) -> DataFrame:
"""
Merge the features of the dataframe and remove HLCV and date added columns
:param df_main: DataFrame = main dataframe
@ -689,17 +697,30 @@ class FreqaiDataKitchen:
:param suffix: str = suffix to add to the columns of the dataframe to merge
:return: dataframe = merged dataframe
"""
dataframe = merge_informative_pair(df_main, df_to_merge, tf, timeframe_inf=timeframe_inf,
append_timeframe=False, suffix=suffix, ffill=True)
dataframe = merge_informative_pair(
df_main,
df_to_merge,
tf,
timeframe_inf=timeframe_inf,
append_timeframe=False,
suffix=suffix,
ffill=True,
)
skip_columns = [
(f"{s}_{suffix}") for s in ["date", "open", "high", "low", "close", "volume"]
]
dataframe = dataframe.drop(columns=skip_columns)
return dataframe
def populate_features(self, dataframe: DataFrame, pair: str, strategy: IStrategy,
corr_dataframes: dict, base_dataframes: dict,
is_corr_pairs: bool = False) -> DataFrame:
def populate_features(
self,
dataframe: DataFrame,
pair: str,
strategy: IStrategy,
corr_dataframes: dict,
base_dataframes: dict,
is_corr_pairs: bool = False,
) -> DataFrame:
"""
Use the user defined strategy functions for populating features
:param dataframe: DataFrame = dataframe to populate
@ -715,19 +736,22 @@ class FreqaiDataKitchen:
for tf in tfs:
metadata = {"pair": pair, "tf": tf}
informative_df = self.get_pair_data_for_features(
pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs)
pair, tf, strategy, corr_dataframes, base_dataframes, is_corr_pairs
)
informative_copy = informative_df.copy()
logger.debug(f"Populating features for {pair} {tf}")
for t in self.freqai_config["feature_parameters"]["indicator_periods_candles"]:
df_features = strategy.feature_engineering_expand_all(
informative_copy.copy(), t, metadata=metadata)
informative_copy.copy(), t, metadata=metadata
)
suffix = f"{t}"
informative_df = self.merge_features(informative_df, df_features, tf, tf, suffix)
generic_df = strategy.feature_engineering_expand_basic(
informative_copy.copy(), metadata=metadata)
informative_copy.copy(), metadata=metadata
)
suffix = "gen"
informative_df = self.merge_features(informative_df, generic_df, tf, tf, suffix)
@ -740,8 +764,9 @@ class FreqaiDataKitchen:
df_shift = df_shift.add_suffix("_shift-" + str(n))
informative_df = pd.concat((informative_df, df_shift), axis=1)
dataframe = self.merge_features(dataframe.copy(), informative_df,
self.config["timeframe"], tf, f'{pair}_{tf}')
dataframe = self.merge_features(
dataframe.copy(), informative_df, self.config["timeframe"], tf, f"{pair}_{tf}"
)
return dataframe
@ -771,7 +796,8 @@ class FreqaiDataKitchen:
# check if the user is using the deprecated populate_any_indicators function
new_version = inspect.getsource(strategy.populate_any_indicators) == (
inspect.getsource(IStrategy.populate_any_indicators))
inspect.getsource(IStrategy.populate_any_indicators)
)
if not new_version:
raise OperationalException(
@ -782,11 +808,10 @@ class FreqaiDataKitchen:
f"{DOCS_LINK}/strategy_migration/#freqai-strategy \n"
"And the feature_engineering_* documentation: \n"
f"{DOCS_LINK}/freqai-feature-engineering/"
)
)
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs: List[str] = self.freqai_config["feature_parameters"].get(
"include_corr_pairlist", [])
pairs: List[str] = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
for tf in tfs:
if tf not in base_dataframes:
@ -804,9 +829,11 @@ class FreqaiDataKitchen:
dataframe = base_dataframes[self.config["timeframe"]].copy()
corr_pairs: List[str] = self.freqai_config["feature_parameters"].get(
"include_corr_pairlist", [])
dataframe = self.populate_features(dataframe.copy(), pair, strategy,
corr_dataframes, base_dataframes)
"include_corr_pairlist", []
)
dataframe = self.populate_features(
dataframe.copy(), pair, strategy, corr_dataframes, base_dataframes
)
metadata = {"pair": pair}
dataframe = strategy.feature_engineering_standard(dataframe.copy(), metadata=metadata)
# ensure corr pairs are always last
@ -814,8 +841,9 @@ class FreqaiDataKitchen:
if pair == corr_pair:
continue  # don't repeat anything from whitelist
if corr_pairs and do_corr_pairs:
dataframe = self.populate_features(dataframe.copy(), corr_pair, strategy,
corr_dataframes, base_dataframes, True)
dataframe = self.populate_features(
dataframe.copy(), corr_pair, strategy, corr_dataframes, base_dataframes, True
)
if self.live:
dataframe = strategy.set_freqai_targets(dataframe.copy(), metadata=metadata)
@ -823,7 +851,7 @@ class FreqaiDataKitchen:
self.get_unique_classes_from_labels(dataframe)
if self.config.get('reduce_df_footprint', False):
if self.config.get("reduce_df_footprint", False):
dataframe = reduce_dataframe_footprint(dataframe)
return dataframe
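The populate_features / use_strategy_to_populate_indicators hunks above loop over include_timeframes, include_corr_pairlist and indicator_periods_candles to expand the user's feature_engineering_* functions. For orientation, a hypothetical excerpt of the corresponding feature_parameters block (keys as used in the code above, values purely illustrative):

# Hypothetical config excerpt -- values are examples only.
freqai_feature_parameters = {
    "include_timeframes": ["5m", "1h"],      # tfs looped over in populate_features()
    "include_corr_pairlist": ["BTC/USDT"],   # correlated pairs merged in after the whitelist pair
    "indicator_periods_candles": [10, 20],   # t passed to feature_engineering_expand_all()
}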
@ -858,7 +886,6 @@ class FreqaiDataKitchen:
return dataframe[to_keep]
def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
# self.find_features(dataframe)
self.find_labels(dataframe)
@ -870,9 +897,7 @@ class FreqaiDataKitchen:
for label in self.unique_classes:
self.unique_class_list += list(self.unique_classes[label])
def save_backtesting_prediction(
self, append_df: DataFrame
) -> None:
def save_backtesting_prediction(self, append_df: DataFrame) -> None:
"""
Save prediction dataframe from backtesting to feather file format
:param append_df: dataframe for backtesting period
@ -883,19 +908,14 @@ class FreqaiDataKitchen:
append_df.to_feather(self.backtesting_results_path)
def get_backtesting_prediction(
self
) -> DataFrame:
def get_backtesting_prediction(self) -> DataFrame:
"""
Get prediction dataframe from feather file format
"""
append_df = pd.read_feather(self.backtesting_results_path)
return append_df
def check_if_backtest_prediction_is_valid(
self,
len_backtest_df: int
) -> bool:
def check_if_backtest_prediction_is_valid(self, len_backtest_df: int) -> bool:
"""
Check if a backtesting prediction already exists and if the predictions
to append have the same size as the backtesting dataframe slice
@ -903,27 +923,29 @@ class FreqaiDataKitchen:
:return:
:boolean: whether the prediction file is valid.
"""
path_to_predictionfile = Path(self.full_path /
self.backtest_predictions_folder /
f"{self.model_filename}_prediction.feather")
path_to_predictionfile = Path(
self.full_path
/ self.backtest_predictions_folder
/ f"{self.model_filename}_prediction.feather"
)
self.backtesting_results_path = path_to_predictionfile
file_exists = path_to_predictionfile.is_file()
if file_exists:
append_df = self.get_backtesting_prediction()
if len(append_df) == len_backtest_df and 'date' in append_df:
if len(append_df) == len_backtest_df and "date" in append_df:
logger.info(f"Found backtesting prediction file at {path_to_predictionfile}")
return True
else:
logger.info("A new backtesting prediction file is required. "
"(Number of predictions is different from dataframe length or "
"old prediction file version).")
logger.info(
"A new backtesting prediction file is required. "
"(Number of predictions is different from dataframe length or "
"old prediction file version)."
)
return False
else:
logger.info(
f"Could not find backtesting prediction file at {path_to_predictionfile}"
)
logger.info(f"Could not find backtesting prediction file at {path_to_predictionfile}")
return False
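The three helpers above persist per-period backtest predictions as feather files and only reuse a file when its length matches the current backtest slice and it carries a date column. A minimal sketch of that round trip, assuming pandas with pyarrow available; the file name here is hypothetical (real files follow the {model_filename}_prediction.feather pattern shown above):

import pandas as pd

path = "example_prediction.feather"  # hypothetical path

append_df = pd.DataFrame(
    {"date": pd.date_range("2024-01-01", periods=3, freq="1h"), "&-prediction": [0.1, 0.2, 0.3]}
)
append_df.to_feather(path)      # as in save_backtesting_prediction()

loaded = pd.read_feather(path)  # as in get_backtesting_prediction()
len_backtest_df = 3
is_valid = len(loaded) == len_backtest_df and "date" in loaded  # validity condition above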
def get_full_models_path(self, config: Config) -> Path:
@ -932,9 +954,7 @@ class FreqaiDataKitchen:
:param config: Configuration dictionary
"""
freqai_config: Dict[str, Any] = config["freqai"]
return Path(
config["user_data_dir"] / "models" / str(freqai_config.get("identifier"))
)
return Path(config["user_data_dir"] / "models" / str(freqai_config.get("identifier")))
def remove_special_chars_from_feature_names(self, dataframe: pd.DataFrame) -> pd.DataFrame:
"""
@ -943,7 +963,7 @@ class FreqaiDataKitchen:
:return: dataframe with cleaned feature names
"""
spec_chars = [':']
spec_chars = [":"]
for c in spec_chars:
dataframe.columns = dataframe.columns.str.replace(c, "")
@ -976,12 +996,14 @@ class FreqaiDataKitchen:
"""
Deprecation warning, migration assistance
"""
logger.warning(f"Your custom IFreqaiModel relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline "
"We added a basic pipeline for you, but this will be removed "
"in a future version.")
logger.warning(
f"Your custom IFreqaiModel relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline "
"We added a basic pipeline for you, but this will be removed "
"in a future version."
)
return data_dictionary
@ -989,12 +1011,14 @@ class FreqaiDataKitchen:
"""
Deprecation warning, migration assistance
"""
logger.warning(f"Your custom IFreqaiModel relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline "
"We added a basic pipeline for you, but this will be removed "
"in a future version.")
logger.warning(
f"Your custom IFreqaiModel relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline "
"We added a basic pipeline for you, but this will be removed "
"in a future version."
)
pred_df, _, _ = self.label_pipeline.inverse_transform(df)

View File

@ -57,21 +57,22 @@ class IFreqaiModel(ABC):
"""
def __init__(self, config: Config) -> None:
self.config = config
self.assert_config(self.config)
self.freqai_info: Dict[str, Any] = config["freqai"]
self.data_split_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"data_split_parameters", {})
"data_split_parameters", {}
)
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"model_training_parameters", {})
"model_training_parameters", {}
)
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.retrain = False
self.first = True
self.set_full_path()
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
logger.info("Backtesting module configured to save all models.")
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config)
# set current candle to arbitrary historical date
@ -85,7 +86,7 @@ class IFreqaiModel(ABC):
self.ft_params["DI_threshold"] = 0
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
self.CONV_WIDTH = self.freqai_info.get("conv_width", 1)
self.class_names: List[str] = [] # used in classification subclasses
self.pair_it = 0
self.pair_it_train = 0
@ -95,8 +96,8 @@ class IFreqaiModel(ABC):
self.train_time: float = 0
self.begin_time: float = 0
self.begin_time_train: float = 0
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
self.continual_learning = self.freqai_info.get('continual_learning', False)
self.base_tf_seconds = timeframe_to_seconds(self.config["timeframe"])
self.continual_learning = self.freqai_info.get("continual_learning", False)
self.plot_features = self.ft_params.get("plot_feature_importances", 0)
self.corr_dataframes: Dict[str, DataFrame] = {}
# get_corr_dataframes is controlling the caching of corr_dataframes
@ -109,10 +110,10 @@ class IFreqaiModel(ABC):
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
self.can_short = True # overridden in start() with strategy.can_short
self.model: Any = None
if self.ft_params.get('principal_component_analysis', False) and self.continual_learning:
self.ft_params.update({'principal_component_analysis': False})
logger.warning('User tried to use PCA with continual learning. Deactivating PCA.')
self.activate_tensorboard: bool = self.freqai_info.get('activate_tensorboard', True)
if self.ft_params.get("principal_component_analysis", False) and self.continual_learning:
self.ft_params.update({"principal_component_analysis": False})
logger.warning("User tried to use PCA with continual learning. Deactivating PCA.")
self.activate_tensorboard: bool = self.freqai_info.get("activate_tensorboard", True)
record_params(config, self.full_path)
@ -120,10 +121,9 @@ class IFreqaiModel(ABC):
"""
Return an empty state to be pickled in hyperopt
"""
return ({})
return {}
def assert_config(self, config: Config) -> None:
if not config.get("freqai", {}):
raise OperationalException("No freqai parameters found in configuration file.")
@ -144,7 +144,7 @@ class IFreqaiModel(ABC):
self.can_short = strategy.can_short
if self.live:
self.inference_timer('start')
self.inference_timer("start")
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
@ -162,13 +162,12 @@ class IFreqaiModel(ABC):
dataframe = dk.remove_features_from_df(dk.return_dataframe)
else:
logger.info("Backtesting using historic predictions (live models)")
dk = self.start_backtesting_from_historic_predictions(
dataframe, metadata, self.dk)
dk = self.start_backtesting_from_historic_predictions(dataframe, metadata, self.dk)
dataframe = dk.return_dataframe
self.clean_up()
if self.live:
self.inference_timer('stop', metadata["pair"])
self.inference_timer("stop", metadata["pair"])
return dataframe
@ -225,7 +224,7 @@ class IFreqaiModel(ABC):
# ensure pair is available in dp
if pair not in strategy.dp.current_whitelist():
self.train_queue.popleft()
logger.warning(f'{pair} not in current whitelist, removing from train queue.')
logger.warning(f"{pair} not in current whitelist, removing from train queue.")
continue
(_, trained_timestamp) = self.dd.get_pair_dict_info(pair)
@ -238,23 +237,25 @@ class IFreqaiModel(ABC):
) = dk.check_if_new_training_required(trained_timestamp)
if retrain:
self.train_timer('start')
self.train_timer("start")
dk.set_paths(pair, new_trained_timerange.stopts)
try:
self.extract_data_and_train_model(
new_trained_timerange, pair, strategy, dk, data_load_timerange
)
except Exception as msg:
logger.exception(f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping.")
logger.exception(
f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping."
)
self.train_timer('stop', pair)
self.train_timer("stop", pair)
# only rotate the queue after the first has been trained.
self.train_queue.rotate(-1)
self.dd.save_historic_predictions_to_disk()
if self.freqai_info.get('write_metrics_to_disk', False):
if self.freqai_info.get("write_metrics_to_disk", False):
self.dd.save_metric_tracker_to_disk()
def start_backtesting(
@ -290,8 +291,13 @@ class IFreqaiModel(ABC):
train_it += 1
total_trains = len(dk.backtesting_timeranges)
self.training_timerange = tr_train
len_backtest_df = len(dataframe.loc[(dataframe["date"] >= tr_backtest.startdt) & (
dataframe["date"] < tr_backtest.stopdt), :])
len_backtest_df = len(
dataframe.loc[
(dataframe["date"] >= tr_backtest.startdt)
& (dataframe["date"] < tr_backtest.stopdt),
:,
]
)
if not self.ensure_data_exists(len_backtest_df, tr_backtest, pair):
continue
@ -327,10 +333,12 @@ class IFreqaiModel(ABC):
dataframe_base_train = dataframe.loc[dataframe["date"] < tr_train.stopdt, :]
dataframe_base_train = strategy.set_freqai_targets(
dataframe_base_train, metadata=metadata)
dataframe_base_train, metadata=metadata
)
dataframe_base_backtest = dataframe.loc[dataframe["date"] < tr_backtest.stopdt, :]
dataframe_base_backtest = strategy.set_freqai_targets(
dataframe_base_backtest, metadata=metadata)
dataframe_base_backtest, metadata=metadata
)
tr_train = dk.buffer_timerange(tr_train)
@ -346,25 +354,27 @@ class IFreqaiModel(ABC):
dk.find_labels(dataframe_train)
try:
self.tb_logger = get_tb_logger(self.dd.model_type, dk.data_path,
self.activate_tensorboard)
self.tb_logger = get_tb_logger(
self.dd.model_type, dk.data_path, self.activate_tensorboard
)
self.model = self.train(dataframe_train, pair, dk)
self.tb_logger.close()
except Exception as msg:
logger.warning(
f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping.", exc_info=True)
f"Message: {msg}, skipping.",
exc_info=True,
)
self.model = None
self.dd.pair_dict[pair]["trained_timestamp"] = int(
tr_train.stopts)
self.dd.pair_dict[pair]["trained_timestamp"] = int(tr_train.stopts)
if self.plot_features and self.model is not None:
plot_feature_importance(self.model, pair, dk, self.plot_features)
if self.save_backtest_models and self.model is not None:
logger.info('Saving backtest model to disk.')
logger.info("Saving backtest model to disk.")
self.dd.save_data(self.model, pair, dk)
else:
logger.info('Saving metadata to disk.')
logger.info("Saving metadata to disk.")
self.dd.save_metadata(dk)
else:
self.model = self.dd.load_data(pair, dk)
@ -394,9 +404,11 @@ class IFreqaiModel(ABC):
"""
if not strategy.process_only_new_candles:
raise OperationalException("You are trying to use a FreqAI strategy with "
"process_only_new_candles = False. This is not supported "
"by FreqAI, and it is therefore aborting.")
raise OperationalException(
"You are trying to use a FreqAI strategy with "
"process_only_new_candles = False. This is not supported "
"by FreqAI, and it is therefore aborting."
)
# get the model metadata associated with the current pair
(_, trained_timestamp) = self.dd.get_pair_dict_info(metadata["pair"])
@ -424,8 +436,10 @@ class IFreqaiModel(ABC):
self.model = self.dd.load_data(metadata["pair"], dk)
dataframe = dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"],
do_corr_pairs=self.get_corr_dataframes
strategy,
prediction_dataframe=dataframe,
pair=metadata["pair"],
do_corr_pairs=self.get_corr_dataframes,
)
if not self.model:
@ -447,7 +461,6 @@ class IFreqaiModel(ABC):
def build_strategy_return_arrays(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
) -> None:
# hold the historical predictions in memory so we are sending back
# correct array to strategy
@ -473,18 +486,16 @@ class IFreqaiModel(ABC):
else:
# remaining predictions are made only on the most recent candles for performance and
# historical accuracy reasons.
pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False)
pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH :], dk, first=False)
if self.freqai_info.get('fit_live_predictions_candles', 0) and self.live:
if self.freqai_info.get("fit_live_predictions_candles", 0) and self.live:
self.fit_live_predictions(dk, pair)
self.dd.append_model_predictions(pair, pred_df, do_preds, dk, dataframe)
dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
return
def check_if_feature_list_matches_strategy(
self, dk: FreqaiDataKitchen
) -> None:
def check_if_feature_list_matches_strategy(self, dk: FreqaiDataKitchen) -> None:
"""
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
to a folder holding existing models.
@ -496,7 +507,7 @@ class IFreqaiModel(ABC):
if "training_features_list_raw" in dk.data:
feature_list = dk.data["training_features_list_raw"]
else:
feature_list = dk.data['training_features_list']
feature_list = dk.data["training_features_list"]
if dk.training_features_list != feature_list:
raise OperationalException(
@ -512,38 +523,35 @@ class IFreqaiModel(ABC):
def define_data_pipeline(self, threads=-1) -> Pipeline:
ft_params = self.freqai_info["feature_parameters"]
pipe_steps = [
('const', ds.VarianceThreshold(threshold=0)),
('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
]
("const", ds.VarianceThreshold(threshold=0)),
("scaler", SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))),
]
if ft_params.get("principal_component_analysis", False):
pipe_steps.append(('pca', ds.PCA(n_components=0.999)))
pipe_steps.append(('post-pca-scaler',
SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1)))))
pipe_steps.append(("pca", ds.PCA(n_components=0.999)))
pipe_steps.append(
("post-pca-scaler", SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
)
if ft_params.get("use_SVM_to_remove_outliers", False):
svm_params = ft_params.get(
"svm_params", {"shuffle": False, "nu": 0.01})
pipe_steps.append(('svm', ds.SVMOutlierExtractor(**svm_params)))
svm_params = ft_params.get("svm_params", {"shuffle": False, "nu": 0.01})
pipe_steps.append(("svm", ds.SVMOutlierExtractor(**svm_params)))
di = ft_params.get("DI_threshold", 0)
if di:
pipe_steps.append(('di', ds.DissimilarityIndex(di_threshold=di, n_jobs=threads)))
pipe_steps.append(("di", ds.DissimilarityIndex(di_threshold=di, n_jobs=threads)))
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
pipe_steps.append(('dbscan', ds.DBSCAN(n_jobs=threads)))
pipe_steps.append(("dbscan", ds.DBSCAN(n_jobs=threads)))
sigma = self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0)
sigma = self.freqai_info["feature_parameters"].get("noise_standard_deviation", 0)
if sigma:
pipe_steps.append(('noise', ds.Noise(sigma=sigma)))
pipe_steps.append(("noise", ds.Noise(sigma=sigma)))
return Pipeline(pipe_steps)
def define_label_pipeline(self, threads=-1) -> Pipeline:
label_pipeline = Pipeline([
('scaler', SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))
])
label_pipeline = Pipeline([("scaler", SKLearnWrapper(MinMaxScaler(feature_range=(-1, 1))))])
return label_pipeline
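define_data_pipeline above appends each optional step only when the matching feature_parameters key is set. A hypothetical config excerpt mapping key to step (values illustrative only):

# Hypothetical excerpt; each key toggles one optional step of the data pipeline above.
feature_parameters = {
    "principal_component_analysis": True,          # adds "pca" and "post-pca-scaler"
    "use_SVM_to_remove_outliers": True,            # adds "svm" (ds.SVMOutlierExtractor)
    "svm_params": {"shuffle": False, "nu": 0.01},  # forwarded to the SVM step
    "DI_threshold": 1.0,                           # adds "di" (ds.DissimilarityIndex)
    "use_DBSCAN_to_remove_outliers": True,         # adds "dbscan" (ds.DBSCAN)
    "noise_standard_deviation": 0.02,              # adds "noise" (ds.Noise)
}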
@ -555,7 +563,7 @@ class IFreqaiModel(ABC):
:return:
:boolean: whether the model file exists or not.
"""
if self.dd.model_type == 'joblib':
if self.dd.model_type == "joblib":
file_type = ".joblib"
elif self.dd.model_type in ["stable_baselines3", "sb3_contrib", "pytorch"]:
file_type = ".zip"
@ -572,9 +580,7 @@ class IFreqaiModel(ABC):
"""
Creates and sets the full path for the identifier
"""
self.full_path = Path(
self.config["user_data_dir"] / "models" / f"{self.identifier}"
)
self.full_path = Path(self.config["user_data_dir"] / "models" / f"{self.identifier}")
self.full_path.mkdir(parents=True, exist_ok=True)
def extract_data_and_train_model(
@ -615,8 +621,7 @@ class IFreqaiModel(ABC):
dk.find_features(unfiltered_dataframe)
dk.find_labels(unfiltered_dataframe)
self.tb_logger = get_tb_logger(self.dd.model_type, dk.data_path,
self.activate_tensorboard)
self.tb_logger = get_tb_logger(self.dd.model_type, dk.data_path, self.activate_tensorboard)
model = self.train(unfiltered_dataframe, pair, dk)
self.tb_logger.close()
@ -664,21 +669,21 @@ class IFreqaiModel(ABC):
for label in hist_preds_df.columns:
if hist_preds_df[label].dtype == object:
continue
hist_preds_df[f'{label}_mean'] = 0
hist_preds_df[f'{label}_std'] = 0
hist_preds_df[f"{label}_mean"] = 0
hist_preds_df[f"{label}_std"] = 0
hist_preds_df['do_predict'] = 0
hist_preds_df["do_predict"] = 0
if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
hist_preds_df['DI_values'] = 0
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
hist_preds_df["DI_values"] = 0
for return_str in dk.data['extra_returns_per_train']:
hist_preds_df[return_str] = dk.data['extra_returns_per_train'][return_str]
for return_str in dk.data["extra_returns_per_train"]:
hist_preds_df[return_str] = dk.data["extra_returns_per_train"][return_str]
hist_preds_df['high_price'] = strat_df['high']
hist_preds_df['low_price'] = strat_df['low']
hist_preds_df['close_price'] = strat_df['close']
hist_preds_df['date_pred'] = strat_df['date']
hist_preds_df["high_price"] = strat_df["high"]
hist_preds_df["low_price"] = strat_df["low"]
hist_preds_df["close_price"] = strat_df["close"]
hist_preds_df["date_pred"] = strat_df["date"]
def fit_live_predictions(self, dk: FreqaiDataKitchen, pair: str) -> None:
"""
@ -694,52 +699,51 @@ class IFreqaiModel(ABC):
for label in full_labels:
if self.dd.historic_predictions[dk.pair][label].dtype == object:
continue
f = spy.stats.norm.fit(
self.dd.historic_predictions[dk.pair][label].tail(num_candles))
f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles))
dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
return
def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
def inference_timer(self, do: Literal["start", "stop"] = "start", pair: str = ""):
"""
Timer designed to track the cumulative time spent in FreqAI for one pass through
the whitelist. This will check if the time spent is more than 1/4 the time
of a single candle, and if so, it will warn the user of degraded performance
"""
if do == 'start':
if do == "start":
self.pair_it += 1
self.begin_time = time.time()
elif do == 'stop':
elif do == "stop":
end = time.time()
time_spent = (end - self.begin_time)
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.update_metric_tracker('inference_time', time_spent, pair)
time_spent = end - self.begin_time
if self.freqai_info.get("write_metrics_to_disk", False):
self.dd.update_metric_tracker("inference_time", time_spent, pair)
self.inference_time += time_spent
if self.pair_it == self.total_pairs:
logger.info(
f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
f"Total time spent inferencing pairlist {self.inference_time:.2f} seconds"
)
self.pair_it = 0
self.inference_time = 0
return
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
def train_timer(self, do: Literal["start", "stop"] = "start", pair: str = ""):
"""
Timer designed to track the cumulative time spent training the full pairlist in
FreqAI.
"""
if do == 'start':
if do == "start":
self.pair_it_train += 1
self.begin_time_train = time.time()
elif do == 'stop':
elif do == "stop":
end = time.time()
time_spent = (end - self.begin_time_train)
if self.freqai_info.get('write_metrics_to_disk', False):
time_spent = end - self.begin_time_train
if self.freqai_info.get("write_metrics_to_disk", False):
self.dd.collect_metrics(time_spent, pair)
self.train_time += time_spent
if self.pair_it_train == self.total_pairs:
logger.info(
f'Total time spent training pairlist {self.train_time:.2f} seconds')
logger.info(f"Total time spent training pairlist {self.train_time:.2f} seconds")
self.pair_it_train = 0
self.train_time = 0
return
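A few hunks up, fit_live_predictions fits a normal distribution to the tail of each label's historic predictions and stores the result as labels_mean / labels_std. A standalone sketch of that statistic, assuming only numpy and scipy:

import numpy as np
from scipy import stats

rng = np.random.default_rng(42)
label_tail = rng.normal(loc=0.01, scale=0.05, size=100)  # hypothetical tail of one "&-" label column

# norm.fit returns the MLE (mean, std) of the sample -- the values written into
# dk.data["labels_mean"] / dk.data["labels_std"] in the hunk above
label_mean, label_std = stats.norm.fit(label_tail)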
@ -759,14 +763,14 @@ class IFreqaiModel(ABC):
"""
current_pairlist = self.config.get("exchange", {}).get("pair_whitelist")
if not self.dd.pair_dict:
logger.info('Set fresh train queue from whitelist. '
f'Queue: {current_pairlist}')
logger.info("Set fresh train queue from whitelist. " f"Queue: {current_pairlist}")
return deque(current_pairlist)
best_queue = deque()
pair_dict_sorted = sorted(self.dd.pair_dict.items(),
key=lambda k: k[1]['trained_timestamp'])
pair_dict_sorted = sorted(
self.dd.pair_dict.items(), key=lambda k: k[1]["trained_timestamp"]
)
for pair in pair_dict_sorted:
if pair[0] in current_pairlist:
best_queue.append(pair[0])
@ -774,8 +778,9 @@ class IFreqaiModel(ABC):
if pair not in best_queue:
best_queue.appendleft(pair)
logger.info('Set existing queue from trained timestamps. '
f'Best approximation queue: {best_queue}')
logger.info(
"Set existing queue from trained timestamps. " f"Best approximation queue: {best_queue}"
)
return best_queue
def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
@ -790,14 +795,15 @@ class IFreqaiModel(ABC):
if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
if not self.corr_dataframes:
logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
"Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
"is included in the column names when you are creating features "
"in `feature_engineering_*` functions.")
logger.warning(
"Couldn't cache corr_pair dataframes for improved performance. "
"Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
"is included in the column names when you are creating features "
"in `feature_engineering_*` functions."
)
self.get_corr_dataframes = not bool(self.corr_dataframes)
elif self.corr_dataframes:
dataframe = dk.attach_corr_pair_columns(
dataframe, self.corr_dataframes, dk.pair)
dataframe = dk.attach_corr_pair_columns(dataframe, self.corr_dataframes, dk.pair)
return dataframe
@ -813,8 +819,9 @@ class IFreqaiModel(ABC):
self.pair_it = 1
self.current_candle = self.dd.current_candle
def ensure_data_exists(self, len_dataframe_backtest: int,
tr_backtest: TimeRange, pair: str) -> bool:
def ensure_data_exists(
self, len_dataframe_backtest: int, tr_backtest: TimeRange, pair: str
) -> bool:
"""
Check if the dataframe is empty, if not, report useful information to user.
:param len_dataframe_backtest: the len of backtesting dataframe
@ -823,14 +830,17 @@ class IFreqaiModel(ABC):
:return: if the data exists or not
"""
if self.config.get("freqai_backtest_live_models", False) and len_dataframe_backtest == 0:
logger.info(f"No data found for pair {pair} from "
f"from {tr_backtest.start_fmt} to {tr_backtest.stop_fmt}. "
"Probably more than one training within the same candle period.")
logger.info(
f"No data found for pair {pair} from "
f"from {tr_backtest.start_fmt} to {tr_backtest.stop_fmt}. "
"Probably more than one training within the same candle period."
)
return False
return True
def log_backtesting_progress(self, tr_train: TimeRange, pair: str,
train_it: int, total_trains: int):
def log_backtesting_progress(
self, tr_train: TimeRange, pair: str, train_it: int, total_trains: int
):
"""
Log the backtesting progress so user knows how many pairs have been trained and
how many more pairs/trains remain.
@ -857,30 +867,37 @@ class IFreqaiModel(ABC):
fit_live_predictions_candles = self.freqai_info.get("fit_live_predictions_candles", 0)
if fit_live_predictions_candles:
logger.info("Applying fit_live_predictions in backtesting")
label_columns = [col for col in dk.full_df.columns if (
col.startswith("&") and
not (col.startswith("&") and col.endswith("_mean")) and
not (col.startswith("&") and col.endswith("_std")) and
col not in self.dk.data["extra_returns_per_train"])
label_columns = [
col
for col in dk.full_df.columns
if (
col.startswith("&")
and not (col.startswith("&") and col.endswith("_mean"))
and not (col.startswith("&") and col.endswith("_std"))
and col not in self.dk.data["extra_returns_per_train"]
)
]
for index in range(len(dk.full_df)):
if index >= fit_live_predictions_candles:
self.dd.historic_predictions[self.dk.pair] = (
dk.full_df.iloc[index - fit_live_predictions_candles:index])
self.dd.historic_predictions[self.dk.pair] = dk.full_df.iloc[
index - fit_live_predictions_candles : index
]
self.fit_live_predictions(self.dk, self.dk.pair)
for label in label_columns:
if dk.full_df[label].dtype == object:
continue
if "labels_mean" in self.dk.data:
dk.full_df.at[index, f"{label}_mean"] = (
self.dk.data["labels_mean"][label])
dk.full_df.at[index, f"{label}_mean"] = self.dk.data["labels_mean"][
label
]
if "labels_std" in self.dk.data:
dk.full_df.at[index, f"{label}_std"] = self.dk.data["labels_std"][label]
for extra_col in self.dk.data["extra_returns_per_train"]:
dk.full_df.at[index, f"{extra_col}"] = (
self.dk.data["extra_returns_per_train"][extra_col])
dk.full_df.at[index, f"{extra_col}"] = self.dk.data[
"extra_returns_per_train"
][extra_col]
return
@ -897,7 +914,8 @@ class IFreqaiModel(ABC):
if key_name not in self.metadata:
metadata = self.metadata
metadata[key_name] = int(
pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp())
pd.to_datetime(live_dataframe.tail(1)["date"].values[0]).timestamp()
)
self.update_metadata(metadata)
def start_backtesting_from_historic_predictions(
@ -913,19 +931,20 @@ class IFreqaiModel(ABC):
pair = metadata["pair"]
dk.return_dataframe = dataframe
saved_dataframe = self.dd.historic_predictions[pair]
columns_to_drop = list(set(saved_dataframe.columns).intersection(
dk.return_dataframe.columns))
columns_to_drop = list(
set(saved_dataframe.columns).intersection(dk.return_dataframe.columns)
)
dk.return_dataframe = dk.return_dataframe.drop(columns=list(columns_to_drop))
dk.return_dataframe = pd.merge(
dk.return_dataframe, saved_dataframe, how='left', left_on='date', right_on="date_pred")
dk.return_dataframe, saved_dataframe, how="left", left_on="date", right_on="date_pred"
)
return dk
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
@abstractmethod
def train(self, unfiltered_df: DataFrame, pair: str,
dk: FreqaiDataKitchen, **kwargs) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datahandler
for storing, saving, loading, and analyzing the data.
@ -966,23 +985,25 @@ class IFreqaiModel(ABC):
"""
throw deprecation warning if this function is called
"""
logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline")
logger.warning(
f"Your model {self.__class__.__name__} relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline"
)
dk.feature_pipeline = self.define_data_pipeline(threads=dk.thread_count)
dd = dk.data_dictionary
(dd["train_features"],
dd["train_labels"],
dd["train_weights"]) = dk.feature_pipeline.fit_transform(dd["train_features"],
dd["train_labels"],
dd["train_weights"])
(dd["train_features"], dd["train_labels"], dd["train_weights"]) = (
dk.feature_pipeline.fit_transform(
dd["train_features"], dd["train_labels"], dd["train_weights"]
)
)
(dd["test_features"],
dd["test_labels"],
dd["test_weights"]) = dk.feature_pipeline.transform(dd["test_features"],
dd["test_labels"],
dd["test_weights"])
(dd["test_features"], dd["test_labels"], dd["test_weights"]) = (
dk.feature_pipeline.transform(
dd["test_features"], dd["test_labels"], dd["test_weights"]
)
)
dk.label_pipeline = self.define_label_pipeline(threads=dk.thread_count)
@ -994,13 +1015,16 @@ class IFreqaiModel(ABC):
"""
throw deprecation warning if this function is called
"""
logger.warning(f"Your model {self.__class__.__name__} relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline")
logger.warning(
f"Your model {self.__class__.__name__} relies on the deprecated"
" data pipeline. Please update your model to use the new data pipeline."
" This can be achieved by following the migration guide at "
f"{DOCS_LINK}/strategy_migration/#freqai-new-data-pipeline"
)
dd = dk.data_dictionary
dd["predict_features"], outliers, _ = dk.feature_pipeline.transform(
dd["predict_features"], outlier_check=True)
dd["predict_features"], outlier_check=True
)
if self.freqai_info.get("DI_threshold", 0) > 0:
dk.DI_values = dk.feature_pipeline["di"].di_values
else:

View File

@ -46,14 +46,19 @@ class CatboostClassifier(BaseClassifierModel):
cbr = CatBoostClassifier(
allow_writing_files=True,
loss_function='MultiClass',
loss_function="MultiClass",
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
init_model = self.get_init_model(dk.pair)
cbr.fit(X=train_data, eval_set=test_data, init_model=init_model,
log_cout=sys.stdout, log_cerr=sys.stderr)
cbr.fit(
X=train_data,
eval_set=test_data,
init_model=init_model,
log_cout=sys.stdout,
log_cerr=sys.stderr,
)
return cbr

View File

@ -33,7 +33,7 @@ class CatboostClassifierMultiTarget(BaseClassifierModel):
cbc = CatBoostClassifier(
allow_writing_files=True,
loss_function='MultiClass',
loss_function="MultiClass",
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
@ -45,10 +45,10 @@ class CatboostClassifierMultiTarget(BaseClassifierModel):
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_sets = [None] * data_dictionary["test_labels"].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]):
for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i],
@ -64,13 +64,17 @@ class CatboostClassifierMultiTarget(BaseClassifierModel):
fit_params = []
for i in range(len(eval_sets)):
fit_params.append({
'eval_set': eval_sets[i], 'init_model': init_models[i],
'log_cout': sys.stdout, 'log_cerr': sys.stderr,
})
fit_params.append(
{
"eval_set": eval_sets[i],
"init_model": init_models[i],
"log_cout": sys.stdout,
"log_cerr": sys.stderr,
}
)
model = FreqaiMultiOutputClassifier(estimator=cbc)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -35,7 +35,7 @@ class CatboostRegressor(BaseRegressionModel):
label=data_dictionary["train_labels"],
weight=data_dictionary["train_weights"],
)
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
test_data = None
else:
test_data = Pool(
@ -52,7 +52,12 @@ class CatboostRegressor(BaseRegressionModel):
**self.model_training_parameters,
)
model.fit(X=train_data, eval_set=test_data, init_model=init_model,
log_cout=sys.stdout, log_cerr=sys.stderr)
model.fit(
X=train_data,
eval_set=test_data,
init_model=init_model,
log_cout=sys.stdout,
log_cerr=sys.stderr,
)
return model

View File

@ -44,10 +44,10 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_sets = [None] * data_dictionary["test_labels"].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]):
for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i],
@ -63,13 +63,17 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
fit_params = []
for i in range(len(eval_sets)):
fit_params.append({
'eval_set': eval_sets[i], 'init_model': init_models[i],
'log_cout': sys.stdout, 'log_cerr': sys.stderr,
})
fit_params.append(
{
"eval_set": eval_sets[i],
"init_model": init_models[i],
"log_cout": sys.stdout,
"log_cerr": sys.stderr,
}
)
model = FreqaiMultiOutputRegressor(estimator=cbr)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -28,12 +28,16 @@ class LightGBMClassifier(BaseClassifierModel):
:param dk: The datakitchen object for the current coin/model
"""
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
test_weights = None
else:
eval_set = [(data_dictionary["test_features"].to_numpy(),
data_dictionary["test_labels"].to_numpy()[:, 0])]
eval_set = [
(
data_dictionary["test_features"].to_numpy(),
data_dictionary["test_labels"].to_numpy()[:, 0],
)
]
test_weights = data_dictionary["test_weights"]
X = data_dictionary["train_features"].to_numpy()
y = data_dictionary["train_labels"].to_numpy()[:, 0]
@ -42,7 +46,13 @@ class LightGBMClassifier(BaseClassifierModel):
init_model = self.get_init_model(dk.pair)
model = LGBMClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
eval_sample_weight=[test_weights], init_model=init_model)
model.fit(
X=X,
y=y,
eval_set=eval_set,
sample_weight=train_weights,
eval_sample_weight=[test_weights],
init_model=init_model,
)
return model

View File

@ -38,13 +38,13 @@ class LightGBMClassifierMultiTarget(BaseClassifierModel):
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets = [(None, None)] * data_dictionary["test_labels"].shape[1] # type: ignore
for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = ( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
data_dictionary["test_labels"].iloc[:, i],
)
init_model = self.get_init_model(dk.pair)
@ -56,11 +56,15 @@ class LightGBMClassifierMultiTarget(BaseClassifierModel):
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
'init_model': init_models[i]})
{
"eval_set": eval_sets[i],
"eval_sample_weight": eval_weights,
"init_model": init_models[i],
}
)
model = FreqaiMultiOutputClassifier(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -28,7 +28,7 @@ class LightGBMRegressor(BaseRegressionModel):
:param dk: The datakitchen object for the current coin/model
"""
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
eval_weights = None
else:
@ -42,7 +42,13 @@ class LightGBMRegressor(BaseRegressionModel):
model = LGBMRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
eval_sample_weight=[eval_weights], init_model=init_model)
model.fit(
X=X,
y=y,
eval_set=eval_set,
sample_weight=train_weights,
eval_sample_weight=[eval_weights],
init_model=init_model,
)
return model

View File

@ -38,14 +38,16 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = [( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)]
eval_sets = [(None, None)] * data_dictionary["test_labels"].shape[1] # type: ignore
for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = [
( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i],
)
]
init_model = self.get_init_model(dk.pair)
if init_model:
@ -56,11 +58,15 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
'init_model': init_models[i]})
{
"eval_set": eval_sets[i],
"eval_sample_weight": eval_weights,
"init_model": init_models[i],
}
)
model = FreqaiMultiOutputRegressor(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -45,16 +45,15 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
@property
def data_convertor(self) -> PyTorchDataConvertor:
return DefaultPyTorchDataConvertor(
target_tensor_type=torch.long,
squeeze_target_tensor=True
target_tensor_type=torch.long, squeeze_target_tensor=True
)
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
@ -69,9 +68,7 @@ class PyTorchMLPClassifier(BasePyTorchClassifier):
self.convert_label_column_to_int(data_dictionary, dk, class_names)
n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel(
input_dim=n_features,
output_dim=len(class_names),
**self.model_kwargs
input_dim=n_features, output_dim=len(class_names), **self.model_kwargs
)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
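The `target_tensor_type=torch.long, squeeze_target_tensor=True` choice above exists because `nn.CrossEntropyLoss` expects class indices as a 1-D long tensor rather than one-hot floats. A minimal sketch of that contract (shapes and layer sizes are arbitrary):

import torch
import torch.nn as nn

n_features, n_classes, batch = 8, 3, 16

model = nn.Sequential(nn.Linear(n_features, 32), nn.ReLU(), nn.Linear(32, n_classes))
criterion = nn.CrossEntropyLoss()

x = torch.randn(batch, n_features)                       # float features
y = torch.randint(0, n_classes, (batch, 1)).squeeze(-1)  # long class indices, squeezed to 1-D

logits = model(x)                                        # (batch, n_classes)
loss = criterion(logits, y)                              # y must be dtype long and shape (batch,)
loss.backward()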

View File

@ -50,9 +50,9 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
@ -63,11 +63,7 @@ class PyTorchMLPRegressor(BasePyTorchRegressor):
"""
n_features = data_dictionary["train_features"].shape[-1]
model = PyTorchMLPModel(
input_dim=n_features,
output_dim=1,
**self.model_kwargs
)
model = PyTorchMLPModel(input_dim=n_features, output_dim=1, **self.model_kwargs)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
criterion = torch.nn.MSELoss()

View File

@ -59,9 +59,9 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
def __init__(self, **kwargs) -> None:
super().__init__(**kwargs)
config = self.freqai_info.get("model_training_parameters", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
self.learning_rate: float = config.get("learning_rate", 3e-4)
self.model_kwargs: Dict[str, Any] = config.get("model_kwargs", {})
self.trainer_kwargs: Dict[str, Any] = config.get("trainer_kwargs", {})
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
@ -77,7 +77,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
input_dim=n_features,
output_dim=n_labels,
time_window=self.window_size,
**self.model_kwargs
**self.model_kwargs,
)
model.to(self.device)
optimizer = torch.optim.AdamW(model.parameters(), lr=self.learning_rate)
@ -116,11 +116,11 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
)
dk.data_dictionary["prediction_features"], outliers, _ = dk.feature_pipeline.transform(
dk.data_dictionary["prediction_features"], outlier_check=True)
dk.data_dictionary["prediction_features"], outlier_check=True
)
x = self.data_convertor.convert_x(
dk.data_dictionary["prediction_features"],
device=self.device
dk.data_dictionary["prediction_features"], device=self.device
)
# if user is asking for multiple predictions, slide the window
# along the tensor
@ -131,7 +131,7 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
if x.shape[1] > self.window_size:
ws = self.window_size
for i in range(0, x.shape[1] - ws):
xb = x[:, i:i + ws, :].to(self.device)
xb = x[:, i : i + ws, :].to(self.device)
y = self.model.model(xb)
yb = torch.cat((yb, y), dim=1)
else:
@ -148,7 +148,8 @@ class PyTorchTransformerRegressor(BasePyTorchRegressor):
dk.do_predict = outliers
if x.shape[1] > 1:
zeros_df = pd.DataFrame(np.zeros((x.shape[1] - len(pred_df), len(pred_df.columns))),
columns=pred_df.columns)
zeros_df = pd.DataFrame(
np.zeros((x.shape[1] - len(pred_df), len(pred_df.columns))), columns=pred_df.columns
)
pred_df = pd.concat([zeros_df, pred_df], axis=0, ignore_index=True)
return (pred_df, dk.do_predict)
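The windowed prediction path above slides a fixed-size slice along the time axis, concatenates one prediction per position, and finally left-pads the result with zeros so it lines up with the original index. A toy version of the sliding part, with a single `nn.Linear` standing in for the transformer model:

import torch
import torch.nn as nn

ws, n_features = 5, 4
net = nn.Linear(n_features, 1)                 # stand-in for the transformer model

x = torch.randn(1, 12, n_features)             # (batch, time, features), longer than one window

yb = torch.empty(1, 0, 1)                      # accumulator: one prediction per window position
for i in range(0, x.shape[1] - ws):
    xb = x[:, i : i + ws, :]                   # slide the fixed-size window one step at a time
    y = net(xb)[:, -1:, :]                     # keep the prediction at the window's last step
    yb = torch.cat((yb, y), dim=1)

print(yb.shape)                                # torch.Size([1, 7, 1]) for 12 steps and ws=5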

View File

@ -56,27 +56,30 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
train_df = data_dictionary["train_features"]
total_timesteps = self.freqai_info["rl_config"]["train_cycles"] * len(train_df)
policy_kwargs = dict(activation_fn=th.nn.ReLU,
net_arch=self.net_arch)
policy_kwargs = dict(activation_fn=th.nn.ReLU, net_arch=self.net_arch)
if self.activate_tensorboard:
tb_path = Path(dk.full_path / "tensorboard" / dk.pair.split('/')[0])
tb_path = Path(dk.full_path / "tensorboard" / dk.pair.split("/")[0])
else:
tb_path = None
if dk.pair not in self.dd.model_dictionary or not self.continual_learning:
model = self.MODELCLASS(self.policy_type, self.train_env, policy_kwargs=policy_kwargs,
tensorboard_log=tb_path,
**self.freqai_info.get('model_training_parameters', {})
)
model = self.MODELCLASS(
self.policy_type,
self.train_env,
policy_kwargs=policy_kwargs,
tensorboard_log=tb_path,
**self.freqai_info.get("model_training_parameters", {}),
)
else:
logger.info('Continual training activated - starting training from previously '
'trained agent.')
logger.info(
"Continual training activated - starting training from previously " "trained agent."
)
model = self.dd.model_dictionary[dk.pair]
model.set_env(self.train_env)
callbacks: List[Any] = [self.eval_callback, self.tensorboard_callback]
progressbar_callback: Optional[ProgressBarCallback] = None
if self.rl_config.get('progress_bar', False):
if self.rl_config.get("progress_bar", False):
progressbar_callback = ProgressBarCallback()
callbacks.insert(0, progressbar_callback)
@ -90,7 +93,7 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
progressbar_callback.on_training_end()
if Path(dk.data_path / "best_model.zip").is_file():
logger.info('Callback found a best model.')
logger.info("Callback found a best model.")
best_model = self.MODELCLASS.load(dk.data_path / "best_model")
return best_model
@ -127,20 +130,18 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
return -2
pnl = self.get_unrealized_profit()
factor = 100.
factor = 100.0
# reward agent for entering trades
if (action == Actions.Long_enter.value
and self._position == Positions.Neutral):
if action == Actions.Long_enter.value and self._position == Positions.Neutral:
return 25
if (action == Actions.Short_enter.value
and self._position == Positions.Neutral):
if action == Actions.Short_enter.value and self._position == Positions.Neutral:
return 25
# discourage agent from not entering trades
if action == Actions.Neutral.value and self._position == Positions.Neutral:
return -1
max_trade_duration = self.rl_config.get('max_trade_duration_candles', 300)
max_trade_duration = self.rl_config.get("max_trade_duration_candles", 300)
trade_duration = self._current_tick - self._last_trade_tick # type: ignore
if trade_duration <= max_trade_duration:
@ -149,20 +150,22 @@ class ReinforcementLearner(BaseReinforcementLearningModel):
factor *= 0.5
# discourage sitting in position
if (self._position in (Positions.Short, Positions.Long) and
action == Actions.Neutral.value):
if (
self._position in (Positions.Short, Positions.Long)
and action == Actions.Neutral.value
):
return -1 * trade_duration / max_trade_duration
# close long
if action == Actions.Long_exit.value and self._position == Positions.Long:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor)
# close short
if action == Actions.Short_exit.value and self._position == Positions.Short:
if pnl > self.profit_aim * self.rr:
factor *= self.rl_config['model_reward_parameters'].get('win_reward_factor', 2)
factor *= self.rl_config["model_reward_parameters"].get("win_reward_factor", 2)
return float(pnl * factor)
return 0.
return 0.0
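Condensed into a standalone function, the reward shaping above reads as a handful of cases: a flat bonus for entering, a small penalty for idling, a duration-scaled penalty for sitting in a trade, and a profit-scaled reward for exits (boosted by `win_reward_factor` when the profit aim is beaten). A hedged sketch, with plain strings standing in for the Actions/Positions enums, defaults assumed where the diff does not show them, and the invalid-action branch omitted:

def calculate_reward_sketch(
    action: str,              # "long_enter", "short_enter", "neutral", "long_exit", "short_exit"
    position: str,            # "neutral", "long", "short"
    pnl: float,               # unrealized profit of the open trade
    trade_duration: int,      # candles since entry
    profit_aim: float = 0.025,
    rr: float = 1.0,
    max_trade_duration: int = 300,
    win_reward_factor: float = 2.0,
) -> float:
    factor = 100.0
    # reward the agent for entering trades, lightly penalise staying flat
    if action in ("long_enter", "short_enter") and position == "neutral":
        return 25.0
    if action == "neutral" and position == "neutral":
        return -1.0
    # shrink the reward factor once a trade runs past the duration target
    if trade_duration > max_trade_duration:
        factor *= 0.5
    # penalise sitting in an open position without acting, scaled by how long it has been open
    if position in ("long", "short") and action == "neutral":
        return -1.0 * trade_duration / max_trade_duration
    # profit-scaled reward for closing, boosted when the profit aim is beaten
    if (action == "long_exit" and position == "long") or (
        action == "short_exit" and position == "short"
    ):
        if pnl > profit_aim * rr:
            factor *= win_reward_factor
        return float(pnl * factor)
    return 0.0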

View File

@ -20,9 +20,13 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
Demonstration of how to build vectorized environments
"""
def set_train_and_eval_environments(self, data_dictionary: Dict[str, Any],
prices_train: DataFrame, prices_test: DataFrame,
dk: FreqaiDataKitchen):
def set_train_and_eval_environments(
self,
data_dictionary: Dict[str, Any],
prices_train: DataFrame,
prices_test: DataFrame,
dk: FreqaiDataKitchen,
):
"""
User can override this if they are using a custom MyRLEnv
:param data_dictionary: dict = common data dictionary containing train and test
@ -45,22 +49,35 @@ class ReinforcementLearner_multiproc(ReinforcementLearner):
eval_freq = len(train_df) // self.max_threads
env_id = "train_env"
self.train_env = VecMonitor(SubprocVecEnv([make_env(self.MyRLEnv, env_id, i, 1,
train_df, prices_train,
env_info=env_info) for i
in range(self.max_threads)]))
self.train_env = VecMonitor(
SubprocVecEnv(
[
make_env(self.MyRLEnv, env_id, i, 1, train_df, prices_train, env_info=env_info)
for i in range(self.max_threads)
]
)
)
eval_env_id = 'eval_env'
self.eval_env = VecMonitor(SubprocVecEnv([make_env(self.MyRLEnv, eval_env_id, i, 1,
test_df, prices_test,
env_info=env_info) for i
in range(self.max_threads)]))
eval_env_id = "eval_env"
self.eval_env = VecMonitor(
SubprocVecEnv(
[
make_env(
self.MyRLEnv, eval_env_id, i, 1, test_df, prices_test, env_info=env_info
)
for i in range(self.max_threads)
]
)
)
self.eval_callback = MaskableEvalCallback(self.eval_env, deterministic=True,
render=False, eval_freq=eval_freq,
best_model_save_path=str(dk.data_path),
use_masking=(self.model_type == 'MaskablePPO' and
is_masking_supported(self.eval_env)))
self.eval_callback = MaskableEvalCallback(
self.eval_env,
deterministic=True,
render=False,
eval_freq=eval_freq,
best_model_save_path=str(dk.data_path),
use_masking=(self.model_type == "MaskablePPO" and is_masking_supported(self.eval_env)),
)
# TENSORBOARD CALLBACK IS NOT RECOMMENDED FOR USE WITH MULTIPLE ENVS:
# IT WILL RETURN FALSE INFORMATION, AND IT IS NOT THREAD SAFE WITH SB3!!!
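For readers new to Stable-Baselines3, the pattern above is: build a list of zero-argument environment factories, hand it to `SubprocVecEnv` so each environment runs in its own process, and wrap the result in `VecMonitor` for episode statistics. A self-contained sketch with a stock gymnasium env standing in for `MyRLEnv`:

import gymnasium as gym
from stable_baselines3.common.vec_env import SubprocVecEnv, VecMonitor


def make_env(env_id: str, rank: int, seed: int = 1):
    # SubprocVecEnv expects callables, not env instances, so return a factory
    def _init():
        env = gym.make(env_id)
        env.reset(seed=seed + rank)
        return env
    return _init


if __name__ == "__main__":             # required: SubprocVecEnv spawns worker processes
    max_threads = 4
    train_env = VecMonitor(
        SubprocVecEnv([make_env("CartPole-v1", i) for i in range(max_threads)])
    )
    obs = train_env.reset()
    print(obs.shape)                   # (max_threads, obs_dim)
    train_env.close()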

View File

@ -35,7 +35,7 @@ class SKLearnRandomForestClassifier(BaseClassifierModel):
X = data_dictionary["train_features"].to_numpy()
y = data_dictionary["train_labels"].to_numpy()[:, 0]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
else:
test_features = data_dictionary["test_features"].to_numpy()
@ -44,8 +44,10 @@ class SKLearnRandomForestClassifier(BaseClassifierModel):
eval_set = (test_features, test_labels)
if self.freqai_info.get("continual_learning", False):
logger.warning("Continual learning is not supported for "
"SKLearnRandomForestClassifier, ignoring.")
logger.warning(
"Continual learning is not supported for "
"SKLearnRandomForestClassifier, ignoring."
)
train_weights = data_dictionary["train_weights"]
@ -73,10 +75,11 @@ class SKLearnRandomForestClassifier(BaseClassifierModel):
le = LabelEncoder()
label = dk.label_list[0]
labels_before = list(dk.data['labels_std'].keys())
labels_before = list(dk.data["labels_std"].keys())
labels_after = le.fit_transform(labels_before).tolist()
pred_df[label] = le.inverse_transform(pred_df[label])
pred_df = pred_df.rename(
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
)
return (pred_df, dk.do_predict)

View File

@ -41,7 +41,7 @@ class XGBoostClassifier(BaseClassifierModel):
if not is_integer_dtype(y):
y = pd.Series(le.fit_transform(y), dtype="int64")
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
else:
test_features = data_dictionary["test_features"].to_numpy()
@ -58,8 +58,7 @@ class XGBoostClassifier(BaseClassifierModel):
model = XGBClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
xgb_model=init_model)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, xgb_model=init_model)
return model
@ -79,10 +78,11 @@ class XGBoostClassifier(BaseClassifierModel):
le = LabelEncoder()
label = dk.label_list[0]
labels_before = list(dk.data['labels_std'].keys())
labels_before = list(dk.data["labels_std"].keys())
labels_after = le.fit_transform(labels_before).tolist()
pred_df[label] = le.inverse_transform(pred_df[label])
pred_df = pred_df.rename(
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
)
return (pred_df, dk.do_predict)
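The encode-then-decode dance above (string labels to integers for XGBoost, then `inverse_transform` plus a column rename on the prediction DataFrame) can be seen in isolation like this (toy labels; the "&s-class" column name is illustrative, not the library's convention):

import pandas as pd
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
labels_before = ["down", "same", "up"]                    # string classes produced by the strategy
labels_after = le.fit_transform(labels_before).tolist()   # -> [0, 1, 2]

# pretend model output: an encoded class column plus one probability column per encoded class
pred_df = pd.DataFrame(
    {
        "&s-class": [2, 0, 1],
        0: [0.1, 0.7, 0.2],
        1: [0.2, 0.2, 0.5],
        2: [0.7, 0.1, 0.3],
    }
)

pred_df["&s-class"] = le.inverse_transform(pred_df["&s-class"])   # ints back to strings
pred_df = pred_df.rename(
    columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
)
print(pred_df)                                            # columns: &s-class, down, same, up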

View File

@ -41,7 +41,7 @@ class XGBoostRFClassifier(BaseClassifierModel):
if not is_integer_dtype(y):
y = pd.Series(le.fit_transform(y), dtype="int64")
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
else:
test_features = data_dictionary["test_features"].to_numpy()
@ -58,8 +58,7 @@ class XGBoostRFClassifier(BaseClassifierModel):
model = XGBRFClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
xgb_model=init_model)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, xgb_model=init_model)
return model
@ -79,10 +78,11 @@ class XGBoostRFClassifier(BaseClassifierModel):
le = LabelEncoder()
label = dk.label_list[0]
labels_before = list(dk.data['labels_std'].keys())
labels_before = list(dk.data["labels_std"].keys())
labels_after = le.fit_transform(labels_before).tolist()
pred_df[label] = le.inverse_transform(pred_df[label])
pred_df = pred_df.rename(
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}
)
return (pred_df, dk.do_predict)

View File

@ -37,7 +37,7 @@ class XGBoostRFRegressor(BaseRegressionModel):
eval_weights = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]
eval_weights = [data_dictionary["test_weights"]]
sample_weight = data_dictionary["train_weights"]
@ -46,8 +46,14 @@ class XGBoostRFRegressor(BaseRegressionModel):
model = XGBRFRegressor(**self.model_training_parameters)
model.set_params(callbacks=[TBCallback(dk.data_path)])
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
model.fit(
X=X,
y=y,
sample_weight=sample_weight,
eval_set=eval_set,
sample_weight_eval_set=eval_weights,
xgb_model=xgb_model,
)
# set the callbacks to empty so that we can serialize to disk later
model.set_params(callbacks=[])

View File

@ -36,15 +36,8 @@ class XGBoostRegressor(BaseRegressionModel):
eval_set = None
eval_weights = None
else:
eval_set = [
(data_dictionary["test_features"],
data_dictionary["test_labels"]),
(X, y)
]
eval_weights = [
data_dictionary['test_weights'],
data_dictionary['train_weights']
]
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"]), (X, y)]
eval_weights = [data_dictionary["test_weights"], data_dictionary["train_weights"]]
sample_weight = data_dictionary["train_weights"]
@ -53,8 +46,14 @@ class XGBoostRegressor(BaseRegressionModel):
model = XGBRegressor(**self.model_training_parameters)
model.set_params(callbacks=[TBCallback(dk.data_path)])
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
model.fit(
X=X,
y=y,
sample_weight=sample_weight,
eval_set=eval_set,
sample_weight_eval_set=eval_weights,
xgb_model=xgb_model,
)
# set the callbacks to empty so that we can serialize to disk later
model.set_params(callbacks=[])
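The `set_params(callbacks=[])` step carries reasoning worth spelling out: a live callback can hold resources (here, presumably the TensorBoard writer) that do not survive pickling, so it is dropped once training is done. A sketch of the same pattern with an invented callback that keeps an open file handle:

import pickle
import numpy as np
import xgboost as xgb
from xgboost import XGBRegressor


class LoggingCallback(xgb.callback.TrainingCallback):
    """Illustrative stand-in for TBCallback: it holds a handle we do not want to pickle."""

    def __init__(self):
        self.log_handle = open("xgb_train.log", "w")      # open file handles are not picklable

    def after_iteration(self, model, epoch, evals_log):
        self.log_handle.write(f"iteration {epoch}\n")
        return False                                      # False = keep training


rng = np.random.default_rng(0)
X, y = rng.normal(size=(200, 5)), rng.normal(size=200)

model = XGBRegressor(n_estimators=20, callbacks=[LoggingCallback()])
model.fit(X, y)

# drop the callback (and its open handle) so the fitted model can be serialized
model.set_params(callbacks=[])
blob = pickle.dumps(model)
print(len(blob) > 0)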

View File

@ -38,13 +38,15 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel):
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = [( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)]
for i in range(data_dictionary["test_labels"].shape[1]):
eval_sets[i] = [
( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i],
)
]
init_model = self.get_init_model(dk.pair)
if init_model:
@ -55,11 +57,15 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel):
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
'xgb_model': init_models[i]})
{
"eval_set": eval_sets[i],
"sample_weight_eval_set": eval_weights,
"xgb_model": init_models[i],
}
)
model = FreqaiMultiOutputRegressor(estimator=xgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
thread_training = self.freqai_info.get("multitarget_parallel_training", False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)

View File

@ -12,6 +12,7 @@ class TensorboardCallback(BaseCallback):
Custom callback for plotting additional values in tensorboard and
episodic summary reports.
"""
def __init__(self, verbose=1, actions: Type[Enum] = BaseActions):
super().__init__(verbose)
self.model: Any = None
@ -40,10 +41,9 @@ class TensorboardCallback(BaseCallback):
)
def _on_step(self) -> bool:
local_info = self.locals["infos"][0]
if hasattr(self.training_env, 'envs'):
if hasattr(self.training_env, "envs"):
tensorboard_metrics = self.training_env.envs[0].unwrapped.tensorboard_metrics
else:

View File

@ -1,6 +1,7 @@
# ensure users can still use a non-torch freqai version
try:
from freqtrade.freqai.tensorboard.tensorboard import TensorBoardCallback, TensorboardLogger
TBLogger = TensorboardLogger
TBCallback = TensorBoardCallback
except ModuleNotFoundError:
@ -8,10 +9,8 @@ except ModuleNotFoundError:
BaseTensorBoardCallback,
BaseTensorboardLogger,
)
TBLogger = BaseTensorboardLogger # type: ignore
TBCallback = BaseTensorBoardCallback # type: ignore
__all__ = (
"TBLogger",
"TBCallback"
)
__all__ = ("TBLogger", "TBCallback")

View File

@ -20,13 +20,10 @@ class BaseTensorboardLogger:
class BaseTensorBoardCallback(TrainingCallback):
def __init__(self, logdir: Path, activate: bool = True):
pass
def after_iteration(
self, model, epoch: int, evals_log: TrainingCallback.EvalsLog
) -> bool:
def after_iteration(self, model, epoch: int, evals_log: TrainingCallback.EvalsLog) -> bool:
return False
def after_training(self, model):

View File

@ -31,7 +31,6 @@ class TensorboardLogger(BaseTensorboardLogger):
class TensorBoardCallback(BaseTensorBoardCallback):
def __init__(self, logdir: Path, activate: bool = True):
self.activate = activate
if self.activate:

View File

@ -31,9 +31,9 @@ class DefaultPyTorchDataConvertor(PyTorchDataConvertor):
"""
def __init__(
self,
target_tensor_type: torch.dtype = torch.float32,
squeeze_target_tensor: bool = False,
self,
target_tensor_type: torch.dtype = torch.float32,
squeeze_target_tensor: bool = False,
):
"""
:param target_tensor_type: type of target tensor, for classification use

View File

@ -19,16 +19,16 @@ logger = logging.getLogger(__name__)
class PyTorchModelTrainer(PyTorchTrainerInterface):
def __init__(
self,
model: nn.Module,
optimizer: Optimizer,
criterion: nn.Module,
device: str,
data_convertor: PyTorchDataConvertor,
model_meta_data: Dict[str, Any] = {},
window_size: int = 1,
tb_logger: Any = None,
**kwargs
self,
model: nn.Module,
optimizer: Optimizer,
criterion: nn.Module,
device: str,
data_convertor: PyTorchDataConvertor,
model_meta_data: Dict[str, Any] = {},
window_size: int = 1,
tb_logger: Any = None,
**kwargs,
):
"""
:param model: The PyTorch model to be trained.
@ -101,9 +101,9 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
@torch.no_grad()
def estimate_loss(
self,
data_loader_dictionary: Dict[str, DataLoader],
split: str,
self,
data_loader_dictionary: Dict[str, DataLoader],
split: str,
) -> None:
self.model.eval()
for _, batch_data in enumerate(data_loader_dictionary[split]):
@ -119,9 +119,7 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
self.model.train()
def create_data_loaders_dictionary(
self,
data_dictionary: Dict[str, pd.DataFrame],
splits: List[str]
self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]
) -> Dict[str, DataLoader]:
"""
Converts the input data to PyTorch tensors using a data loader.
@ -168,12 +166,15 @@ class PyTorchModelTrainer(PyTorchTrainerInterface):
user needs to store. e.g. class_names for classification models.
"""
torch.save({
"model_state_dict": self.model.state_dict(),
"optimizer_state_dict": self.optimizer.state_dict(),
"model_meta_data": self.model_meta_data,
"pytrainer": self
}, path)
torch.save(
{
"model_state_dict": self.model.state_dict(),
"optimizer_state_dict": self.optimizer.state_dict(),
"model_meta_data": self.model_meta_data,
"pytrainer": self,
},
path,
)
def load(self, path: Path):
checkpoint = torch.load(path)
@ -198,9 +199,7 @@ class PyTorchTransformerTrainer(PyTorchModelTrainer):
"""
def create_data_loaders_dictionary(
self,
data_dictionary: Dict[str, pd.DataFrame],
splits: List[str]
self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]
) -> Dict[str, DataLoader]:
"""
Converts the input data to PyTorch tensors using a data loader.

View File

@ -8,7 +8,6 @@ from torch import nn
class PyTorchTrainerInterface(ABC):
@abstractmethod
def fit(self, data_dictionary: Dict[str, pd.DataFrame], splits: List[str]) -> None:
"""

View File

@ -19,8 +19,16 @@ class PyTorchTransformerModel(nn.Module):
Lukasz Kaiser, and Illia Polosukhin. 2017.
"""
def __init__(self, input_dim: int = 7, output_dim: int = 7, hidden_dim=1024,
n_layer=2, dropout_percent=0.1, time_window=10, nhead=8):
def __init__(
self,
input_dim: int = 7,
output_dim: int = 7,
hidden_dim=1024,
n_layer=2,
dropout_percent=0.1,
time_window=10,
nhead=8,
):
super().__init__()
self.time_window = time_window
# ensure the input dimension to the transformer is divisible by nhead
@ -34,7 +42,8 @@ class PyTorchTransformerModel(nn.Module):
# Define the encoder block of the Transformer
self.encoder_layer = nn.TransformerEncoderLayer(
d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True)
d_model=self.dim_val, nhead=nhead, dropout=dropout_percent, batch_first=True
)
self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=n_layer)
# the pseudo decoding FC
@ -48,7 +57,7 @@ class PyTorchTransformerModel(nn.Module):
nn.Linear(int(hidden_dim / 2), int(hidden_dim / 4)),
nn.ReLU(),
nn.Dropout(dropout_percent),
nn.Linear(int(hidden_dim / 4), output_dim)
nn.Linear(int(hidden_dim / 4), output_dim),
)
def forward(self, x, mask=None, add_positional_encoding=True):

View File

@ -12,7 +12,7 @@ class WindowDataset(torch.utils.data.Dataset):
def __getitem__(self, index):
idx_rev = len(self.xs) - self.window_size - index - 1
window_x = self.xs[idx_rev:idx_rev + self.window_size, :]
window_x = self.xs[idx_rev : idx_rev + self.window_size, :]
# Beware of indexing: window_x and window_y are aimed at the same (last) row of the window!
# This alignment follows from the `:` slice above.
window_y = self.ys[idx_rev + self.window_size - 1, :].unsqueeze(0)
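To make the indexing comment above concrete: the feature window spans rows `[idx_rev, idx_rev + window_size)` and the label is taken from row `idx_rev + window_size - 1`, the window's last row, so features and label end on the same candle. A compact standalone version mirroring that indexing (the `__len__` here is my own choice for the sketch):

import torch


class WindowDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, xs: torch.Tensor, ys: torch.Tensor, window_size: int):
        self.xs, self.ys, self.window_size = xs, ys, window_size

    def __len__(self):
        return len(self.xs) - self.window_size

    def __getitem__(self, index):
        idx_rev = len(self.xs) - self.window_size - index - 1   # newest windows first
        window_x = self.xs[idx_rev : idx_rev + self.window_size, :]
        # label row = last row of the feature window, so x and y end on the same candle
        window_y = self.ys[idx_rev + self.window_size - 1, :].unsqueeze(0)
        return window_x, window_y


xs = torch.arange(20, dtype=torch.float32).reshape(10, 2)   # 10 candles, 2 features
ys = torch.arange(10, dtype=torch.float32).reshape(10, 1)   # 1 label per candle
x0, y0 = WindowDatasetSketch(xs, ys, window_size=3)[0]
print(x0[-1], y0)                                            # both taken from row 8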

View File

@ -31,11 +31,12 @@ def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
"""
if dp._exchange is None:
raise OperationalException('No exchange object found.')
raise OperationalException("No exchange object found.")
markets = [
p for p in dp._exchange.get_markets(
tradable_only=True, active_only=not config.get('include_inactive')
).keys()
p
for p in dp._exchange.get_markets(
tradable_only=True, active_only=not config.get("include_inactive")
).keys()
]
all_pairs = dynamic_expand_pairlist(config, markets)
@ -73,42 +74,39 @@ def get_required_data_timerange(config: Config) -> TimeRange:
if secs > max_tf_seconds:
max_tf_seconds = secs
startup_candles = config.get('startup_candle_count', 0)
startup_candles = config.get("startup_candle_count", 0)
indicator_periods = config["freqai"]["feature_parameters"]["indicator_periods_candles"]
# scale max_period by 1.5 as a safety factor.
max_period = int(max(startup_candles, max(indicator_periods)) * 1.5)
config['startup_candle_count'] = max_period
logger.info(f'FreqAI auto-downloader using {max_period} startup candles.')
config["startup_candle_count"] = max_period
logger.info(f"FreqAI auto-downloader using {max_period} startup candles.")
additional_seconds = max_period * max_tf_seconds
startts = int(
time
- config["freqai"].get("train_period_days", 0) * 86400
- additional_seconds
)
startts = int(time - config["freqai"].get("train_period_days", 0) * 86400 - additional_seconds)
stopts = int(time)
data_load_timerange = TimeRange('date', 'date', startts, stopts)
data_load_timerange = TimeRange("date", "date", startts, stopts)
return data_load_timerange
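A worked example of the arithmetic above, with invented but typical values: `startup_candle_count=40`, `indicator_periods_candles=[10, 20]`, a 1h maximum timeframe and `train_period_days=30` give `max_period = int(max(40, 20) * 1.5) = 60` startup candles and a download window reaching back 30 days plus 60 hours:

import time

startup_candles = 40
indicator_periods = [10, 20]
max_tf_seconds = 3600                      # assume 1h is the largest timeframe in use
train_period_days = 30

max_period = int(max(startup_candles, max(indicator_periods)) * 1.5)   # -> 60
additional_seconds = max_period * max_tf_seconds                        # -> 216000
now = int(time.time())
startts = int(now - train_period_days * 86400 - additional_seconds)
stopts = now
print(max_period, additional_seconds, stopts - startts)                 # 60 216000 2808000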
def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
count_max: int = 25) -> None:
def plot_feature_importance(
model: Any, pair: str, dk: FreqaiDataKitchen, count_max: int = 25
) -> None:
"""
Plot Best and worst features by importance for a single sub-train.
:param model: Any = A model which was `fit` using a common library
such as catboost or lightgbm
:param pair: str = pair e.g. BTC/USD
:param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
:param count_max: int = the amount of features to be loaded per column
Plot Best and worst features by importance for a single sub-train.
:param model: Any = A model which was `fit` using a common library
such as catboost or lightgbm
:param pair: str = pair e.g. BTC/USD
:param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
:param count_max: int = the amount of features to be loaded per column
"""
from freqtrade.plot.plotting import go, make_subplots, store_plot_file
# Extract feature importance from model
models = {}
if 'FreqaiMultiOutputRegressor' in str(model.__class__):
if "FreqaiMultiOutputRegressor" in str(model.__class__):
for estimator, label in zip(model.estimators_, dk.label_list):
models[label] = estimator
else:
@ -123,14 +121,16 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
elif "xgb" in str(mdl.__class__):
feature_importance = mdl.feature_importances_
else:
logger.info('Model type does not support generating feature importances.')
logger.info("Model type does not support generating feature importances.")
return
# Data preparation
fi_df = pd.DataFrame({
"feature_names": np.array(dk.data_dictionary['train_features'].columns),
"feature_importance": np.array(feature_importance)
})
fi_df = pd.DataFrame(
{
"feature_names": np.array(dk.data_dictionary["train_features"].columns),
"feature_importance": np.array(feature_importance),
}
)
fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]
@ -140,14 +140,18 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
go.Bar(
x=fi_df["feature_importance"],
y=fi_df["feature_names"],
orientation='h', showlegend=False
), row=1, col=col
orientation="h",
showlegend=False,
),
row=1,
col=col,
)
fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
fig = add_feature_trace(fig, fi_df_top, 1)
fig = add_feature_trace(fig, fi_df_worst, 2)
fig.update_layout(title_text=f"Best and worst features by importance {pair}")
label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters
label = label.replace("&", "").replace("%", "") # escape two FreqAI specific characters
store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)
@ -158,12 +162,12 @@ def record_params(config: Dict[str, Any], full_path: Path) -> None:
params_record_path = full_path / "run_params.json"
run_params = {
"freqai": config.get('freqai', {}),
"timeframe": config.get('timeframe'),
"stake_amount": config.get('stake_amount'),
"stake_currency": config.get('stake_currency'),
"max_open_trades": config.get('max_open_trades'),
"pairs": config.get('exchange', {}).get('pair_whitelist')
"freqai": config.get("freqai", {}),
"timeframe": config.get("timeframe"),
"stake_amount": config.get("stake_amount"),
"stake_currency": config.get("stake_currency"),
"max_open_trades": config.get("max_open_trades"),
"pairs": config.get("exchange", {}).get("pair_whitelist"),
}
with params_record_path.open("w") as handle:
@ -172,7 +176,7 @@ def record_params(config: Dict[str, Any], full_path: Path) -> None:
handle,
indent=4,
default=str,
number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN
number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN,
)
@ -191,10 +195,11 @@ def get_timerange_backtest_live_models(config: Config) -> str:
def get_tb_logger(model_type: str, path: Path, activate: bool) -> Any:
if model_type == "pytorch" and activate:
from freqtrade.freqai.tensorboard import TBLogger
return TBLogger(path, activate)
else:
from freqtrade.freqai.tensorboard.base_tensorboard import BaseTensorboardLogger
return BaseTensorboardLogger(path, activate)