freqtrade_origin/freqtrade/freqai/RL/BaseReinforcementLearningModel.py

import logging
from typing import Any, Dict, Tuple

import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from abc import abstractmethod
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions
from freqtrade.persistence import Trade
import torch.multiprocessing
import torch as th
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Switch torch multiprocessing to the 'file_system' sharing strategy at import time.
# NOTE(review): presumably chosen to avoid running out of file descriptors when tensors are
# shared between processes - confirm against the PyTorch multiprocessing documentation.
torch.multiprocessing.set_sharing_strategy('file_system')


class BaseReinforcementLearningModel(IFreqaiModel):
    """
    User created Reinforcement Learning Model prediction model.
    """

    def __init__(self, **kwargs):
        """
        Set up the base model from the keyword arguments supplied by the caller.
        :param kwargs: must contain a 'config' entry; it is forwarded to the parent
                       constructor. A missing entry raises KeyError.
        """
        user_config = kwargs['config']
        super().__init__(config=user_config)

        # limit the torch CPU thread count; 4 is used when the key is not configured
        thread_count = self.freqai_info.get('data_kitchen_thread_count', 4)
        th.set_num_threads(thread_count)

        # the reward parameters are mandatory: a missing key raises KeyError here
        rl_config = self.freqai_info['rl_config']
        self.reward_params = rl_config['model_reward_parameters']

    def train(
        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
    ) -> Any:
        """
        Prepare the training data for one pair and pass it on to self.fit_rl().
        The datakitchen is used for filtering, splitting and normalizing the data.
        :param unfiltered_dataframe: Full dataframe for the current training period
        :param pair: pair the model is trained for (used for logging and price columns)
        :param dk: datakitchen that filters, splits and normalizes the data
        :returns:
        :model: the object returned by self.fit_rl()
        """

        logger.info(f"--------------------Starting training {pair} --------------------")

        # filter the features requested by user in the configuration file and elegantly handle NaNs
        train_features, train_labels = dk.filter_features(
            unfiltered_dataframe,
            dk.training_features_list,
            dk.label_list,
            training_filter=True,
        )

        data_dictionary: Dict[str, Any] = dk.make_train_test_datasets(
            train_features, train_labels)
        dk.fit_labels()  # useless for now, but just satiating append methods

        # The price frames are built from dk.data_dictionary *before* normalize_data() runs.
        # NOTE(review): presumably make_train_test_datasets() populated dk.data_dictionary
        # and the ordering keeps the prices un-normalized - confirm in FreqaiDataKitchen.
        price_frames = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk)
        prices_train, prices_test = price_frames

        # normalize all data based on train_dataset only
        data_dictionary = dk.normalize_data(data_dictionary)

        # optional additional data cleaning/analysis
        self.data_cleaning_train(dk)

        feature_count = len(dk.data_dictionary["train_features"].columns)
        logger.info(f'Training model on {feature_count} features')
        sample_count = len(data_dictionary["train_features"])
        logger.info(f'Training model on {sample_count} data points')

        trained_model = self.fit_rl(data_dictionary, pair, dk, prices_train, prices_test)

        logger.info(f"--------------------done training {pair}--------------------")

        return trained_model

    @abstractmethod
    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,
               prices_train: DataFrame, prices_test: DataFrame):
        """
        Agent customizations and abstract Reinforcement Learning customizations
        go in here. Abstract method, so this function must be overridden by
        user class.
        """

        return

    def get_state_info(self, pair):
        """
        Read the trade state of a pair from the trade database.
        :param pair: pair to look up
        :returns:
        :market_side: 0.5 if no open trade matches the pair, 1 if the enter_tag of the
                      matching open trade contains 'long', 0 otherwise
        :current_profit: open_trade_value / open_rate - 1 of the matching open trade,
                         0 if there is none
        :total_profit: sum of close_profit over all closed trades of the pair

        NOTE(review): current_profit is derived from the *open* trade value, not from a
        current price - confirm this is the intended quantity.
        NOTE(review): the 'long' check raises TypeError if enter_tag is None - confirm that
        every trade reaching this point carries a tag.
        """
        market_side = 0.5
        current_profit = 0

        # all open trades are fetched; the pair is matched in python below.
        # If several open trades match, the values of the last one are kept.
        for open_trade in Trade.get_trades(trade_filter=Trade.is_open.is_(True)):
            if open_trade.pair != pair:
                continue
            trade_value = open_trade.open_trade_value
            entry_rate = open_trade.open_rate
            market_side = 1 if 'long' in open_trade.enter_tag else 0
            current_profit = trade_value / entry_rate - 1

        # closed trades are filtered by pair inside the query itself
        closed_filter = [Trade.is_open.is_(False), Trade.pair == pair]
        total_profit = sum(
            closed_trade.close_profit
            for closed_trade in Trade.get_trades(trade_filter=closed_filter)
        )

        return market_side, current_profit, total_profit

    def predict(
        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        dk.find_features(unfiltered_dataframe)
        filtered_dataframe, _ = dk.filter_features(
            unfiltered_dataframe, dk.training_features_list, training_filter=False
        )
        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
        dk.data_dictionary["prediction_features"] = filtered_dataframe

        # optional additional data cleaning/analysis
        self.data_cleaning_predict(dk, filtered_dataframe)

        pred_df = self.rl_model_predict(
            dk.data_dictionary["prediction_features"], dk, self.model)
        pred_df.fillna(0, inplace=True)

        return (pred_df, dk.do_predict)

    def rl_model_predict(self, dataframe: DataFrame,
                         dk: FreqaiDataKitchen, model: Any) -> DataFrame:

        output = pd.DataFrame(np.zeros(len(dataframe)), columns=dk.label_list)

        def _predict(window):
            market_side, current_profit, total_profit = self.get_state_info(dk.pair)
            observations = dataframe.iloc[window.index]
            observations['current_profit'] = current_profit
            observations['position'] = market_side
            res, _ = model.predict(observations, deterministic=True)
            return res

        output = output.rolling(window=self.CONV_WIDTH).apply(_predict)

        return output

    def build_ohlc_price_dataframes(self, data_dictionary: dict,
                                    pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,
                                                                               DataFrame]:
        """
        Builds the train prices and test prices for the environment.
        """

        coin = pair.split('/')[0]
        train_df = data_dictionary["train_features"]
        test_df = data_dictionary["test_features"]

        # price data for model training and evaluation
        tf = self.config['timeframe']
        ohlc_list = [f'%-{coin}raw_open_{tf}', f'%-{coin}raw_low_{tf}',
                     f'%-{coin}raw_high_{tf}', f'%-{coin}raw_close_{tf}']
        rename_dict = {f'%-{coin}raw_open_{tf}': 'open', f'%-{coin}raw_low_{tf}': 'low',
                       f'%-{coin}raw_high_{tf}': ' high', f'%-{coin}raw_close_{tf}': 'close'}

        prices_train = train_df.filter(ohlc_list, axis=1)
        prices_train.rename(columns=rename_dict, inplace=True)
        prices_train.reset_index(drop=True)

        prices_test = test_df.filter(ohlc_list, axis=1)
        prices_test.rename(columns=rename_dict, inplace=True)
        prices_test.reset_index(drop=True)

        return prices_train, prices_test

    # TODO take care of this appendage. Right now it needs to be called because FreqAI enforces it.
    # But FreqaiRL needs more objects passed to fit() (like DK) and we don't want to refactor
    # all the other existing fit() functions to include a dk argument. For now we define it as
    # a no-op and leave it.
    def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
        """
        No-op implementation of the generic fit() entry point. The reinforcement
        learning models are trained through fit_rl(), which train() calls instead.
        :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold
                                all the training and test data/labels (ignored here).
        :param pair: pair name (ignored here).
        :returns: always None
        """

        return None


class MyRLEnv(Base3ActionRLEnv):
    """
    Environment built on Base3ActionRLEnv that overrides step() to handle the three
    actions Neutral, Long and Short.
    """

    def step(self, action):
        """
        Advance the environment by one tick and apply the action of the agent.
        :param action: value compared against Actions.Neutral/Long/Short .value
        :returns: tuple of (observation, step_reward, done, info). info holds the
                  current tick, the accumulated reward, the total profit and the value
                  of the current position.
        """
        self._done = False
        self._current_tick += 1

        # the episode ends once the last tick is reached
        if self._current_tick == self._end_tick:
            self._done = True

        self.update_portfolio_log_returns(action)

        # profit and reward are computed while self._position still holds the position
        # from before this action; the position is only switched further below
        self._update_profit(action)
        step_reward = self._calculate_reward(action)
        self.total_reward += step_reward

        trade_type = None
        if self.is_tradesignal(action):  # only act when the action is a trade signal
            # Update position
            #
            # Action: Neutral, position: Long ->  Close Long
            # Action: Neutral, position: Short -> Close Short
            #
            # Action: Long, position: Neutral -> Open Long
            # Action: Long, position: Short -> Close Short and Open Long
            #
            # Action: Short, position: Neutral -> Open Short
            # Action: Short, position: Long -> Close Long and Open Short

            if action == Actions.Neutral.value:
                self._position = Positions.Neutral
                trade_type = "neutral"
            elif action == Actions.Long.value:
                self._position = Positions.Long
                trade_type = "long"
            elif action == Actions.Short.value:
                self._position = Positions.Short
                trade_type = "short"
            else:
                # report through the module logger instead of printing to stdout
                logger.warning("case not defined for action %s", action)

            # Update last trade tick
            self._last_trade_tick = self._current_tick

            if trade_type is not None:
                self.trade_history.append(
                    {'price': self.current_price(), 'index': self._current_tick,
                     'type': trade_type})

        # abort the episode once the total profit drops below 0.2
        # NOTE(review): presumably _total_profit is a multiplier of the starting value -
        # confirm how Base3ActionRLEnv tracks it.
        if self._total_profit < 0.2:
            self._done = True

        self._position_history.append(self._position)
        observation = self._get_observation()
        info = dict(
            tick=self._current_tick,
            total_reward=self.total_reward,
            total_profit=self._total_profit,
            position=self._position.value
        )
        self._update_history(info)

        return observation, step_reward, self._done, info
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`import logging`
			`from typing import Any, Dict, Tuple`

			`import numpy as np`
			`import numpy.typing as npt`
			`import pandas as pd`
			`from pandas import DataFrame`
			`from abc import abstractmethod`
			`from freqtrade.freqai.data_kitchen import FreqaiDataKitchen`
			`from freqtrade.freqai.freqai_interface import IFreqaiModel`
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`from freqtrade.persistence import Trade`
improve price df handling to enable backtesting 2022-08-17 10:51:14 +00:00			`import torch.multiprocessing`
			`import torch as th`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`logger = logging.getLogger(__name__)`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00
improve price df handling to enable backtesting 2022-08-17 10:51:14 +00:00			`torch.multiprocessing.set_sharing_strategy('file_system')`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00

			`class BaseReinforcementLearningModel(IFreqaiModel):`
			`"""`
			`User created Reinforcement Learning Model prediction model.`
			`"""`

reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`def __init__(self, **kwargs):`
			`super().__init__(config=kwargs['config'])`
			`th.set_num_threads(self.freqai_info.get('data_kitchen_thread_count', 4))`
			`self.reward_params = self.freqai_info['rl_config']['model_reward_parameters']`

restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`def train(`
			`self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen`
			`) -> Any:`
			`"""`
			`Filter the training data and train a model to it. Train makes heavy use of the datakitchen`
			`for storing, saving, loading, and analyzing the data.`
			`:param unfiltered_dataframe: Full dataframe for the current training period`
			`:param metadata: pair metadata from strategy.`
			`:returns:`
			`:model: Trained model which can be used to inference (self.predict)`
			`"""`

			`logger.info("--------------------Starting training " f"{pair} --------------------")`

			`# filter the features requested by user in the configuration file and elegantly handle NaNs`
			`features_filtered, labels_filtered = dk.filter_features(`
			`unfiltered_dataframe,`
			`dk.training_features_list,`
			`dk.label_list,`
			`training_filter=True,`
			`)`

			`data_dictionary: Dict[str, Any] = dk.make_train_test_datasets(`
			`features_filtered, labels_filtered)`
			`dk.fit_labels() # useless for now, but just satiating append methods`

			`# normalize all data based on train_dataset only`
improve price df handling to enable backtesting 2022-08-17 10:51:14 +00:00			`prices_train, prices_test = self.build_ohlc_price_dataframes(dk.data_dictionary, pair, dk)`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`data_dictionary = dk.normalize_data(data_dictionary)`

			`# optional additional data cleaning/analysis`
			`self.data_cleaning_train(dk)`

			`logger.info(`
			`f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"`
			`)`
			`logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')`

improve price df handling to enable backtesting 2022-08-17 10:51:14 +00:00			`model = self.fit_rl(data_dictionary, pair, dk, prices_train, prices_test)`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`logger.info(f"--------------------done training {pair}--------------------")`

			`return model`

			`@abstractmethod`
improve price df handling to enable backtesting 2022-08-17 10:51:14 +00:00			`def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen,`
			`prices_train: DataFrame, prices_test: DataFrame):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`"""`
			`Agent customizations and abstract Reinforcement Learning customizations`
			`go in here. Abstract method, so this function must be overridden by`
			`user class.`
			`"""`

			`return`

			`def get_state_info(self, pair):`
			`open_trades = Trade.get_trades(trade_filter=Trade.is_open.is_(True))`
			`market_side = 0.5`
			`current_profit = 0`
			`for trade in open_trades:`
			`if trade.pair == pair:`
			`current_value = trade.open_trade_value`
			`openrate = trade.open_rate`
			`if 'long' in trade.enter_tag:`
			`market_side = 1`
			`else:`
			`market_side = 0`
			`current_profit = current_value / openrate - 1`

			`total_profit = 0`
			`closed_trades = Trade.get_trades(`
			`trade_filter=[Trade.is_open.is_(False), Trade.pair == pair])`
			`for trade in closed_trades:`
			`total_profit += trade.close_profit`

			`return market_side, current_profit, total_profit`

			`def predict(`
			`self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False`
			`) -> Tuple[DataFrame, npt.NDArray[np.int_]]:`
			`"""`
			`Filter the prediction features data and predict with it.`
			`:param: unfiltered_dataframe: Full dataframe for the current backtest period.`
			`:return:`
			`:pred_df: dataframe containing the predictions`
			`:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove`
			`data (NaNs) or felt uncertain about data (PCA and DI index)`
			`"""`

			`dk.find_features(unfiltered_dataframe)`
			`filtered_dataframe, _ = dk.filter_features(`
			`unfiltered_dataframe, dk.training_features_list, training_filter=False`
			`)`
			`filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)`
			`dk.data_dictionary["prediction_features"] = filtered_dataframe`

			`# optional additional data cleaning/analysis`
			`self.data_cleaning_predict(dk, filtered_dataframe)`

reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`pred_df = self.rl_model_predict(`
			`dk.data_dictionary["prediction_features"], dk, self.model)`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`pred_df.fillna(0, inplace=True)`

			`return (pred_df, dk.do_predict)`

			`def rl_model_predict(self, dataframe: DataFrame,`
			`dk: FreqaiDataKitchen, model: Any) -> DataFrame:`

reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`output = pd.DataFrame(np.zeros(len(dataframe)), columns=dk.label_list)`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`def _predict(window):`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`market_side, current_profit, total_profit = self.get_state_info(dk.pair)`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`observations = dataframe.iloc[window.index]`
reinforce training with state info, reinforce prediction with state info, restructure config to accommodate all parameters from any user imported model type. Set 5Act to default env on TDQN. Clean example config. 2022-08-18 11:02:47 +00:00			`observations['current_profit'] = current_profit`
			`observations['position'] = market_side`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00			`res, _ = model.predict(observations, deterministic=True)`
			`return res`

			`output = output.rolling(window=self.CONV_WIDTH).apply(_predict)`

			`return output`

improve price df handling to enable backtesting 2022-08-17 10:51:14 +00:00			`def build_ohlc_price_dataframes(self, data_dictionary: dict,`
			`pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame,`
			`DataFrame]:`
			`"""`
			`Builds the train prices and test prices for the environment.`
			`"""`

			`coin = pair.split('/')[0]`
			`train_df = data_dictionary["train_features"]`
			`test_df = data_dictionary["test_features"]`

			`# price data for model training and evaluation`
			`tf = self.config['timeframe']`
			`ohlc_list = [f'%-{coin}raw_open_{tf}', f'%-{coin}raw_low_{tf}',`
			`f'%-{coin}raw_high_{tf}', f'%-{coin}raw_close_{tf}']`
			`rename_dict = {f'%-{coin}raw_open_{tf}': 'open', f'%-{coin}raw_low_{tf}': 'low',`
			`f'%-{coin}raw_high_{tf}': ' high', f'%-{coin}raw_close_{tf}': 'close'}`

			`prices_train = train_df.filter(ohlc_list, axis=1)`
			`prices_train.rename(columns=rename_dict, inplace=True)`
			`prices_train.reset_index(drop=True)`

			`prices_test = test_df.filter(ohlc_list, axis=1)`
			`prices_test.rename(columns=rename_dict, inplace=True)`
			`prices_test.reset_index(drop=True)`

			`return prices_train, prices_test`

set cpu threads in config 2022-08-15 16:01:15 +00:00			`# TODO take care of this appendage. Right now it needs to be called because FreqAI enforces it.`
			`# But FreqaiRL needs more objects passed to fit() (like DK) and we dont want to go refactor`
			`# all the other existing fit() functions to include dk argument. For now we instantiate and`
			`# leave it.`
			`def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:`
			`"""`
			`Most regressors use the same function names and arguments e.g. user`
			`can drop in LGBMRegressor in place of CatBoostRegressor and all data`
			`management will be properly handled by Freqai.`
			`:param data_dictionary: Dict = the dictionary constructed by DataHandler to hold`
			`all the training and test data/labels.`
			`"""`

			`return`

restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
make base 3ac and base 5ac environments. TDQN defaults to 3AC. 2022-08-15 10:13:37 +00:00			`class MyRLEnv(Base3ActionRLEnv):`
restructure RL so that user can customize environment 2022-08-15 08:26:44 +00:00
			`def step(self, action):`
			`self._done = False`
			`self._current_tick += 1`

			`if self._current_tick == self._end_tick:`
			`self._done = True`

			`self.update_portfolio_log_returns(action)`

			`self._update_profit(action)`
			`step_reward = self._calculate_reward(action)`
			`self.total_reward += step_reward`

			`trade_type = None`
			`if self.is_tradesignal(action): # exclude 3 case not trade`
			`# Update position`
			`"""`
			`Action: Neutral, position: Long -> Close Long`
			`Action: Neutral, position: Short -> Close Short`

			`Action: Long, position: Neutral -> Open Long`
			`Action: Long, position: Short -> Close Short and Open Long`

			`Action: Short, position: Neutral -> Open Short`
			`Action: Short, position: Long -> Close Long and Open Short`
			`"""`

			`if action == Actions.Neutral.value:`
			`self._position = Positions.Neutral`
			`trade_type = "neutral"`
			`elif action == Actions.Long.value:`
			`self._position = Positions.Long`
			`trade_type = "long"`
			`elif action == Actions.Short.value:`
			`self._position = Positions.Short`
			`trade_type = "short"`
			`else:`
			`print("case not defined")`

			`# Update last trade tick`
			`self._last_trade_tick = self._current_tick`

			`if trade_type is not None:`
			`self.trade_history.append(`
			`{'price': self.current_price(), 'index': self._current_tick,`
			`'type': trade_type})`

			`if self._total_profit < 0.2:`
			`self._done = True`

			`self._position_history.append(self._position)`
			`observation = self._get_observation()`
			`info = dict(`
			`tick=self._current_tick,`
			`total_reward=self.total_reward,`
			`total_profit=self._total_profit,`
			`position=self._position.value`
			`)`
			`self._update_history(info)`

			`return observation, step_reward, self._done, info`