Mirror of https://github.com/freqtrade/freqtrade.git
Synced 2024-11-10 02:12:01 +00:00

Commit fc837c4daa — add freqai backend machinery, user interface, documentation
Parent: ebab02fce3
.gitignore (vendored) — 3 additions

```diff
@@ -7,6 +7,8 @@ logfile.txt
 user_data/*
 !user_data/strategy/sample_strategy.py
 !user_data/notebooks
+!user_data/models
+user_data/models/*
 user_data/notebooks/*
 freqtrade-plot.html
 freqtrade-profit-plot.html
@@ -105,3 +107,4 @@ target/
 !config_examples/config_ftx.example.json
 !config_examples/config_full.example.json
 !config_examples/config_kraken.example.json
+!config_examples/config_freqai.example.json
```
config_examples/config_freqai.example.json — new file (100 lines)

```json
{
    "max_open_trades": 1,
    "stake_currency": "USDT",
    "stake_amount": 800,
    "tradable_balance_ratio": 1,
    "fiat_display_currency": "USD",
    "dry_run": true,
    "timeframe": "5m",
    "dry_run_wallet": 1000,
    "cancel_open_orders_on_exit": true,
    "unfilledtimeout": {
        "entry": 10,
        "exit": 30
    },
    "exchange": {
        "name": "ftx",
        "key": "",
        "secret": "",
        "ccxt_config": {"enableRateLimit": true},
        "ccxt_async_config": {
            "enableRateLimit": true,
            "rateLimit": 200
        },
        "pair_whitelist": [
            "BTC/USDT"
        ],
        "pair_blacklist": []
    },
    "entry_pricing": {
        "price_side": "same",
        "use_order_book": true,
        "order_book_top": 1,
        "price_last_balance": 0.0,
        "check_depth_of_market": {
            "enabled": false,
            "bids_to_ask_delta": 1
        }
    },
    "exit_pricing": {
        "price_side": "same",
        "use_order_book": true,
        "order_book_top": 1
    },
    "pairlists": [
        {"method": "StaticPairList"}
    ],
    "freqai": {
        "btc_pair": "BTC/USDT",
        "timeframes": ["5m", "15m", "1h"],
        "full_timerange": "20210601-20220101",
        "train_period": 30,
        "backtest_period": 7,
        "identifier": "example",
        "base_features": [
            "rsi",
            "close_over_20sma",
            "relative_volume",
            "bb_width",
            "mfi",
            "roc",
            "pct-change",
            "adx",
            "macd"
        ],
        "corr_pairlist": [
            "ETH/USDT",
            "LINK/USDT",
            "DOT/USDT"
        ],
        "training_timerange": "20211220-20220117",
        "feature_parameters": {
            "period": 12,
            "shift": 2,
            "drop_features": false,
            "DI_threshold": 1,
            "weight_factor": 0,
            "principal_component_analysis": false,
            "remove_outliers": false
        },
        "data_split_parameters": {
            "test_size": 0.25,
            "random_state": 1
        },
        "model_training_parameters": {
            "n_estimators": 2000,
            "random_state": 1,
            "learning_rate": 0.02,
            "task_type": "CPU"
        }
    },
    "bot_name": "",
    "initial_state": "running",
    "forcebuy_enable": false,
    "internals": {
        "process_throttle_secs": 5
    }
}
```
docs/freqai.md — new file (265 lines)
# Freqai

!!! Note
    Freqai is still experimental, and should be used at the user's own discretion.

Freqai is a module designed to automate a variety of tasks associated with
training a regressor to predict signals based on input features. Its
features include:

* Easy construction of large feature sets based on simple user input
* Sweep model training and backtesting to simulate consistent model retraining through time
* Smart outlier removal of data points from prediction sets using a Dissimilarity Index
* Data dimensionality reduction with Principal Component Analysis
* Automatic file management for storage of models to be reused during live runs
* Smart and safe data standardization
* Cleaning of NaNs from the data set before training and prediction

TODO:
* live is not automated, still some architectural work to be done
## Background and vocabulary

**Features** are the quantities with which a model is trained. $X_i$ represents the
vector of all features for a single candle. In Freqai, the user
builds the features from anything they can construct in the strategy.

**Labels** are the target values toward which the weights inside a model are
trained. Each set of features is associated with a single label, which is also
defined within the strategy by the user. These labels look forward into the
future, and are not available to the model during dryrun/live/backtesting.

**Training** refers to the process of feeding individual feature sets into the
model with associated labels, with the goal of matching input feature sets to
associated labels.

**Train data** is a subset of the historic data which is fed to the model during
training to adjust weights. This data directly influences weight connections
in the model.

**Test data** is a subset of the historic data which is used to evaluate the
intermediate performance of the model during training. This data does not
directly influence nodal weights within the model.
## Configuring the bot
### Example config file
The user interface is isolated to the typical config file. A typical Freqai
config setup includes:

```json
"freqai": {
    "timeframes": ["5m", "15m", "4h"],
    "full_timerange": "20211220-20220220",
    "train_period": "month",
    "backtest_period": "week",
    "identifier": "unique-id",
    "base_features": [
        "rsi",
        "mfi",
        "roc"
    ],
    "corr_pairlist": [
        "ETH/USD",
        "LINK/USD",
        "BNB/USD"
    ],
    "feature_parameters": {
        "period": 24,
        "shift": 2,
        "drop_features": false,
        "DI_threshold": 1,
        "weight_factor": 0
    },
    "data_split_parameters": {
        "test_size": 0.25,
        "random_state": 42
    },
    "model_training_parameters": {
        "n_estimators": 100,
        "random_state": 42,
        "learning_rate": 0.02,
        "task_type": "CPU"
    }
}
```
### Building the feature set

Most of these parameters control the feature data set. `base_features`
indicates the basic indicators the user wishes to include in the feature set.
`timeframes` lists the timeframes of each base feature that the user wishes to
include in the feature set. In the present case, the user is asking for the
`5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, and `roc` to be included
in the feature set.

In addition, the user can ask for each of these features to be included from
informative pairs using the `corr_pairlist`. This means that the present feature
set will include all the `base_features` on all the `timeframes` for each of
`ETH/USD`, `LINK/USD`, and `BNB/USD`.

`shift` is another user-controlled parameter which indicates the number of previous
candles to include in the present feature set. In other words, `shift: 2` tells
Freqai to include the past 2 candles for each of the features included
in the dataset.

In total, the number of features the present user has created is:

*no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift` =
3 * 3 * 3 * 2 = 54.*
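To make the naming convention behind this count concrete, here is a small illustrative sketch (not part of the original docs) that composes feature names the same way `DataHandler.build_feature_list()` in this commit does; note that the exact total depends on whether the unshifted copy of each feature and the base pair's own features are counted alongside the shifted and informative ones:

```python
# Illustrative sketch of the feature naming convention used by
# DataHandler.build_feature_list(): <coin>-<feature>_shift-<n>_<timeframe>
timeframes = ["5m", "15m", "4h"]
base_features = ["rsi", "mfi", "roc"]
corr_pairlist = ["ETH/USD", "LINK/USD", "BNB/USD"]
shift = 2

features = []
for tf in timeframes:
    for ft in base_features:
        for n in range(shift + 1):
            suffix = f"_shift-{n}" if n > 0 else ""
            features.append(f"{ft}{suffix}_{tf}")  # base pair's own feature
            for pair in corr_pairlist:
                coin = pair.split("/")[0]
                features.append(f"{coin}-{ft}{suffix}_{tf}")  # informative pair feature

print(len(features))  # total number of feature columns generated
```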
### Deciding the sliding training window and backtesting duration

`full_timerange` lets the user set the full backtesting range to train and
backtest through. Meanwhile, `train_period` is the sliding training window and
`backtest_period` is the sliding backtesting window. In the present example,
the user is asking Freqai to slide a `month`-long training window through the
range `20211220-20220220`, and wishes to backtest each `week` with a newly
trained model. This means that Freqai will train 8 separate models (because the
full range comprises 8 weeks), and then backtest the week following each of the
8 monthly training windows. Users can think of this as a "sliding window" which
emulates Freqai retraining itself once per week in live using the previous
month of data.
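The windowing logic can be sketched as follows (a condensed sketch of `DataHandler.split_timerange()` from this commit, using day counts in place of the `month`/`week` keywords; `split_windows` is a hypothetical helper name):

```python
from datetime import datetime, timedelta

def split_windows(start: str, stop: str, train_days: int = 30, bt_days: int = 7):
    """Slide a training window through [start, stop]; each window is followed
    by a backtest window, and both slide forward by the backtest length."""
    fmt = "%Y%m%d"
    t0, t1 = datetime.strptime(start, fmt), datetime.strptime(stop, fmt)
    train, backtest = [], []
    train_start = t0
    while train_start + timedelta(days=train_days) <= t1:
        train_stop = train_start + timedelta(days=train_days)
        train.append((train_start, train_stop))
        backtest.append((train_stop, train_stop + timedelta(days=bt_days)))
        train_start += timedelta(days=bt_days)  # slide by the backtest length
    return train, backtest

train_windows, backtest_windows = split_windows("20211220", "20220220")
```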
## Running Freqai
### Training and backtesting

The freqai training/backtesting module can be executed with the following command:

```bash
freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel ExamplePredictionModel
```

where the user needs to have a FreqaiExampleStrategy that conforms to the requirements outlined
below. The ExamplePredictionModel is a user-built class which lets users design their
own training procedures and data analysis.
### Building a freqai strategy

The Freqai strategy requires the user to include the following lines of code in
`populate_indicators()`:

```python
from freqtrade.freqai.strategy_bridge import CustomModel

def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    # the configuration file parameters are stored here
    self.freqai_info = self.config['freqai']

    # the model is instantiated here
    self.model = CustomModel(self.config)

    print('Populating indicators...')

    # the following loops are necessary for building the features
    # indicated by the user in the configuration file.
    for tf in self.freqai_info['timeframes']:
        dataframe = self.populate_any_indicators(metadata['pair'],
                                                 dataframe.copy(), tf)
        for i in self.freqai_info['corr_pairlist']:
            dataframe = self.populate_any_indicators(i,
                                                     dataframe.copy(), tf, coin=i.split("/")[0]+'-')

    # the model returns 4 values: its prediction, an indication of whether or not
    # the prediction should be accepted, and the target mean/std values from the
    # labels used during each training period.
    (dataframe['prediction'], dataframe['do_predict'],
     dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)

    return dataframe
```

The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py`, which builds
the feature set with a proper naming convention for the IFreqaiModel to use later.
### Building an IFreqaiModel

Freqai has a base example model in `templates/ExamplePredictionModel.py`, but users can customize and create
their own prediction models using the `IFreqaiModel` class. Users are encouraged to override `train()`, `predict()`,
and `make_labels()` to customize various aspects of their training procedures.
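A minimal skeleton of such a class might look like the following (a sketch only; `MyPredictionModel` and the method bodies are placeholders — see `templates/ExamplePredictionModel.py` for a working implementation):

```python
from typing import Any, Dict, Tuple

from pandas import DataFrame

from freqtrade.freqai.freqai_interface import IFreqaiModel


class MyPredictionModel(IFreqaiModel):
    """Hypothetical user model; each override customizes one stage."""

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        # define the forward-looking target values here
        ...

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
        # filter/split/standardize via self.dh, then return self.fit(data_dictionary)
        ...

    def fit(self, data_dictionary: Dict) -> Any:
        # fit any regressor with a sklearn-like interface and return it
        ...

    def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[DataFrame, DataFrame]:
        # return (predictions, self.dh.do_predict)
        ...
```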
### Running the model live

After the user has designed a desirable feature set, Freqai can be run in dry/live mode
using the typical trade command:

```bash
freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.json --training_timerange '20211220-20220120'
```

The user has now specified exactly which of the models from the sliding window
they wish to run live using `--training_timerange` (typically this would be the most
recent model trained). As of right now, freqai will
not automatically retrain itself, so the user needs to manually retrain and then
reload the config file with a new `--training_timerange` in order to update the
model.
## Data analysis techniques
### Controlling the model learning process

The user can define model settings for the data split with `data_split_parameters` and learning parameters
with `model_training_parameters`. Users are encouraged to visit the Catboost documentation
for more information on how to select these values. `n_estimators` increases the
computational effort and the fit to the training data. If a user has a GPU
installed in their system, they may benefit from changing `task_type` to `GPU`.
The `weight_factor` allows the user to weight recent data more strongly
than past data via an exponential function:

$$ W_i = \exp\left(\frac{-i}{\alpha n}\right) $$

where $W_i$ is the weight of data point $i$ in a total set of $n$ data points.
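As a sketch (mirroring `DataHandler.set_weights_higher_recent()` from this commit), the weight vector can be generated like this; note that `weight_factor` must be positive, since the code falls back to uniform weights when it is 0:

```python
import numpy as np

def recency_weights(n: int, weight_factor: float) -> np.ndarray:
    """Newest points get weight near 1; older points decay exponentially."""
    weights = np.zeros(n)
    for i in range(1, n):
        # i counts backwards from the most recent data point
        weights[n - i] = np.exp(-i / (weight_factor * n))
    return weights

weights = recency_weights(1000, weight_factor=0.5)
```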
`drop_features` tells Freqai to train the model on the user defined features,
followed by a feature importance evaluation where it drops the top and bottom
performing features (there is evidence to suggest the top features may not be
helpful in equity/crypto trading since the ultimate objective is to predict low
frequency patterns, source: numerai).

Finally, `period` defines the offset used for the `labels`. In the present example,
the user is asking for `labels` that are 24 candles in the future.
### Removing outliers with the Dissimilarity Index

The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each
prediction made by the model. To do so, Freqai measures the distance between each training
data point and all other training data points:

$$ d_{ab} = \sqrt{\sum_{j=1}^p (X_{a,j} - X_{b,j})^2} $$

where $d_{ab}$ is the distance between the standardized points $a$ and $b$, and $p$
is the number of features, i.e. the length of the vector $X$. The
characteristic distance, $\overline{d}$, for a set of training data points is simply the mean
of the average distances:

$$ \overline{d} = \sum_{a=1}^n \left( \sum_{b=1}^n d_{ab} / n \right) / n $$

$\overline{d}$ quantifies the spread of the training data, which is compared to
the distance between each new prediction feature vector, $X_k$, and all the training
data:

$$ d_k = \min_i d_{k,i} $$

which enables the estimation of a Dissimilarity Index:

$$ DI_k = d_k / \overline{d} $$

Equity and crypto markets suffer from a high level of non-patterned noise in the
form of outlier data points. The Dissimilarity Index allows predictions which
are outliers, and not existent in the model feature space, to be thrown out due
to low levels of certainty. The user can tweak the DI with `DI_threshold` to increase
or decrease the extrapolation of the trained model.
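Numerically, the DI check reduces to a few lines (a sketch mirroring `DataHandler.compute_distances()` and `check_if_pred_in_training_spaces()` from this commit; the random arrays stand in for standardized feature sets):

```python
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

train = np.random.rand(500, 54)  # standardized training features (illustrative)
preds = np.random.rand(50, 54)   # standardized prediction features (illustrative)

# characteristic spread of the training data (d-bar above)
avg_mean_dist = pairwise_distances(train, n_jobs=-1).mean(axis=1).mean()

# distance from each prediction point to its nearest training point (d_k above)
d_k = pairwise_distances(train, preds, n_jobs=-1).min(axis=0)

DI = d_k / avg_mean_dist
do_predict = np.where(DI < 1, 1, 0)  # 1 = keep, 0 = tossed (DI_threshold = 1)
```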
### Reducing data dimensionality with Principal Component Analysis

TO BE WRITTEN
## Additional information
### Feature standardization

The feature set created by the user is automatically standardized using statistics
from the training data only. All test data and unseen prediction data
(dry/live/backtest) are standardized with those same training statistics.
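In other words (a sketch with made-up data, mirroring `DataHandler.standardize_data()`), the mean and standard deviation are computed once on the training split and reused everywhere:

```python
import numpy as np
import pandas as pd

cols = ["rsi_5m", "mfi_5m", "roc_5m"]  # illustrative feature names
train_features = pd.DataFrame(np.random.rand(100, 3), columns=cols)
test_features = pd.DataFrame(np.random.rand(25, 3), columns=cols)

train_mean, train_std = train_features.mean(), train_features.std()
train_features = (train_features - train_mean) / train_std
test_features = (test_features - train_mean) / train_std  # training stats, not test stats
```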
### File structure

`user_data_dir/models/` contains all the data associated with the trainings and
backtests. This file structure is heavily controlled and read by the `DataHandler()`
and should thus not be modified.
freqtrade/commands/__init__.py

```diff
@@ -19,6 +19,7 @@ from freqtrade.commands.list_commands import (start_list_exchanges, start_list_m
                                               start_show_trades)
 from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show,
                                                   start_edge, start_hyperopt)
+from freqtrade.commands.freqai_commands import (start_training)
 from freqtrade.commands.pairlist_commands import start_test_pairlist
 from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit
 from freqtrade.commands.trade_commands import start_trading
```
freqtrade/commands/arguments.py

```diff
@@ -12,7 +12,7 @@ from freqtrade.constants import DEFAULT_CONFIG

 ARGS_COMMON = ["verbosity", "logfile", "version", "config", "datadir", "user_data_dir"]

-ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search"]
+ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search", "freqaimodel", "freqaimodel_path"]

 ARGS_TRADE = ["db_url", "sd_notify", "dry_run", "dry_run_wallet", "fee"]
```
```diff
@@ -190,7 +190,8 @@ class Arguments:
                                start_list_markets, start_list_strategies,
                                start_list_timeframes, start_new_config, start_new_strategy,
                                start_plot_dataframe, start_plot_profit, start_show_trades,
-                               start_test_pairlist, start_trading, start_webserver)
+                               start_test_pairlist, start_trading, start_webserver,
+                               start_training)

         subparsers = self.parser.add_subparsers(dest='command',
                                                 # Use custom message when no subhandler is added
```
freqtrade/commands/cli_options.py

```diff
@@ -614,4 +614,16 @@ AVAILABLE_CLI_OPTIONS = {
              "that do not contain any parameters."),
         action="store_true",
     ),
+
+    "freqaimodel": Arg(
+        '--freqaimodel',
+        help='Specify a custom freqaimodel.',
+        metavar='NAME',
+    ),
+
+    "freqaimodel_path": Arg(
+        '--freqaimodel-path',
+        help='Specify additional lookup path for freqaimodels.',
+        metavar='PATH',
+    ),
 }
```
freqtrade/commands/freqai_commands.py — new file (24 lines)

```python
import logging
from typing import Any, Dict

from freqtrade.configuration import setup_utils_configuration
from freqtrade.enums import RunMode


logger = logging.getLogger(__name__)


def start_training(args: Dict[str, Any]) -> None:
    """
    Train a model for predicting signals
    :param args: Cli args from Arguments()
    :return: None
    """
    from freqtrade.freqai.training import Training

    config = setup_utils_configuration(args, RunMode.FREQAI)

    training = Training(config)
    training.start()
```
freqtrade/configuration/configuration.py

```diff
@@ -95,6 +95,8 @@ class Configuration:

         self._process_data_options(config)

+        self._process_freqai_options(config)
+
         # Check if the exchange set by the user is supported
         check_exchange(config, config.get('experimental', {}).get('block_bad_exchanges', True))
```
```diff
@@ -446,6 +448,16 @@ class Configuration:

         config.update({'runmode': self.runmode})

+    def _process_freqai_options(self, config: Dict[str, Any]) -> None:
+
+        self._args_to_config(config, argname='freqaimodel',
+                             logstring='Using freqaimodel class name: {}')
+
+        self._args_to_config(config, argname='freqaimodel_path',
+                             logstring='Using freqaimodel path: {}')
+
+        return
+
     def _args_to_config(self, config: Dict[str, Any], argname: str,
                         logstring: str, logfun: Optional[Callable] = None,
                         deprecated_msg: Optional[str] = None) -> None:
```
freqtrade/constants.py

```diff
@@ -55,6 +55,7 @@ FTHYPT_FILEVERSION = 'fthypt_fileversion'
 USERPATH_HYPEROPTS = 'hyperopts'
 USERPATH_STRATEGIES = 'strategies'
 USERPATH_NOTEBOOKS = 'notebooks'
+USERPATH_FREQAIMODELS = 'freqaimodels'

 TELEGRAM_SETTING_OPTIONS = ['on', 'off', 'silent']
 WEBHOOK_FORMAT_OPTIONS = ['form', 'json', 'raw']
```
freqtrade/enums/runmode.py

```diff
@@ -15,9 +15,10 @@ class RunMode(Enum):
     UTIL_NO_EXCHANGE = "util_no_exchange"
     PLOT = "plot"
     WEBSERVER = "webserver"
+    FREQAI = "freqai"
     OTHER = "other"


 TRADING_MODES = [RunMode.LIVE, RunMode.DRY_RUN]
-OPTIMIZE_MODES = [RunMode.BACKTEST, RunMode.EDGE, RunMode.HYPEROPT]
+OPTIMIZE_MODES = [RunMode.BACKTEST, RunMode.EDGE, RunMode.HYPEROPT, RunMode.FREQAI]
 NON_UTIL_MODES = TRADING_MODES + OPTIMIZE_MODES
```
freqtrade/freqai/data_handler.py — new file (434 lines)

```python
import copy
import datetime
import json
import os
import pickle as pk
from typing import Any, Dict, List, Tuple

import numpy as np
import pandas as pd
from joblib import dump, load
from pandas import DataFrame
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import train_test_split

from freqtrade.configuration import TimeRange

SECONDS_IN_DAY = 86400


class DataHandler:
    """
    Class designed to handle all the data for the IFreqaiModel class model.
    Functionalities include holding, saving, loading, and analyzing the data.
    """

    def __init__(self, config: Dict[str, Any], dataframe: DataFrame, data: List):
        self.full_dataframe = dataframe
        (self.training_timeranges,
         self.backtesting_timeranges) = self.split_timerange(
            config['freqai']['full_timerange'],
            config['freqai']['train_period'],
            config['freqai']['backtest_period'])
        self.data = data
        self.data_dictionary = {}
        self.config = config
        self.freq_config = config['freqai']

    def save_data(self, model: Any) -> None:
        """
        Saves all data associated with a model for a single sub-train time range
        :params:
        :model: User trained model which can be reused for inferencing to generate
        predictions
        """

        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        save_path = self.model_path + self.model_filename
        # Save the trained model
        dump(model, save_path + "_model.joblib")
        self.data['model_path'] = self.model_path
        self.data['model_filename'] = self.model_filename
        self.data['training_features_list'] = list(self.data_dictionary['train_features'].columns)
        # store the metadata
        with open(save_path + "_metadata.json", 'w') as fp:
            json.dump(self.data, fp, default=self.np_encoder)

        # save the train data to file so we can check preds for area of applicability later
        self.data_dictionary['train_features'].to_pickle(save_path + "_trained_df.pkl")

        return

    def load_data(self) -> Any:
        """
        Loads all data required to make a prediction on a sub-train time range
        :returns:
        :model: User trained model which can be inferenced for new predictions
        """
        model = load(self.model_path + self.model_filename + "_model.joblib")

        with open(self.model_path + self.model_filename + "_metadata.json", 'r') as fp:
            self.data = json.load(fp)
            if self.data.get('training_features_list'):
                self.training_features_list = [*self.data.get('training_features_list')]

        self.data_dictionary['train_features'] = pd.read_pickle(
            self.model_path + self.model_filename + "_trained_df.pkl")

        self.model_path = self.data['model_path']
        self.model_filename = self.data['model_filename']
        if self.config['freqai']['feature_parameters']['principal_component_analysis']:
            self.pca = pk.load(
                open(self.model_path + self.model_filename + "_pca_object.pkl", "rb"))

        return model

    def make_train_test_datasets(self, filtered_dataframe: DataFrame,
                                 labels: DataFrame) -> Dict:
        """
        Given the dataframe for the full history for training, split the data into
        training and test data according to user specified parameters in the
        configuration file.
        :filtered_dataframe: cleaned dataframe ready to be split.
        :labels: cleaned labels ready to be split.
        """

        if self.config['freqai']['feature_parameters']['weight_factor'] > 0:
            weights = self.set_weights_higher_recent(len(filtered_dataframe))
        else:
            weights = np.ones(len(filtered_dataframe))

        (train_features, test_features, train_labels,
         test_labels, train_weights, test_weights) = train_test_split(
            filtered_dataframe[:filtered_dataframe.shape[0]],
            labels,
            weights,
            **self.config['freqai']['data_split_parameters']
        )

        return self.build_data_dictionary(
            train_features, test_features,
            train_labels, test_labels,
            train_weights, test_weights)

    def filter_features(self, unfiltered_dataframe: DataFrame, training_feature_list: List,
                        labels: DataFrame = None,
                        training_filter: bool = True) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the unfiltered dataframe to extract the user requested features and properly
        remove all NaNs. Any row with a NaN is removed from the training dataset or replaced
        with 0s in the prediction dataset. However, the prediction dataset's do_predict will
        reflect any row that had a NaN and will shield the user from that prediction.
        :params:
        :unfiltered_dataframe: the full dataframe for the present training period
        :training_feature_list: list, the training feature list constructed by
        self.build_feature_list() according to user specified parameters in the
        configuration file.
        :labels: the labels for the dataset
        :training_filter: boolean which lets the function know if it is training data or
        prediction data to be filtered.
        :returns:
        :filtered_dataframe: dataframe cleaned of NaNs and only containing the user
        requested feature set.
        :labels: labels cleaned of NaNs.
        """
        filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
        drop_index = pd.isnull(filtered_dataframe).any(1)  # get the rows that have NaNs

        if training_filter:
            # we don't care about total row number (total no. datapoints) in training,
            # we only care about removing any row with NaNs
            drop_index_labels = pd.isnull(labels)
            filtered_dataframe = filtered_dataframe[
                (drop_index == False) & (drop_index_labels == False)]  # dropping values
            # assuming the labels depend entirely on the dataframe here
            labels = labels[(drop_index == False) & (drop_index_labels == False)]
            print('dropped', len(unfiltered_dataframe) - len(filtered_dataframe),
                  'training data points due to NaNs, ensure you have downloaded',
                  'all historical training data')
            self.data['filter_drop_index_training'] = drop_index

        else:
            # we are backtesting so we need to preserve row number to send back to the
            # strategy, so now we use do_predict to avoid any prediction based on a NaN
            drop_index = pd.isnull(filtered_dataframe).any(1)
            self.data['filter_drop_index_prediction'] = drop_index
            # replacing all NaNs with zeros to avoid issues in 'prediction'; any prediction
            # that was based on a single NaN is ultimately protected from buys with do_predict
            filtered_dataframe.fillna(0, inplace=True)
            drop_index = ~drop_index
            self.do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
            print('dropped', len(self.do_predict) - self.do_predict.sum(), 'of',
                  len(filtered_dataframe), 'prediction data points due to NaNs. These are',
                  'protected from prediction with the do_predict vector returned to strategy.')

        return filtered_dataframe, labels

    def build_data_dictionary(self, train_df: DataFrame, test_df: DataFrame,
                              train_labels: DataFrame, test_labels: DataFrame,
                              train_weights: Any, test_weights: Any) -> Dict:

        self.data_dictionary = {'train_features': train_df,
                                'test_features': test_df,
                                'train_labels': train_labels,
                                'test_labels': test_labels,
                                'train_weights': train_weights,
                                'test_weights': test_weights}

        return self.data_dictionary

    def standardize_data(self, data_dictionary: Dict) -> Dict:
        """
        Standardize all data in the data_dictionary according to the training dataset
        :params:
        :data_dictionary: dictionary containing the cleaned and split training/test data/labels
        :returns:
        :data_dictionary: updated dictionary with standardized values.
        """
        # standardize the data by training stats
        train_mean = data_dictionary['train_features'].mean()
        train_std = data_dictionary['train_features'].std()
        data_dictionary['train_features'] = (
            data_dictionary['train_features'] - train_mean) / train_std
        data_dictionary['test_features'] = (
            data_dictionary['test_features'] - train_mean) / train_std

        train_labels_std = data_dictionary['train_labels'].std()
        train_labels_mean = data_dictionary['train_labels'].mean()
        data_dictionary['train_labels'] = (
            data_dictionary['train_labels'] - train_labels_mean) / train_labels_std
        data_dictionary['test_labels'] = (
            data_dictionary['test_labels'] - train_labels_mean) / train_labels_std

        for item in train_std.keys():
            self.data[item + '_std'] = train_std[item]
            self.data[item + '_mean'] = train_mean[item]

        self.data['labels_std'] = train_labels_std
        self.data['labels_mean'] = train_labels_mean

        return data_dictionary

    def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
        """
        Standardizes a set of data using the mean and standard deviation from
        the associated training data.
        :params:
        :df: Dataframe to be standardized
        """

        for item in df.keys():
            df[item] = (df[item] - self.data[item + '_mean']) / self.data[item + '_std']

        return df

    def split_timerange(self, tr: str, train_split: int = 28,
                        bt_split: int = 7) -> Tuple[list, list]:
        """
        Function which takes a single time range (tr) and splits it
        into sub timeranges to train and backtest on based on user input
        tr: str, full timerange to train on
        train_split: the period length for each training (days). Specified in user
        configuration file
        bt_split: the backtesting length (days). Specified in user configuration file
        """

        train_period = train_split * SECONDS_IN_DAY
        bt_period = bt_split * SECONDS_IN_DAY

        full_timerange = TimeRange.parse_timerange(tr)
        timerange_train = copy.deepcopy(full_timerange)
        timerange_backtest = copy.deepcopy(full_timerange)

        tr_training_list = []
        tr_backtesting_list = []
        first = True
        while True:
            if not first:
                timerange_train.startts = timerange_train.startts + bt_period
            timerange_train.stopts = timerange_train.startts + train_period

            # if a full training period doesn't fit, we stop
            if timerange_train.stopts > full_timerange.stopts:
                break
            first = False
            start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
            stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
            tr_training_list.append(start.strftime("%Y%m%d") + '-' + stop.strftime("%Y%m%d"))

            # associated backtest period
            timerange_backtest.startts = timerange_train.stopts
            timerange_backtest.stopts = timerange_backtest.startts + bt_period
            start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
            stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
            tr_backtesting_list.append(start.strftime("%Y%m%d") + '-' + stop.strftime("%Y%m%d"))

        return tr_training_list, tr_backtesting_list

    def slice_dataframe(self, tr: str, df: DataFrame) -> DataFrame:
        """
        Given a full dataframe, extract the user desired window
        :params:
        :tr: timerange string that we wish to extract from df
        :df: Dataframe containing all candles to run the entire backtest. Here
        it is sliced down to just the present training period.
        """
        timerange = TimeRange.parse_timerange(tr)
        start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
        stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
        df = df.loc[df['date'] >= start, :]
        df = df.loc[df['date'] <= stop, :]

        return df

    def principal_component_analysis(self) -> None:
        """
        Performs Principal Component Analysis on the data for dimensionality reduction
        and outlier detection (see self.remove_outliers())
        No parameters or returns, it acts on the data_dictionary held by the DataHandler.
        """

        from sklearn.decomposition import PCA  # avoid importing if we don't need it

        n_components = self.data_dictionary['train_features'].shape[1]
        pca = PCA(n_components=n_components)
        pca = pca.fit(self.data_dictionary['train_features'])
        n_keep_components = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999)
        pca2 = PCA(n_components=n_keep_components)
        self.data['n_kept_components'] = n_keep_components
        pca2 = pca2.fit(self.data_dictionary['train_features'])
        print('reduced feature dimension by', n_components - n_keep_components)
        print('explained variance', np.sum(pca2.explained_variance_ratio_))
        train_components = pca2.transform(self.data_dictionary['train_features'])
        test_components = pca2.transform(self.data_dictionary['test_features'])

        self.data_dictionary['train_features'] = pd.DataFrame(
            data=train_components,
            columns=['PC' + str(i) for i in range(0, n_keep_components)],
            index=self.data_dictionary['train_features'].index)

        self.data_dictionary['test_features'] = pd.DataFrame(
            data=test_components,
            columns=['PC' + str(i) for i in range(0, n_keep_components)],
            index=self.data_dictionary['test_features'].index)

        self.data['n_kept_components'] = n_keep_components
        self.pca = pca2
        if not os.path.exists(self.model_path):
            os.mkdir(self.model_path)
        pk.dump(pca2, open(self.model_path + self.model_filename + "_pca_object.pkl", "wb"))

        return None

    def compute_distances(self) -> float:
        print('computing average mean distance for all training points')
        pairwise = pairwise_distances(self.data_dictionary['train_features'], n_jobs=-1)
        avg_mean_dist = pairwise.mean(axis=1).mean()
        print('avg_mean_dist', avg_mean_dist)

        return avg_mean_dist

    def remove_outliers(self, predict: bool) -> None:
        """
        Remove data that looks like an outlier based on the distribution of each
        variable.
        :params:
        :predict: boolean which tells the function if this is prediction data or
        training data coming in.
        """

        lower_quantile = self.data_dictionary['train_features'].quantile(0.001)
        upper_quantile = self.data_dictionary['train_features'].quantile(0.999)

        if predict:
            df = self.data_dictionary['prediction_features'][
                (self.data_dictionary['prediction_features'] < upper_quantile) &
                (self.data_dictionary['prediction_features'] > lower_quantile)]
            drop_index = pd.isnull(df).any(1)
            self.data_dictionary['prediction_features'].fillna(0, inplace=True)
            drop_index = ~drop_index
            do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))

            print('remove_outliers() tossed', len(do_predict) - do_predict.sum(),
                  'predictions because they were beyond 3 std deviations from training data.')
            self.do_predict += do_predict
            self.do_predict -= 1

        else:
            filter_train_df = self.data_dictionary['train_features'][
                (self.data_dictionary['train_features'] < upper_quantile) &
                (self.data_dictionary['train_features'] > lower_quantile)]
            drop_index = pd.isnull(filter_train_df).any(1)
            self.data_dictionary['train_features'] = self.data_dictionary[
                'train_features'][(drop_index == False)]
            self.data_dictionary['train_labels'] = self.data_dictionary[
                'train_labels'][(drop_index == False)]
            self.data_dictionary['train_weights'] = self.data_dictionary[
                'train_weights'][(drop_index == False)]

            # do the same for the test data
            filter_test_df = self.data_dictionary['test_features'][
                (self.data_dictionary['test_features'] < upper_quantile) &
                (self.data_dictionary['test_features'] > lower_quantile)]
            drop_index = pd.isnull(filter_test_df).any(1)
            self.data_dictionary['test_labels'] = self.data_dictionary[
                'test_labels'][(drop_index == False)]
            self.data_dictionary['test_features'] = self.data_dictionary[
                'test_features'][(drop_index == False)]
            self.data_dictionary['test_weights'] = self.data_dictionary[
                'test_weights'][(drop_index == False)]

        return

    def build_feature_list(self, config: dict) -> list:
        """
        Build the list of features that will be used to filter
        the full dataframe. The feature list is constructed from the
        user configuration file.
        :params:
        :config: Canonical freqtrade config file containing all
        user defined input in the config['freqai'] dictionary.
        """
        features = []
        for tf in config['freqai']['timeframes']:
            for ft in config['freqai']['base_features']:
                for n in range(config['freqai']['feature_parameters']['shift'] + 1):
                    shift = ''
                    if n > 0:
                        shift = '_shift-' + str(n)
                    features.append(ft + shift + '_' + tf)
                    for p in config['freqai']['corr_pairlist']:
                        features.append(p.split("/")[0] + '-' + ft + shift + '_' + tf)

        print('number of features', len(features))
        return features

    def check_if_pred_in_training_spaces(self) -> None:
        """
        Compares the distance from each prediction point to each training data
        point. It uses this information to estimate a Dissimilarity Index (DI)
        and avoid making predictions on any points that are too far away
        from the training data set.
        """

        print('checking if prediction features are in AOA')
        distance = pairwise_distances(self.data_dictionary['train_features'],
                                      self.data_dictionary['prediction_features'], n_jobs=-1)

        do_predict = np.where(
            distance.min(axis=0) / self.data['avg_mean_dist'] <
            self.config['freqai']['feature_parameters']['DI_threshold'], 1, 0)

        print('Distance checker tossed', len(do_predict) - do_predict.sum(),
              'predictions for being too far from training data')

        self.do_predict += do_predict
        self.do_predict -= 1

    def set_weights_higher_recent(self, num_weights: int) -> np.ndarray:
        """
        Set weights so that recent data is more heavily weighted during
        training than older data.
        """
        weights = np.zeros(num_weights)
        for i in range(1, len(weights)):
            weights[len(weights) - i] = np.exp(
                -i / (self.config['freqai']['feature_parameters']['weight_factor'] * num_weights))
        return weights

    def append_predictions(self, predictions, do_predict, len_dataframe):
        """
        Append backtest prediction from current backtest period to all previous periods
        """

        ones = np.ones(len_dataframe)
        s_mean, s_std = ones * self.data['s_mean'], ones * self.data['s_std']

        self.predictions = np.append(self.predictions, predictions)
        self.do_predict = np.append(self.do_predict, do_predict)
        self.target_mean = np.append(self.target_mean, s_mean)
        self.target_std = np.append(self.target_std, s_std)

        return

    def fill_predictions(self, len_dataframe):
        """
        Back fill values to before the backtesting range so that the dataframe matches size
        when it goes back to the strategy. These rows are not included in the backtest.
        """

        filler = np.zeros(len_dataframe - len(self.predictions))  # startup_candle_count
        self.predictions = np.append(filler, self.predictions)
        self.do_predict = np.append(filler, self.do_predict)
        self.target_mean = np.append(filler, self.target_mean)
        self.target_std = np.append(filler, self.target_std)

        return

    def np_encoder(self, object):
        if isinstance(object, np.generic):
            return object.item()
```
freqtrade/freqai/freqai_interface.py — new file (158 lines)

```python
import gc
import os
import shutil
from abc import ABC
from typing import Any, Dict, Optional, Tuple

import numpy as np
import pandas as pd
from pandas import DataFrame

from freqtrade.freqai.data_handler import DataHandler

pd.options.mode.chained_assignment = None


class IFreqaiModel(ABC):
    """
    Class containing all tools for training and prediction in the strategy.
    User models should inherit from this class as shown in
    templates/ExamplePredictionModel.py where the user overrides
    train(), predict(), fit(), and make_labels().
    """

    def __init__(self, config: Dict[str, Any]) -> None:

        self.config = config
        self.freqai_info = config['freqai']
        self.data_split_parameters = config['freqai']['data_split_parameters']
        self.model_training_parameters = config['freqai']['model_training_parameters']
        self.feature_parameters = config['freqai']['feature_parameters']
        self.full_path = (str(config['user_data_dir']) +
                          "/models/" + self.freqai_info['full_timerange'] +
                          '-' + self.freqai_info['identifier'])
        self.metadata = {}
        self.data = {}
        self.time_last_trained = None
        self.current_time = None
        self.model = None
        self.predictions = None

        if not os.path.exists(self.full_path):
            os.mkdir(self.full_path)
            shutil.copy(self.config['config_files'][0],
                        self.full_path + "/" + self.config['config_files'][0])

    def start(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Entry point to the FreqaiModel; it will train a new model if
        necessary before making the prediction.
        The backtesting and training paradigm is a sliding training window
        with a following backtest window. Both windows slide according to the
        length of the backtest window. This function is not intended to be
        overridden by children of IFreqaiModel, but technically, it can be
        if the user wishes to make deeper changes to the sliding window
        logic.
        :params:
        :dataframe: Full dataframe coming from strategy - it contains the entire
        backtesting timerange + additional historical data necessary to train
        the model.
        :metadata: pair metadata coming from strategy.
        """
        self.pair = metadata['pair']
        self.dh = DataHandler(self.config, dataframe, self.data)

        print('going to train', len(self.dh.training_timeranges),
              'timeranges:', self.dh.training_timeranges)
        predictions = np.array([])
        do_predict = np.array([])
        target_mean = np.array([])
        target_std = np.array([])

        # Loop enforcing the sliding window training/backtesting paradigm.
        # tr_train is the training time range e.g. 1 historical month;
        # tr_backtest is the backtesting time range e.g. the week directly
        # following tr_train. Both of these windows slide through the
        # entire backtest
        for tr_train, tr_backtest in zip(self.dh.training_timeranges,
                                         self.dh.backtesting_timeranges):
            gc.collect()
            # self.config['timerange'] = tr_train
            self.dh.data = {}  # clean the pair specific data between models
            self.freqai_info['training_timerange'] = tr_train
            dataframe_train = self.dh.slice_dataframe(tr_train, dataframe)
            dataframe_backtest = self.dh.slice_dataframe(tr_backtest, dataframe)
            print("training", self.pair, "for", tr_train)
            self.dh.model_path = self.full_path + "/" + 'sub-train' + '-' + str(tr_train) + '/'
            if not self.model_exists(self.pair, training_timerange=tr_train):
                self.model = self.train(dataframe_train, metadata)
                self.dh.save_data(self.model)
            else:
                self.model = self.dh.load_data()

            preds, do_preds = self.predict(dataframe_backtest)

            self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))

        self.dh.fill_predictions(len(dataframe))

        return self.dh.predictions, self.dh.do_predict, self.dh.target_mean, self.dh.target_std

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        """
        User defines the labels here (target values).
        :params:
        :dataframe: the full dataframe for the present training period
        """

        return dataframe

    def train(self, unfiltered_dataframe: DataFrame,
              metadata: dict) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the training data and train a model to it. Train makes heavy use of the
        datahandler for storing, saving, loading, and managing the data.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used to inference (self.predict)
        """

        return unfiltered_dataframe, unfiltered_dataframe

    def fit(self) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        """

        return None

    def predict(self) -> Optional[Tuple[DataFrame, DataFrame]]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        return None

    def model_exists(self, pair: str, training_timerange: str = None) -> bool:
        """
        Given a pair and path, check if a model already exists
        :param pair: pair e.g. BTC/USD
        :param path: path to model
        """
        coin, _ = pair.split('/')
        self.dh.model_filename = ("cb_" + coin.lower() + "_" +
                                  self.freqai_info['trained_stake'] + "_" + training_timerange)
        file_exists = os.path.isfile(self.dh.model_path +
                                     self.dh.model_filename + "_model.joblib")
        if file_exists:
            print("Found model at", self.dh.model_path + self.dh.model_filename)
        else:
            print("Could not find model at",
                  self.dh.model_path + self.dh.model_filename)
        return file_exists
```
freqtrade/freqai/strategy_bridge.py — new file (12 lines)

```python
from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver


class CustomModel:
    """
    A bridge between the user defined IFreqaiModel class
    and the strategy.
    """

    def __init__(self, config):

        self.bridge = FreqaiModelResolver.load_freqaimodel(config)
```
freqtrade/optimize/backtesting.py

```diff
@@ -204,6 +204,12 @@ class Backtesting:
         """
         self.progress.init_step(BacktestState.DATALOAD, 1)

+        if self.config['freqaimodel']:
+            self.required_startup += int((self.config['freqai']['train_period'] * 86400) /
+                                         timeframe_to_seconds(self.config['timeframe']))
+            self.config['startup_candle_count'] = self.required_startup
+
+
         data = history.load_data(
             datadir=self.config['datadir'],
             pairs=self.pairlists.whitelist,
```
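For intuition (a worked example, not part of the diff): with the example config's `train_period` of 30 days and a `5m` timeframe, `required_startup` grows by `int(30 * 86400 / 300) = 8640` candles, ensuring enough history is loaded ahead of the backtest range to train the first model.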
freqtrade/resolvers/freqaimodel_resolver.py — new file (45 lines)

```python
# pragma pylint: disable=attribute-defined-outside-init

"""
This module loads a custom model for freqai
"""
import logging
from pathlib import Path
from typing import Dict

from freqtrade.constants import USERPATH_FREQAIMODELS
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.resolvers import IResolver

logger = logging.getLogger(__name__)


class FreqaiModelResolver(IResolver):
    """
    This class contains all the logic to load a custom FreqaiModel class
    """
    object_type = IFreqaiModel
    object_type_str = "FreqaiModel"
    user_subdir = USERPATH_FREQAIMODELS
    initial_search_path = Path(__file__).parent.parent.joinpath('optimize').resolve()

    @staticmethod
    def load_freqaimodel(config: Dict) -> IFreqaiModel:
        """
        Load the custom class from config parameter
        :param config: configuration dictionary
        """

        freqaimodel_name = config.get('freqaimodel')
        if not freqaimodel_name:
            raise OperationalException(
                "No freqaimodel set. Please use `--freqaimodel` to "
                "specify the FreqaiModel class to use.\n"
            )
        freqaimodel = FreqaiModelResolver.load_object(freqaimodel_name,
                                                      config, kwargs={'config': config},
                                                      extra_dir=config.get('freqaimodel_path'))

        return freqaimodel
```
freqtrade/templates/ExamplePredictionModel.py — new file (139 lines)

```python
from typing import Any, Dict, Tuple

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool
from pandas import DataFrame

from freqtrade.freqai.freqai_interface import IFreqaiModel


class ExamplePredictionModel(IFreqaiModel):
    """
    User created prediction model. The class needs to override three necessary
    functions: predict(), train(), and fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        """
        User defines the labels here (target values).
        :params:
        :dataframe: the full dataframe for the present training period
        """

        dataframe['s'] = (dataframe['close'].shift(-self.feature_parameters['period']).rolling(
            self.feature_parameters['period']).max() / dataframe['close'] - 1)
        self.dh.data['s_mean'] = dataframe['s'].mean()
        self.dh.data['s_std'] = dataframe['s'].std()

        print('label mean', self.dh.data['s_mean'], 'label std', self.dh.data['s_std'])

        return dataframe['s']

    def train(self, unfiltered_dataframe: DataFrame,
              metadata: dict) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the training data and train a model to it. Train makes heavy use of the
        datahandler for storing, saving, loading, and managing the data.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used to inference (self.predict)
        """
        print("--------------------Starting training--------------------")

        # create the full feature list based on user config info
        self.dh.training_features_list = self.dh.build_feature_list(self.config)
        unfiltered_labels = self.make_labels(unfiltered_dataframe)

        # filter the features requested by user in the configuration file
        # and elegantly handle NaNs
        features_filtered, labels_filtered = self.dh.filter_features(
            unfiltered_dataframe, self.dh.training_features_list,
            unfiltered_labels, training_filter=True)

        # split data into train/test data.
        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
        # standardize all data based on train_dataset only
        data_dictionary = self.dh.standardize_data(data_dictionary)

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            self.dh.principal_component_analysis()
        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=False)
        if self.feature_parameters['DI_threshold']:
            self.dh.data['avg_mean_dist'] = self.dh.compute_distances()

        print("length of train data", len(data_dictionary['train_features']))

        model = self.fit(data_dictionary)

        print('Finished training')
        print(f'--------------------done training {metadata["pair"]}--------------------')

        return model

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        """

        train_data = Pool(
            data=data_dictionary['train_features'],
            label=data_dictionary['train_labels'],
            weight=data_dictionary['train_weights']
        )

        test_data = Pool(
            data=data_dictionary['test_features'],
            label=data_dictionary['test_labels'],
            weight=data_dictionary['test_weights']
        )

        model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
                                  **self.model_training_parameters)
        model.fit(X=train_data, eval_set=test_data)

        return model

    def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        print("--------------------Starting prediction--------------------")

        original_feature_list = self.dh.build_feature_list(self.config)
        filtered_dataframe, _ = self.dh.filter_features(
            unfiltered_dataframe, original_feature_list, training_filter=False)
        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
        self.dh.data_dictionary['prediction_features'] = filtered_dataframe

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            pca_components = self.dh.pca.transform(filtered_dataframe)
            self.dh.data_dictionary['prediction_features'] = pd.DataFrame(
                data=pca_components,
                columns=['PC' + str(i) for i in range(0, self.dh.data['n_kept_components'])],
                index=filtered_dataframe.index)

        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=True)  # creates dropped index

        if self.feature_parameters['DI_threshold']:
            self.dh.check_if_pred_in_training_spaces()  # sets do_predict

        predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])

        # compute the non-standardized predictions
        predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']

        print("--------------------Finished prediction--------------------")

        return (predictions, self.dh.do_predict)
```
179
freqtrade/templates/FreqaiExampleStrategy.py
Normal file
179
freqtrade/templates/FreqaiExampleStrategy.py
Normal file
|
@ -0,0 +1,179 @@
|
|||
import logging
from functools import reduce

import numpy as np
import pandas as pd
import talib.abstract as ta
from pandas import DataFrame
from technical import qtpylib

from freqtrade.strategy import merge_informative_pair
from freqtrade.strategy.interface import IStrategy
from freqtrade.freqai.strategy_bridge import CustomModel

logger = logging.getLogger(__name__)

class FreqaiExampleStrategy(IStrategy):
    """
    Example strategy showing how the user connects their own
    IFreqaiModel to the strategy. Namely, the user uses:
    self.model = CustomModel(self.config)
    self.model.bridge.start(dataframe, metadata)
    to make predictions on their data. populate_any_indicators() automatically
    generates the variety of features indicated by the user in the
    canonical freqtrade configuration file under config['freqai'].
    """

    minimal_roi = {
        "0": 0.01,
        "240": -1
    }

    plot_config = {
        'main_plot': {},
        'subplots': {
            "prediction": {
                'prediction': {'color': 'blue'}
            },
            "target_roi": {
                'target_roi': {'color': 'brown'},
            },
            "do_predict": {
                'do_predict': {'color': 'brown'},
            },
        }
    }

    stoploss = -0.05
    use_sell_signal = True
    startup_candle_count: int = 1000

    def informative_pairs(self):
        pairs = self.config['freqai']['corr_pairlist']
        informative_pairs = []
        for tf in self.config['freqai']['timeframes']:
            # one (pair, timeframe) tuple per correlated pair and timeframe
            informative_pairs.extend([(pair, tf) for pair in pairs])
        return informative_pairs

    def populate_any_indicators(self, pair, df, tf, informative=None, coin=''):
        """
        Function designed to automatically generate, name and merge features
        from user indicated timeframes in the configuration file. User can add
        additional features here, but must follow the naming convention.
        :params:
        :pair: pair to be used as informative
        :df: strategy dataframe which will receive merges from informatives
        :tf: timeframe of the dataframe which will modify the feature names
        :informative: the dataframe associated with the informative pair
        :coin: the name of the coin which will modify the feature names.
        """
        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)

        informative[coin + 'rsi'] = ta.RSI(informative, timeperiod=14)
        informative[coin + 'mfi'] = ta.MFI(informative, timeperiod=25)
        informative[coin + 'adx'] = ta.ADX(informative, timeperiod=14)

        informative[coin + '20sma'] = ta.SMA(informative, timeperiod=20)
        informative[coin + '21ema'] = ta.EMA(informative, timeperiod=21)
        informative[coin + 'bmsb'] = np.where(
            informative[coin + '20sma'].lt(informative[coin + '21ema']), 1, 0)
        informative[coin + 'close_over_20sma'] = informative['close'] / informative[coin + '20sma']

        informative[coin + 'ema21'] = ta.EMA(informative, timeperiod=21)
        informative[coin + 'sma20'] = ta.SMA(informative, timeperiod=20)
        stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
        informative[coin + 'srsi-fk'] = stoch['fastk']
        informative[coin + 'srsi-fd'] = stoch['fastd']

        bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
        informative[coin + 'bb_lowerband'] = bollinger['lower']
        informative[coin + 'bb_middleband'] = bollinger['mid']
        informative[coin + 'bb_upperband'] = bollinger['upper']
        informative[coin + 'bb_width'] = ((informative[coin + 'bb_upperband']
                                           - informative[coin + 'bb_lowerband'])
                                          / informative[coin + 'bb_middleband'])
        informative[coin + 'close-bb_lower'] = informative['close'] / informative[coin + 'bb_lowerband']

        informative[coin + 'roc'] = ta.ROC(informative, timeperiod=3)
        macd = ta.MACD(informative)
        informative[coin + 'macd'] = macd['macd']
        informative[coin + 'pct-change'] = informative['close'].pct_change()
        informative[coin + 'relative_volume'] = informative['volume'] / informative['volume'].rolling(10).mean()

        # grab all the freshly built features so they can be shifted and merged
        indicators = [col for col in informative if col.startswith(coin)]

        for n in range(self.freqai_info['feature_parameters']['shift'] + 1):
            if n == 0:
                continue
            informative_shift = informative[indicators].shift(n)
            informative_shift = informative_shift.add_suffix('_shift-' + str(n))
            informative = pd.concat((informative, informative_shift), axis=1)

        df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)
        skip_columns = [(s + '_' + tf) for s in
                        ['date', 'open', 'high', 'low', 'close', 'volume']]
        df = df.drop(columns=skip_columns)

        return df
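    # For illustration (hypothetical values): with coin='BTC-', shift=2 and
    # tf='1h', each base feature such as BTC-rsi ends up in the strategy
    # dataframe as BTC-rsi_1h, BTC-rsi_shift-1_1h and BTC-rsi_shift-2_1h.
    # The shift mechanics in isolation:
    #
    #     informative = pd.DataFrame({'BTC-rsi': [30.0, 45.0, 60.0]})
    #     shifted = informative[['BTC-rsi']].shift(1).add_suffix('_shift-1')
    #     pd.concat((informative, shifted), axis=1)
    #     #    BTC-rsi  BTC-rsi_shift-1
    #     # 0     30.0              NaN
    #     # 1     45.0             30.0
    #     # 2     60.0             45.0
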
    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

        # the configuration file parameters are stored here
        self.freqai_info = self.config['freqai']

        # the model is instantiated here
        self.model = CustomModel(self.config)

        print('Populating indicators...')

        # the following loops are necessary for building the features
        # indicated by the user in the configuration file
        for tf in self.freqai_info['timeframes']:
            dataframe = self.populate_any_indicators(metadata['pair'],
                                                     dataframe.copy(), tf)
            for i in self.freqai_info['corr_pairlist']:
                dataframe = self.populate_any_indicators(i,
                                                         dataframe.copy(), tf,
                                                         coin=i.split("/")[0] + '-')

        # the model returns 4 values: its prediction, an indication of whether
        # the prediction should be accepted, and the target mean/std values
        # from the labels used during each training period
        (dataframe['prediction'], dataframe['do_predict'],
         dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)

        dataframe['target_roi'] = dataframe['target_mean'] + dataframe['target_std'] * 0.5
        dataframe['sell_roi'] = dataframe['target_mean'] - dataframe['target_std'] * 1.5
        return dataframe
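    # Worked example with hypothetical numbers: if a training window produced
    # target_mean = 0.005 and target_std = 0.01, then
    # target_roi = 0.005 + 0.5 * 0.01 = 0.010 and
    # sell_roi   = 0.005 - 1.5 * 0.01 = -0.010,
    # i.e. entries require the model to predict at least a 1% move.
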
    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

        buy_conditions = [
            (dataframe['prediction'] > dataframe['target_roi'])
            & (dataframe['do_predict'] == 1)
        ]

        # the list-plus-reduce pattern ORs together any number of entry rules;
        # with a single combined condition it is equivalent to indexing directly
        if buy_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), 'buy'] = 1

        return dataframe

    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        # sell_goal = eval('self.'+metadata['pair'].split("/")[0]+'_sell_goal.value')
        sell_conditions = [
            (dataframe['prediction'] < dataframe['sell_roi'])
            & (dataframe['do_predict'] == 1)
        ]
        if sell_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), 'sell'] = 1

        return dataframe

    def get_ticker_indicator(self):
        return int(self.config['timeframe'][:-1])

@ -36,6 +36,7 @@ nav:
    - Advanced Strategy: strategy-advanced.md
    - Advanced Hyperopt: advanced-hyperopt.md
    - Sandbox Testing: sandbox-testing.md
    - Freqai: freqai.md
    - FAQ: faq.md
    - SQL Cheat-sheet: sql_cheatsheet.md
    - Strategy migration: strategy_migration.md

8
requirements-freqai.txt
Normal file

@ -0,0 +1,8 @@
# Include all requirements to run the bot.
-r requirements.txt

# Required for freqai
scikit-learn==1.0.2
scikit-optimize==0.9.0
joblib==1.1.0
catboost==1.0.4
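Following the convention of freqtrade's other optional requirements files, these FreqAI dependencies are installed on top of the base requirements with `pip install -r requirements-freqai.txt` from the repository root.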