diff --git a/.gitignore b/.gitignore index 97f77f779..a066001db 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,10 @@ logfile.txt user_data/* !user_data/strategy/sample_strategy.py !user_data/notebooks +!user_data/models +!user_data/freqaimodels +user_data/freqaimodels/* +user_data/models/* user_data/notebooks/* freqtrade-plot.html freqtrade-profit-plot.html @@ -105,3 +109,5 @@ target/ !config_examples/config_ftx.example.json !config_examples/config_full.example.json !config_examples/config_kraken.example.json +!config_examples/config_freqai_futures.example.json +!config_examples/config_freqai_spot.example.json diff --git a/config_examples/config_freqai_futures.example.json b/config_examples/config_freqai_futures.example.json new file mode 100644 index 000000000..e5207a906 --- /dev/null +++ b/config_examples/config_freqai_futures.example.json @@ -0,0 +1,102 @@ +{ + "trading_mode": "futures", + "margin_mode": "isolated", + "max_open_trades": 5, + "stake_currency": "USDT", + "stake_amount": 200, + "tradable_balance_ratio": 1, + "fiat_display_currency": "USD", + "dry_run": true, + "timeframe": "3m", + "dry_run_wallet": 1000, + "cancel_open_orders_on_exit": true, + "unfilledtimeout": { + "entry": 10, + "exit": 30 + }, + "exchange": { + "name": "okx", + "key": "", + "secret": "", + "ccxt_config": { + "enableRateLimit": true + }, + "ccxt_async_config": { + "enableRateLimit": true, + "rateLimit": 200 + }, + "pair_whitelist": [ + "AGLD/USDT:USDT", + "1INCH/USDT:USDT", + "AAVE/USDT:USDT", + "ALGO/USDT:USDT", + "ALPHA/USDT:USDT", + "API3/USDT:USDT", + "AVAX/USDT:USDT", + "AXS/USDT:USDT", + "BCH/USDT:USDT" + ], + "pair_blacklist": [] + }, + "entry_pricing": { + "price_side": "same", + "use_order_book": true, + "order_book_top": 1, + "price_last_balance": 0.0, + "check_depth_of_market": { + "enabled": false, + "bids_to_ask_delta": 1 + } + }, + "exit_pricing": { + "price_side": "other", + "use_order_book": true, + "order_book_top": 1 + }, + "pairlists": [ + { + "method": "StaticPairList" + } + ], + "freqai": { + "startup_candles": 10000, + "timeframes": [ + "3m", + "15m", + "1h" + ], + "train_period": 20, + "backtest_period": 0.001, + "identifier": "constant_retrain_live", + "live_trained_timestamp": 0, + "corr_pairlist": [ + "BTC/USDT:USDT", + "ETH/USDT:USDT" + ], + "feature_parameters": { + "period": 20, + "shift": 2, + "DI_threshold": 0.9, + "weight_factor": 0.9, + "principal_component_analysis": false, + "use_SVM_to_remove_outliers": true, + "stratify": 0, + "indicator_max_period": 20, + "indicator_periods": [10, 20] + }, + "data_split_parameters": { + "test_size": 0.33, + "random_state": 1 + }, + "model_training_parameters": { + "n_estimators": 1000, + "task_type": "CPU" + } + }, + "bot_name": "", + "force_entry_enable": true, + "initial_state": "running", + "internals": { + "process_throttle_secs": 5 + } +} diff --git a/config_examples/config_freqai_spot.example.json b/config_examples/config_freqai_spot.example.json new file mode 100644 index 000000000..7ff01cc07 --- /dev/null +++ b/config_examples/config_freqai_spot.example.json @@ -0,0 +1,97 @@ +{ + "max_open_trades": 1, + "stake_currency": "USDT", + "stake_amount": 900, + "tradable_balance_ratio": 1, + "fiat_display_currency": "USD", + "dry_run": true, + "timeframe": "5m", + "dry_run_wallet": 4000, + "dataformat_ohlcv": "json", + "cancel_open_orders_on_exit": true, + "unfilledtimeout": { + "entry": 10, + "exit": 30 + }, + "exchange": { + "name": "binance", + "key": "", + "secret": "", + "ccxt_config": { + "enableRateLimit": true + }, + 
"ccxt_async_config": { + "enableRateLimit": true, + "rateLimit": 200 + }, + "pair_whitelist": [ + "BTC/USDT", + "ETH/USDT" + ], + "pair_blacklist": [] + }, + "entry_pricing": { + "price_side": "same", + "use_order_book": true, + "order_book_top": 1, + "price_last_balance": 0.0, + "check_depth_of_market": { + "enabled": false, + "bids_to_ask_delta": 1 + } + }, + "exit_pricing": { + "price_side": "other", + "use_order_book": true, + "order_book_top": 1 + }, + "pairlists": [ + { + "method": "StaticPairList" + } + ], + "freqai": { + "startup_candles": 10000, + "timeframes": [ + "5m", + "15m", + "4h" + ], + "train_period": 30, + "backtest_period": 7, + "identifier": "example", + "live_trained_timestamp": 0, + "corr_pairlist": [ + "BTC/USDT", + "ETH/USDT", + "DOT/USDT", + "MATIC/USDT", + "SOL/USDT" + ], + "feature_parameters": { + "period": 500, + "shift": 1, + "DI_threshold": 0, + "weight_factor": 0.9, + "principal_component_analysis": false, + "use_SVM_to_remove_outliers": false, + "stratify": 0, + "indicator_max_period": 50, + "indicator_periods": [10, 20] + }, + "data_split_parameters": { + "test_size": 0.33, + "random_state": 1 + }, + "model_training_parameters": { + "n_estimators": 1000, + "task_type": "CPU" + } + }, + "bot_name": "", + "initial_state": "running", + "forcebuy_enable": false, + "internals": { + "process_throttle_secs": 5 + } +} diff --git a/docker/Dockerfile.freqai b/docker/Dockerfile.freqai new file mode 100644 index 000000000..98e6e31d5 --- /dev/null +++ b/docker/Dockerfile.freqai @@ -0,0 +1,17 @@ +ARG sourceimage=freqtradeorg/freqtrade +ARG sourcetag=develop +FROM ${sourceimage}:${sourcetag} + +USER root + +RUN apt-get install -y libgomp1 + +USER ftuser + +# Install dependencies +COPY requirements-freqai.txt /freqtrade/ + +RUN pip install -r requirements-freqai.txt --user --no-cache-dir +# Temporary step - as the source image will contain the wrong (non-freqai) sourcecode +COPY --chown=ftuser:ftuser . /freqtrade/ + diff --git a/docs/assets/weights_factor.png b/docs/assets/weights_factor.png new file mode 100644 index 000000000..1171a49ba Binary files /dev/null and b/docs/assets/weights_factor.png differ diff --git a/docs/freqai.md b/docs/freqai.md new file mode 100644 index 000000000..647ffcecd --- /dev/null +++ b/docs/freqai.md @@ -0,0 +1,534 @@ +# Freqai + +!!! Note + Freqai is still experimental, and should be used at the user's own discretion. + +Freqai is a module designed to automate a variety of tasks associated with +training a predictive model to provide signals based on input features. + +Among the the features included: + +* Easy large feature set construction based on simple user input +* Sweep model training and backtesting to simulate consistent model retraining through time +* Smart outlier removal of data points from prediction sets using a Dissimilarity Index. +* Data dimensionality reduction with Principal Component Analysis +* Automatic file management for storage of models to be reused during live +* Smart and safe data standardization +* Cleaning of NaNs from the data set before training and prediction. +* Automated live retraining (still VERY experimental. Proceed with caution.) + +## General approach + +The user provides FreqAI with a set of custom indicators (created inside the strategy the same way +a typical Freqtrade strategy is created) as well as a target value (typically some price change into +the future). FreqAI trains a model to predict the target value based on the input of custom indicators. 
+FreqAI will train and save a new model for each pair in the config whitelist.
+Users employ FreqAI to backtest a strategy (emulating reality by retraining the model as new data is
+introduced) and to run the model live to generate buy and sell signals.
+
+## Background and vocabulary
+
+**Features** are the quantities with which a model is trained. $X_i$ represents the
+vector of all features for a single candle. In FreqAI, the user
+builds the features from anything they can construct in the strategy.
+
+**Labels** are the target values that the model is trained to predict. Each set of
+features is associated with a single label, which is also
+defined within the strategy by the user. These labels look forward into the
+future, and are not available to the model during dry-run/live/backtesting.
+
+**Training** refers to the process of feeding individual feature sets into the
+model with their associated labels, with the goal of matching input feature sets to
+those labels.
+
+**Train data** is a subset of the historic data which is fed to the model during
+training to adjust weights. This data directly influences weight connections
+in the model.
+
+**Test data** is a subset of the historic data which is used to evaluate the
+intermediate performance of the model during training. This data does not
+directly influence nodal weights within the model.
+
+## Install prerequisites
+
+Use `pip` to install the prerequisites with:
+
+`pip install -r requirements-freqai.txt`
+
+## Running from the example files
+
+An example strategy, an example prediction model, and an example config can be found in
+`freqtrade/templates/FreqaiExampleStrategy.py`,
+`freqtrade/freqai/prediction_models/CatboostPredictionModel.py`, and
+`config_examples/config_freqai_spot.example.json`, respectively. Assuming the user has downloaded
+the necessary data, FreqAI can be executed from these templates with:
+
+```bash
+freqtrade backtesting --config config_examples/config_freqai_spot.example.json --strategy FreqaiExampleStrategy --freqaimodel CatboostPredictionModel --strategy-path freqtrade/templates --timerange 20220101-20220201
+```
+
+## Configuring the bot
+
+### Example config file
+
+The user interface is isolated to the typical config file. A typical FreqAI
+config setup includes:
+
+```json
+    "freqai": {
+        "startup_candles": 10000,
+        "timeframes": ["5m", "15m", "4h"],
+        "train_period": 30,
+        "backtest_period": 7,
+        "identifier": "unique-id",
+        "corr_pairlist": [
+            "ETH/USD",
+            "LINK/USD",
+            "BNB/USD"
+        ],
+        "feature_parameters": {
+            "period": 24,
+            "shift": 2,
+            "weight_factor": 0
+        },
+        "data_split_parameters": {
+            "test_size": 0.25,
+            "random_state": 42
+        },
+        "model_training_parameters": {
+            "n_estimators": 100,
+            "random_state": 42,
+            "learning_rate": 0.02,
+            "task_type": "CPU"
+        }
+    }
+```
+
+### Building the feature set
+
+!!! Note
+    This section is slightly out of date; please refer to `templates/FreqaiExampleStrategy.py` for the updated method.
+Features are added by the user inside the `populate_any_indicators()` method of the strategy
+by prepending indicators with `%`:
+
+```python
+    def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
+        informative["%-" + coin + "rsi"] = ta.RSI(informative, timeperiod=14)
+        informative["%-" + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
+        informative["%-" + coin + "adx"] = ta.ADX(informative, window=20)
+        bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
+        informative[coin + "bb_lowerband"] = bollinger["lower"]
+        informative[coin + "bb_middleband"] = bollinger["mid"]
+        informative[coin + "bb_upperband"] = bollinger["upper"]
+        informative["%-" + coin + "bb_width"] = (
+            informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
+        ) / informative[coin + "bb_middleband"]
+
+        # The following code automatically adds features according to the `shift` parameter passed
+        # in the config. Do not remove.
+        indicators = [col for col in informative if col.startswith("%")]
+        for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
+            if n == 0:
+                continue
+            informative_shift = informative[indicators].shift(n)
+            informative_shift = informative_shift.add_suffix("_shift-" + str(n))
+            informative = pd.concat((informative, informative_shift), axis=1)
+
+        # The following code safely merges into the base timeframe.
+        # Do not remove.
+        df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
+        skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
+        df = df.drop(columns=skip_columns)
+
+        return df
+```
+
+The user in the present example does not want to pass `bb_lowerband` as a feature to the model,
+and has therefore not prepended it with `%`. The user does, however, wish to pass `bb_width` to the
+model for training/prediction and has therefore prepended it with `%`.
+
+Note: features **must** be defined in `populate_any_indicators()`. Defining features in `populate_indicators()`
+will fail in live/dry mode. If the user wishes to add generalized features that are not associated with
+a specific pair or timeframe, they should use the following structure inside `populate_any_indicators()`
+(as exemplified in `freqtrade/templates/FreqaiExampleStrategy.py`):
+
+```python
+    def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
+
+        # Add generalized indicators here (because in live, it will call only this function
+        # to populate indicators for retraining). Notice how we ensure not to add them multiple
+        # times by associating these generalized indicators to the basepair/timeframe.
+        if pair == metadata["pair"] and tf == self.timeframe:
+            df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
+            df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
+```
+
+(Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`.)
+
+The `timeframes` in the example config above are the timeframes on which each indicator
+created in `populate_any_indicators()` is computed for inclusion in the feature set. In the present case,
+the user is asking for the `5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `adx`, and `bb_width`
+to be included in the feature set.
+
+In addition, the user can ask for each of these features to be included from
+informative pairs using the `corr_pairlist`.
+This means that the present feature set will include all the `base_features` on all the `timeframes`
+for each of `ETH/USD`, `LINK/USD`, and `BNB/USD`.
+
+`shift` is another user-controlled parameter which indicates the number of previous
+candles to include in the present feature set. In other words, `shift: 2` tells
+FreqAI to include the past 2 candles for each of the features included
+in the dataset.
+
+In total, the number of features the present user has created is:
+
+no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift` =
+3 * 4 * 3 * 2 = 72.
+
+### Deciding the sliding training window and backtesting duration
+
+Users define the backtesting timerange with the typical `--timerange` parameter in the user
+configuration file. `train_period` is the duration of the sliding training window, while
+`backtest_period` is the sliding backtesting window, both in number of days (`backtest_period` can be
+a float to indicate sub-daily retraining in live/dry mode). In the present example,
+the user is asking FreqAI to use a training period of 30 days and backtest the subsequent 7 days.
+This means that if the user sets `--timerange 20210501-20210701`,
+FreqAI will train 8 separate models (because the full range comprises 8 weeks),
+and then backtest the week following each of the 8 training
+timeranges. Users can think of this as a "sliding window" which
+emulates FreqAI retraining itself once per week in live using the previous
+month of data.
+
+In live, the required training data is automatically computed and downloaded. However, in backtesting
+the user must manually enter the required number of `startup_candles` in the config. This value
+is used to increase the available data to FreqAI and should be sufficient to enable all indicators
+to be NaN-free at the beginning of the first training timerange. This boils down to identifying the
+highest timeframe (`4h` in the present example) and the longest indicator period (25 in the present example)
+and adding this to the `train_period`. The units need to be in the base candle timeframe:
+
+`startup_candles` = ( 4 hours * 25 max period * 60 minutes/hour + 30 day train_period * 1440 minutes per day ) / 5 min (base timeframe) = 9840.
+
+!!! Note
+    In dry/live, this is all precomputed and handled automatically. Thus, `startup_candles` has no
+    influence on dry/live.
+
+## Running FreqAI
+
+### Training and backtesting
+
+The FreqAI training/backtesting module can be executed with the following command:
+
+```bash
+freqtrade backtesting --strategy FreqaiExampleStrategy --config config_examples/config_freqai_spot.example.json --freqaimodel CatboostPredictionModel --timerange 20210501-20210701
+```
+
+If this command has never been executed with the existing config file, then it will train a new model
+for each pair, for each backtesting window within the bigger `--timerange`.
+
+!!! Note
+    Once the training is completed, the user can execute this again with the same config file and
+    FreqAI will find the trained models and load them instead of spending time training. This is useful
+    if the user wants to tweak (or even hyperopt) buy and sell criteria inside the strategy. If the user
+    *wants* to retrain a new model with the same config file, they should simply change the `identifier`.
+    This way, the user can return to using any model they wish by simply changing the `identifier`.
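+As a quick sanity check of the `startup_candles` arithmetic above, here is a minimal
+standalone sketch (illustrative only, not part of FreqAI; the helper name is hypothetical)
+that reproduces the calculation from the config values:
+
+```python
+# Illustrative sketch: reproduce the startup_candles arithmetic from
+# "Deciding the sliding training window and backtesting duration" above.
+def compute_startup_candles(max_tf_minutes: int, max_indicator_period: int,
+                            train_period_days: int, base_tf_minutes: int) -> int:
+    indicator_minutes = max_tf_minutes * max_indicator_period
+    train_minutes = train_period_days * 1440  # minutes per day
+    return (indicator_minutes + train_minutes) // base_tf_minutes
+
+# 4h highest timeframe, longest indicator period of 25, 30 day train_period, 5m base timeframe
+print(compute_startup_candles(240, 25, 30, 5))  # -> 9840
+```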
+### Building a FreqAI strategy
+
+The FreqAI strategy requires the user to include the following lines of code in the strategy:
+
+```python
+    from freqtrade.freqai.strategy_bridge import CustomModel
+
+    def informative_pairs(self):
+        whitelist_pairs = self.dp.current_whitelist()
+        corr_pairs = self.config["freqai"]["corr_pairlist"]
+        informative_pairs = []
+        for tf in self.config["freqai"]["timeframes"]:
+            for pair in whitelist_pairs:
+                informative_pairs.append((pair, tf))
+            for pair in corr_pairs:
+                if pair in whitelist_pairs:
+                    continue  # avoid duplication
+                informative_pairs.append((pair, tf))
+        return informative_pairs
+
+    def bot_start(self):
+        self.model = CustomModel(self.config)
+
+    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+        self.freqai_info = self.config["freqai"]
+
+        # the following loops are necessary for building the features
+        # indicated by the user in the configuration file.
+        for tf in self.freqai_info["timeframes"]:
+            for i in self.freqai_info["corr_pairlist"]:
+                dataframe = self.populate_any_indicators(
+                    metadata, i, dataframe.copy(), tf, coin=i.split("/")[0] + "-"
+                )
+
+        # the model returns 4 values: the prediction, an indication of whether or not the
+        # prediction should be accepted, and the target mean/std values from the labels
+        # used during each training period.
+        (dataframe["prediction"], dataframe["do_predict"],
+         dataframe["target_mean"], dataframe["target_std"]) = self.model.bridge.start(dataframe, metadata)
+
+        return dataframe
+```
+
+The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py`, which builds
+the feature set with a proper naming convention for the IFreqaiModel to use later.
+
+### Building an IFreqaiModel
+
+FreqAI has an example prediction model based on the popular `Catboost` regression
+(`freqai/prediction_models/CatboostPredictionModel.py`). However, users can customize and create
+their own prediction models using the `IFreqaiModel` class. Users are encouraged to override `train()`,
+`predict()`, and `make_labels()` to customize various aspects of their training procedures.
+
+### Running the model live
+
+FreqAI can be run dry/live using the following command:
+
+```bash
+freqtrade trade --strategy FreqaiExampleStrategy --config config_examples/config_freqai_spot.example.json --freqaimodel CatboostPredictionModel
+```
+
+By default, FreqAI will not find any existing models and will start by training a new one
+given the user configuration settings. Following training, it will use that model to predict for the
+duration of `backtest_period`. After a full `backtest_period` has elapsed, FreqAI will auto retrain
+a new model, and begin making predictions with the updated model. FreqAI backtesting and live both
+permit the user to use fractional days (i.e. 0.1) in the `backtest_period`, which enables more frequent
+retraining. However, the user should be careful that using a fractional `backtest_period` with a large
+`--timerange` in backtesting will result in a huge number of required trainings/models.
+
+If the user wishes to start a dry/live run from a backtested saved model, the user only needs to reuse
+the same `identifier` parameter:
+
+```json
+    "freqai": {
+        "identifier": "example"
+    }
+```
+
+In this case, although FreqAI will initiate with a
+pre-trained model, it will still check to see how much time has elapsed since the model was trained,
+and if a full `backtest_period` has elapsed since the end of the loaded model, FreqAI will self retrain.
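+The elapsed-time check described above can be pictured with a minimal sketch (illustrative
+only, not the actual FreqAI source; the helper name is hypothetical):
+
+```python
+# Illustrative sketch: retrain once a full backtest_period has elapsed since
+# the end of the loaded model's training data.
+import time
+
+SECONDS_IN_DAY = 86400
+
+def should_retrain(trained_timestamp: int, backtest_period_days: float) -> bool:
+    elapsed = time.time() - trained_timestamp
+    return elapsed > backtest_period_days * SECONDS_IN_DAY
+
+# e.g. backtest_period = 0.5 would trigger retraining roughly twice per day
+```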
+## Data analysis techniques
+
+### Controlling the model learning process
+
+The user can define model settings for the data split with `data_split_parameters` and learning parameters
+with `model_training_parameters`. Users are encouraged to visit the Catboost documentation
+for more information on how to select these values. `n_estimators` increases the
+computational effort and the fit to the training data. If a user has a GPU
+installed in their system, they may benefit from changing `task_type` to `GPU`.
+The `weight_factor` allows the user to weight more recent data more strongly
+than past data via an exponential function:
+
+$$ W_i = \exp(\frac{-i}{\alpha*n}) $$
+
+where $W_i$ is the weight of data point $i$ in a total set of $n$ data points,
+and $\alpha$ is the user-set `weight_factor`.
+
+![weight-factor](assets/weights_factor.png)
+
+Finally, `period` defines the offset used for the `labels`. In the present example,
+the user is asking for `labels` that are 24 candles in the future.
+
+### Removing outliers with the Dissimilarity Index
+
+The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each
+prediction made by the model. To do so, FreqAI measures the distance between each training
+data point and all other training data points:
+
+$$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$
+
+where $d_{ab}$ is the distance between the normalized points $a$ and $b$, and $p$
+is the number of features, i.e. the length of the vector $X$. The
+characteristic distance, $\overline{d}$, for a set of training data points is simply the mean
+of the average distances:
+
+$$ \overline{d} = \sum_{a=1}^n(\sum_{b=1}^n(d_{ab}/n)/n) $$
+
+$\overline{d}$ quantifies the spread of the training data, which is compared to
+the distance between each new prediction feature vector, $X_k$, and all the training
+data:
+
+$$ d_k = \min_i d_{k,i} $$
+
+which enables the estimation of a Dissimilarity Index:
+
+$$ DI_k = d_k/\overline{d} $$
+
+Equity and crypto markets suffer from a high level of non-patterned noise in the
+form of outlier data points. The Dissimilarity Index allows predictions which
+are outliers (not existent in the model feature space) to be thrown out due
+to low levels of certainty. Activating the Dissimilarity Index can be achieved with:
+
+```json
+    "freqai": {
+        "feature_parameters": {
+            "DI_threshold": 1
+        }
+    }
+```
+
+The user can tweak the DI with `DI_threshold` to increase or decrease the extrapolation of the
+trained model.
+
+### Reducing data dimensionality with Principal Component Analysis
+
+Users can reduce the dimensionality of their features by activating `principal_component_analysis`:
+
+```json
+    "freqai": {
+        "feature_parameters": {
+            "principal_component_analysis": true
+        }
+    }
+```
+
+which will perform PCA on the features and reduce the dimensionality of the data so that the explained
+variance of the data set is >= 0.999.
+
+### Removing outliers using a Support Vector Machine (SVM)
+
+The user can tell FreqAI to remove outlier data points from the training/test data sets by setting:
+
+```json
+    "freqai": {
+        "feature_parameters": {
+            "use_SVM_to_remove_outliers": true
+        }
+    }
+```
+
+FreqAI will train an SVM on the training data (or components, if the user activated
+`principal_component_analysis`) and remove any data point that it deems to sit beyond the
+feature space.
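+A minimal sketch of this outlier filter follows (simplified and illustrative, not the exact
+FreqAI implementation; FreqAI uses `sklearn`'s `SGDOneClassSVM` internally, and the `nu`
+value below is an arbitrary assumption):
+
+```python
+# Illustrative sketch: fit a one-class SVM on the (normalized) training
+# features and keep only the points it labels as inliers (+1).
+import numpy as np
+from sklearn.linear_model import SGDOneClassSVM
+
+def drop_svm_outliers(train_features: np.ndarray) -> np.ndarray:
+    svm = SGDOneClassSVM(nu=0.1).fit(train_features)
+    inlier_mask = svm.predict(train_features) == 1
+    return train_features[inlier_mask]
+```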
+### Stratifying the data
+
+The user can stratify the training/testing data using:
+
+```json
+    "freqai": {
+        "feature_parameters": {
+            "stratify": 3
+        }
+    }
+```
+
+which will split the data chronologically so that every Xth data point is a testing data point. In the
+present example, the user is asking for every third data point in the dataframe to be used for
+testing; the other points are used for training.
+
+### Setting up a follower
+
+The user can define:
+
+```json
+    "freqai": {
+        "follow_mode": true,
+        "identifier": "example"
+    }
+```
+
+to indicate to the bot that it should not train models, but instead should look for models trained
+by a leader with the same `identifier`. In this example, the user has a leader bot with the
+`identifier: "example"` already running or launched simultaneously with the present follower.
+The follower will load models created by the leader and use them to make predictions.
+
+### Purging old model data
+
+FreqAI stores new model files each time it retrains. These files become obsolete as new models
+are trained and FreqAI adapts to the new market conditions. Users planning to leave FreqAI running
+for extended periods of time with high-frequency retraining should set `purge_old_models` in their
+config:
+
+```json
+    "freqai": {
+        "purge_old_models": true
+    }
+```
+
+which will automatically purge all models older than the two most recently trained ones.
+
+## Defining model expirations
+
+During dry/live, FreqAI trains each pair sequentially (on separate threads/GPU from the main
+Freqtrade bot). This means there is always an age discrepancy between models. If a user is training
+on 50 pairs, and each pair requires 5 minutes to train, the oldest model will be over 4 hours old.
+This may be undesirable if the characteristic time scale (read: trade duration target) for a strategy
+is much less than 4 hours. The user can decide to only make trade entries if the model is less than
+a certain number of hours old by setting `expiration_hours` in the config file:
+
+```json
+    "freqai": {
+        "expiration_hours": 0.5
+    }
+```
+
+In the present example, the user will only allow predictions from models that are less than half an hour
+old.
+
+## Additional information
+
+### Feature normalization
+
+The feature set created by the user is normalized using statistics from the training
+data only; the same transformation is then applied to the test data and to unseen prediction data
+(dry/live/backtest).
+
+### File structure
+
+`user_data_dir/models/` contains all the data associated with the trainings and
+backtests. This file structure is heavily controlled and read by the `FreqaiDataKitchen()`
+and should thus not be modified.
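+A minimal sketch of the train-referenced normalization described under "Feature normalization"
+above (simplified from `FreqaiDataKitchen.normalize_data()`, which scales each feature to
+[-1, 1] using the training min/max; the helper name is hypothetical):
+
+```python
+# Illustrative sketch: scale features to [-1, 1] using *training* statistics,
+# then apply the identical transform to test/prediction data.
+import pandas as pd
+
+def normalize_with_train_stats(train: pd.DataFrame, other: pd.DataFrame):
+    train_min, train_max = train.min(), train.max()
+
+    def scale(df: pd.DataFrame) -> pd.DataFrame:
+        return 2 * (df - train_min) / (train_max - train_min) - 1
+
+    return scale(train), scale(other)
+```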
diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py
index 1e3e2845a..661fc6fcb 100644
--- a/freqtrade/commands/arguments.py
+++ b/freqtrade/commands/arguments.py
@@ -12,7 +12,8 @@ from freqtrade.constants import DEFAULT_CONFIG
 
 ARGS_COMMON = ["verbosity", "logfile", "version", "config", "datadir", "user_data_dir"]
 
-ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search"]
+ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search", "freqaimodel",
+                 "freqaimodel_path"]
 
 ARGS_TRADE = ["db_url", "sd_notify", "dry_run", "dry_run_wallet", "fee"]
diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py
index 3370ce64b..f85b75af1 100644
--- a/freqtrade/commands/cli_options.py
+++ b/freqtrade/commands/cli_options.py
@@ -647,4 +647,14 @@ AVAILABLE_CLI_OPTIONS = {
         nargs='+',
         default=[],
     ),
+    "freqaimodel": Arg(
+        '--freqaimodel',
+        help='Specify a custom freqaimodel.',
+        metavar='NAME',
+    ),
+    "freqaimodel_path": Arg(
+        '--freqaimodel-path',
+        help='Specify additional lookup path for freqaimodels.',
+        metavar='PATH',
+    ),
 }
diff --git a/freqtrade/commands/data_commands.py b/freqtrade/commands/data_commands.py
index 61a99782e..311590e64 100644
--- a/freqtrade/commands/data_commands.py
+++ b/freqtrade/commands/data_commands.py
@@ -12,7 +12,7 @@ from freqtrade.enums import CandleType, RunMode, TradingMode
 from freqtrade.exceptions import OperationalException
 from freqtrade.exchange import timeframe_to_minutes
 from freqtrade.exchange.exchange import market_is_active
-from freqtrade.plugins.pairlist.pairlist_helpers import expand_pairlist
+from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist, expand_pairlist
 from freqtrade.resolvers import ExchangeResolver
 
@@ -50,7 +50,8 @@ def start_download_data(args: Dict[str, Any]) -> None:
     exchange = ExchangeResolver.load_exchange(config['exchange']['name'], config, validate=False)
     markets = [p for p, m in exchange.markets.items() if market_is_active(m)
                or config.get('include_inactive')]
-    expanded_pairs = expand_pairlist(config['pairs'], markets)
+
+    expanded_pairs = dynamic_expand_pairlist(config, markets)
 
     # Manual validations of relevant settings
     if not config['exchange'].get('skip_pair_validation', False):
diff --git a/freqtrade/configuration/config_validation.py b/freqtrade/configuration/config_validation.py
index ee846e7e6..5f1f68554 100644
--- a/freqtrade/configuration/config_validation.py
+++ b/freqtrade/configuration/config_validation.py
@@ -85,6 +85,7 @@ def validate_config_consistency(conf: Dict[str, Any], preliminary: bool = False)
     _validate_unlimited_amount(conf)
     _validate_ask_orderbook(conf)
     validate_migrated_strategy_settings(conf)
+    _validate_freqai(conf)
 
     # validate configuration before returning
     logger.info('Validating configuration ...')
@@ -163,6 +164,21 @@ def _validate_edge(conf: Dict[str, Any]) -> None:
         )
 
 
+def _validate_freqai(conf: Dict[str, Any]) -> None:
+    """
+    FreqAI param validator
+    """
+
+    if not conf.get('freqai', {}):
+        return
+
+    for param in constants.SCHEMA_FREQAI_REQUIRED:
+        if param not in conf.get('freqai', {}):
+            raise OperationalException(
+                f'{param} not found in Freqai config'
+            )
+
+
 def _validate_whitelist(conf: Dict[str, Any]) -> None:
     """
     Dynamic whitelist does not require pair_whitelist to be set - however StaticWhitelist does.
diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py
index d46d54cb0..b4f36aa3c 100644
--- a/freqtrade/configuration/configuration.py
+++ b/freqtrade/configuration/configuration.py
@@ -97,6 +97,8 @@ class Configuration:
 
         self._process_analyze_options(config)
 
+        self._process_freqai_options(config)
+
         # Check if the exchange set by the user is supported
         check_exchange(config, config.get('experimental', {}).get('block_bad_exchanges', True))
 
@@ -461,6 +463,16 @@ class Configuration:
 
         config.update({'runmode': self.runmode})
 
+    def _process_freqai_options(self, config: Dict[str, Any]) -> None:
+
+        self._args_to_config(config, argname='freqaimodel',
+                             logstring='Using freqaimodel class name: {}')
+
+        self._args_to_config(config, argname='freqaimodel_path',
+                             logstring='Using freqaimodel path: {}')
+
+        return
+
     def _args_to_config(self, config: Dict[str, Any], argname: str,
                         logstring: str, logfun: Optional[Callable] = None,
                         deprecated_msg: Optional[str] = None) -> None:
diff --git a/freqtrade/constants.py b/freqtrade/constants.py
index 18dbea259..1b5f01d8b 100644
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@@ -55,6 +55,7 @@ FTHYPT_FILEVERSION = 'fthypt_fileversion'
 USERPATH_HYPEROPTS = 'hyperopts'
 USERPATH_STRATEGIES = 'strategies'
 USERPATH_NOTEBOOKS = 'notebooks'
+USERPATH_FREQAIMODELS = 'freqaimodels'
 
 TELEGRAM_SETTING_OPTIONS = ['on', 'off', 'silent']
 WEBHOOK_FORMAT_OPTIONS = ['form', 'json', 'raw']
@@ -472,7 +473,44 @@
             'remove_pumps': {'type': 'boolean'}
         },
         'required': ['process_throttle_secs', 'allowed_risk']
-    }
+    },
+    "freqai": {
+        "type": "object",
+        "properties": {
+            "timeframes": {"type": "array"},
+            "train_period": {"type": "integer", "default": 0},
+            "backtest_period": {"type": "number", "default": 7},
+            "identifier": {"type": "string", "default": "example"},
+            "corr_pairlist": {"type": "array"},
+            "feature_parameters": {
+                "type": "object",
+                "properties": {
+                    "period": {"type": "integer"},
+                    "shift": {"type": "integer", "default": 0},
+                    "DI_threshold": {"type": "number", "default": 0},
+                    "weight_factor": {"type": "number", "default": 0},
+                    "principal_component_analysis": {"type": "boolean", "default": False},
+                    "use_SVM_to_remove_outliers": {"type": "boolean", "default": False},
+                },
+            },
+            "data_split_parameters": {
+                "type": "object",
+                "properties": {
+                    "test_size": {"type": "number"},
+                    "random_state": {"type": "integer"},
+                },
+            },
+            "model_training_parameters": {
+                "type": "object",
+                "properties": {
+                    "n_estimators": {"type": "integer", "default": 2000},
+                    "random_state": {"type": "integer", "default": 1},
+                    "learning_rate": {"type": "number", "default": 0.02},
+                    "task_type": {"type": "string", "default": "CPU"},
+                },
+            },
+        },
+    },
 },
 }
@@ -516,6 +554,17 @@ SCHEMA_MINIMAL_REQUIRED = [
     'dataformat_trades',
 ]
 
+SCHEMA_FREQAI_REQUIRED = [
+    'timeframes',
+    'train_period',
+    'backtest_period',
+    'identifier',
+    'corr_pairlist',
+    'feature_parameters',
+    'data_split_parameters',
+    'model_training_parameters'
+]
+
 CANCEL_REASON = {
     "TIMEOUT": "cancelled due to timeout",
     "PARTIALLY_FILLED_KEEP_OPEN": "partially filled - keeping order open",
diff --git a/freqtrade/exchange/exchange.py b/freqtrade/exchange/exchange.py
index 4febe5652..dc378a438 100644
--- a/freqtrade/exchange/exchange.py
+++ b/freqtrade/exchange/exchange.py
@@ -86,7 +86,7 @@ class Exchange:
         # TradingMode.SPOT always supported and not required in this list
     ]
 
-    def __init__(self, config: Dict[str, Any], validate: bool = True) -> None:
+    def __init__(self, config: Dict[str, Any], validate: bool = True, freqai: bool = False) -> None:
         """
         Initializes this module with the given config,
         it does basic validation whether the specified exchange and pairs are valid.
@@ -196,7 +196,7 @@ class Exchange:
         self.markets_refresh_interval: int = exchange_config.get(
             "markets_refresh_interval", 60) * 60
 
-        if self.trading_mode != TradingMode.SPOT:
+        if self.trading_mode != TradingMode.SPOT and freqai is False:
             self.fill_leverage_tiers()
         self.additional_exchange_init()
diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
new file mode 100644
index 000000000..3099c98a2
--- /dev/null
+++ b/freqtrade/freqai/data_drawer.py
@@ -0,0 +1,314 @@
+import collections
+import json
+import logging
+import re
+import shutil
+import threading
+from pathlib import Path
+from typing import Any, Dict, Tuple
+
+# import pickle as pk
+import numpy as np
+import pandas as pd
+from pandas import DataFrame
+
+
+# from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+
+
+logger = logging.getLogger(__name__)
+
+
+class FreqaiDataDrawer:
+    """
+    Class aimed at holding all pair models/info in memory for better inferencing/retraining/
+    saving/loading to/from disk.
+    This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
+    reinstantiated for each coin.
+    """
+
+    def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
+
+        self.config = config
+        self.freqai_info = config.get("freqai", {})
+        # dictionary holding all pair metadata necessary to load in from disk
+        self.pair_dict: Dict[str, Any] = {}
+        # dictionary holding all actively inferenced models in memory given a model filename
+        self.model_dictionary: Dict[str, Any] = {}
+        self.model_return_values: Dict[str, Any] = {}
+        self.pair_data_dict: Dict[str, Any] = {}
+        self.historic_data: Dict[str, Any] = {}
+        self.follower_dict: Dict[str, Any] = {}
+        self.full_path = full_path
+        self.follow_mode = follow_mode
+        if follow_mode:
+            self.create_follower_dict()
+        self.load_drawer_from_disk()
+        self.training_queue: Dict[str, int] = {}
+        self.history_lock = threading.Lock()
+
+    def load_drawer_from_disk(self):
+        """
+        Locate and load a previously saved data drawer full of all pair model metadata in
+        present model folder.
+        :returns:
+        exists: bool = whether or not the drawer was located
+        """
+        exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
+        if exists:
+            with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
+                self.pair_dict = json.load(fp)
+        elif not self.follow_mode:
+            logger.info("Could not find existing datadrawer, starting from scratch")
+        else:
+            logger.warning(
+                f"Follower could not find pair_dictionary at {self.full_path}, "
+                "sending null values back to strategy"
+            )
+
+        return exists
+
+    def save_drawer_to_disk(self):
+        """
+        Save data drawer full of all pair model metadata in present model folder.
+ """ + with open(self.full_path / str("pair_dictionary.json"), "w") as fp: + json.dump(self.pair_dict, fp, default=self.np_encoder) + + def save_follower_dict_to_disk(self): + """ + Save follower dictionary to disk (used by strategy for persistent prediction targets) + """ + follower_name = self.config.get("bot_name", "follower1") + with open( + self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w" + ) as fp: + json.dump(self.follower_dict, fp, default=self.np_encoder) + + def create_follower_dict(self): + """ + Create or dictionary for each follower to maintain unique persistent prediction targets + """ + follower_name = self.config.get("bot_name", "follower1") + whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist") + + exists = ( + Path(self.full_path / str("follower_dictionary-" + follower_name + ".json")) + .resolve() + .exists() + ) + + if exists: + logger.info("Found an existing follower dictionary") + + for pair in whitelist_pairs: + self.follower_dict[pair] = {} + + with open( + self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w" + ) as fp: + json.dump(self.follower_dict, fp, default=self.np_encoder) + + def np_encoder(self, object): + if isinstance(object, np.generic): + return object.item() + + def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool, bool]: + """ + Locate and load existing model metadata from persistent storage. If not located, + create a new one and append the current pair to it and prepare it for its first + training + :params: + metadata: dict = strategy furnished pair metadata + :returns: + model_filename: str = unique filename used for loading persistent objects from disk + trained_timestamp: int = the last time the coin was trained + coin_first: bool = If the coin is fresh without metadata + return_null_array: bool = Follower could not find pair metadata + """ + pair_in_dict = self.pair_dict.get(pair) + data_path_set = self.pair_dict.get(pair, {}).get("data_path", None) + return_null_array = False + + if pair_in_dict: + model_filename = self.pair_dict[pair]["model_filename"] + trained_timestamp = self.pair_dict[pair]["trained_timestamp"] + coin_first = self.pair_dict[pair]["first"] + elif not self.follow_mode: + self.pair_dict[pair] = {} + model_filename = self.pair_dict[pair]["model_filename"] = "" + coin_first = self.pair_dict[pair]["first"] = True + trained_timestamp = self.pair_dict[pair]["trained_timestamp"] = 0 + self.pair_dict[pair]["priority"] = len(self.pair_dict) + + if not data_path_set and self.follow_mode: + logger.warning( + f"Follower could not find current pair {pair} in " + f"pair_dictionary at path {self.full_path}, sending null values " + "back to strategy." 
+ ) + return_null_array = True + + return model_filename, trained_timestamp, coin_first, return_null_array + + def set_pair_dict_info(self, metadata: dict) -> None: + pair_in_dict = self.pair_dict.get(metadata["pair"]) + if pair_in_dict: + return + else: + self.pair_dict[metadata["pair"]] = {} + self.pair_dict[metadata["pair"]]["model_filename"] = "" + self.pair_dict[metadata["pair"]]["first"] = True + self.pair_dict[metadata["pair"]]["trained_timestamp"] = 0 + self.pair_dict[metadata["pair"]]["priority"] = len(self.pair_dict) + return + + def pair_to_end_of_training_queue(self, pair: str) -> None: + # march all pairs up in the queue + for p in self.pair_dict: + self.pair_dict[p]["priority"] -= 1 + # send pair to end of queue + self.pair_dict[pair]["priority"] = len(self.pair_dict) + + def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None: + """ + Set the initial return values to a persistent dataframe. This avoids needing to repredict on + historical candles, and also stores historical predictions despite retrainings (so stored + predictions are true predictions, not just inferencing on trained data) + """ + self.model_return_values[pair] = pd.DataFrame() + for label in dk.label_list: + self.model_return_values[pair][label] = pred_df[label] + self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label] + self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label] + + if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + self.model_return_values[pair]["DI_values"] = dk.DI_values + + self.model_return_values[pair]["do_predict"] = do_preds + + def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None: + + # strat seems to feed us variable sized dataframes - and since we are trying to build our + # own return array in the same shape, we need to figure out how the size has changed + # and adapt our stored/returned info accordingly. 
+        length_difference = len(self.model_return_values[pair]) - len_df
+        i = 0
+
+        if length_difference == 0:
+            i = 1
+        elif length_difference > 0:
+            i = length_difference + 1
+
+        df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i)
+
+        for label in dk.label_list:
+            df[label].iloc[-1] = predictions[label].iloc[-1]
+            df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label]
+            df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label]
+        # df['prediction'].iloc[-1] = predictions[-1]
+        df["do_predict"].iloc[-1] = do_preds[-1]
+
+        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
+            df["DI_values"].iloc[-1] = dk.DI_values[-1]
+
+        if length_difference < 0:
+            prepend_df = pd.DataFrame(
+                np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
+            )
+            df = pd.concat([prepend_df, df], axis=0)
+            # store the prepended frame back, otherwise the concat result would be lost
+            self.model_return_values[pair] = df
+
+    def attach_return_values_to_return_dataframe(self, pair: str, dataframe) -> DataFrame:
+        """
+        Attach the return values to the strat dataframe
+        :params:
+        dataframe: DataFrame = strat dataframe
+        :returns:
+        dataframe: DataFrame = strat dataframe with return values attached
+        """
+        df = self.model_return_values[pair]
+        to_keep = [col for col in dataframe.columns if not col.startswith("&")]
+        dataframe = pd.concat([dataframe[to_keep], df], axis=1)
+        return dataframe
+
+    def return_null_values_to_strategy(self, dataframe: DataFrame, dk) -> None:
+        """
+        Build 0 filled dataframe to return to strategy
+        """
+
+        dk.find_features(dataframe)
+
+        for label in dk.label_list:
+            dataframe[label] = 0
+            dataframe[f"{label}_mean"] = 0
+            dataframe[f"{label}_std"] = 0
+
+        # dataframe['prediction'] = 0
+        dataframe["do_predict"] = 0
+
+        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
+            dataframe["DI_values"] = 0
+
+        dk.return_dataframe = dataframe
+
+    def purge_old_models(self) -> None:
+
+        model_folders = [x for x in self.full_path.iterdir() if x.is_dir()]
+
+        pattern = re.compile(r"sub-train-(\w+)(\d{10})")
+
+        delete_dict: Dict[str, Any] = {}
+
+        for dir in model_folders:
+            result = pattern.match(str(dir.name))
+            if result is None:
+                break
+            coin = result.group(1)
+            timestamp = result.group(2)
+
+            if coin not in delete_dict:
+                delete_dict[coin] = {}
+                delete_dict[coin]["num_folders"] = 1
+                delete_dict[coin]["timestamps"] = {int(timestamp): dir}
+            else:
+                delete_dict[coin]["num_folders"] += 1
+                delete_dict[coin]["timestamps"][int(timestamp)] = dir
+
+        for coin in delete_dict:
+            if delete_dict[coin]["num_folders"] > 2:
+                sorted_dict = collections.OrderedDict(
+                    sorted(delete_dict[coin]["timestamps"].items())
+                )
+                num_delete = len(sorted_dict) - 2
+                deleted = 0
+                for k, v in sorted_dict.items():
+                    if deleted >= num_delete:
+                        break
+                    logger.info(f"Freqai purging old model file {v}")
+                    shutil.rmtree(v)
+                    deleted += 1
+
+    def update_follower_metadata(self):
+        # follower needs to load from disk to get any changes made by leader to pair_dict
+        self.load_drawer_from_disk()
+        if self.config.get("freqai", {}).get("purge_old_models", False):
+            self.purge_old_models()
+
+    # to be used if we want to send predictions directly to the follower instead of forcing
+    # follower to load models and inference
+    # def save_model_return_values_to_disk(self) -> None:
+    #     with open(self.full_path / str('model_return_values.json'), "w") as fp:
+    #         json.dump(self.model_return_values, fp, default=self.np_encoder)
+
+    # def load_model_return_values_from_disk(self, dk: FreqaiDataKitchen) -> FreqaiDataKitchen:
+    #     exists = Path(self.full_path /
str('model_return_values.json')).resolve().exists() + # if exists: + # with open(self.full_path / str('model_return_values.json'), "r") as fp: + # self.model_return_values = json.load(fp) + # elif not self.follow_mode: + # logger.info("Could not find existing datadrawer, starting from scratch") + # else: + # logger.warning(f'Follower could not find pair_dictionary at {self.full_path} ' + # 'sending null values back to strategy') + + # return exists, dk diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py new file mode 100644 index 000000000..1f2ee61b3 --- /dev/null +++ b/freqtrade/freqai/data_kitchen.py @@ -0,0 +1,1305 @@ +import copy +import datetime +import json +import logging +import pickle as pk +import shutil +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import numpy as np +import numpy.typing as npt +import pandas as pd +from joblib import dump, load # , Parallel, delayed # used for auto distribution assignment +from pandas import DataFrame +from sklearn import linear_model +from sklearn.metrics.pairwise import pairwise_distances +from sklearn.model_selection import train_test_split + +from freqtrade.configuration import TimeRange +from freqtrade.data.history import load_pair_history +from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data +from freqtrade.exceptions import OperationalException +from freqtrade.freqai.data_drawer import FreqaiDataDrawer +from freqtrade.resolvers import ExchangeResolver +from freqtrade.strategy.interface import IStrategy + + +SECONDS_IN_DAY = 86400 + +logger = logging.getLogger(__name__) + + +class FreqaiDataKitchen: + """ + Class designed to analyze data for a single pair. Employed by the IFreqaiModel class. + Functionalities include holding, saving, loading, and analyzing the data. 
+ author: Robert Caulk, rob.caulk@gmail.com + """ + + def __init__( + self, + config: Dict[str, Any], + data_drawer: FreqaiDataDrawer, + live: bool = False, + pair: str = "", + ): + self.data: Dict[Any, Any] = {} + self.data_dictionary: Dict[Any, Any] = {} + self.config = config + self.freqai_config = config["freqai"] + self.predictions: npt.ArrayLike = np.array([]) + self.do_predict: npt.ArrayLike = np.array([]) + self.target_mean: npt.ArrayLike = np.array([]) + self.target_std: npt.ArrayLike = np.array([]) + self.full_predictions: npt.ArrayLike = np.array([]) + self.full_do_predict: npt.ArrayLike = np.array([]) + self.full_DI_values: npt.ArrayLike = np.array([]) + self.full_target_mean: npt.ArrayLike = np.array([]) + self.full_target_std: npt.ArrayLike = np.array([]) + self.data_path = Path() + self.label_list: List = [] + self.model_filename: str = "" + self.live = live + self.pair = pair + self.svm_model: linear_model.SGDOneClassSVM = None + self.set_all_pairs() + if not self.live: + self.full_timerange = self.create_fulltimerange( + self.config["timerange"], self.freqai_config.get("train_period") + ) + + (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( + self.full_timerange, + config["freqai"]["train_period"], + config["freqai"]["backtest_period"], + ) + # self.strat_dataframe: DataFrame = strat_dataframe + self.dd = data_drawer + + def set_paths( + self, + pair: str, + trained_timestamp: int = None, + ) -> None: + """ + Set the paths to the data for the present coin/botloop + :params: + metadata: dict = strategy furnished pair metadata + trained_timestamp: int = timestamp of most recent training + """ + self.full_path = Path( + self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier")) + ) + + self.data_path = Path( + self.full_path / str("sub-train" + "-" + pair.split("/")[0] + str(trained_timestamp)) + ) + + return + + def save_data(self, model: Any, coin: str = "", keras_model=False, label=None) -> None: + """ + Saves all data associated with a model for a single sub-train time range + :params: + :model: User trained model which can be reused for inferencing to generate + predictions + """ + + if not self.data_path.is_dir(): + self.data_path.mkdir(parents=True, exist_ok=True) + + save_path = Path(self.data_path) + + # Save the trained model + if not keras_model: + dump(model, save_path / f"{self.model_filename}_model.joblib") + else: + model.save(save_path / f"{self.model_filename}_model.h5") + + if self.svm_model is not None: + dump(self.svm_model, save_path / str(self.model_filename + "_svm_model.joblib")) + + self.data["data_path"] = str(self.data_path) + self.data["model_filename"] = str(self.model_filename) + self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns) + self.data["label_list"] = self.label_list + # store the metadata + with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp: + json.dump(self.data, fp, default=self.np_encoder) + + # save the train data to file so we can check preds for area of applicability later + self.data_dictionary["train_features"].to_pickle( + save_path / str(self.model_filename + "_trained_df.pkl") + ) + + if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"): + pk.dump( + self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb") + ) + + # if self.live: + self.dd.model_dictionary[self.model_filename] = model + self.dd.pair_dict[coin]["model_filename"] = 
self.model_filename + self.dd.pair_dict[coin]["data_path"] = str(self.data_path) + self.dd.save_drawer_to_disk() + + # TODO add a helper function to let user save/load any data they are custom adding. We + # do not want them having to edit the default save/load methods here. Below is an example + # of what we do NOT want. + + # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'): + # self.data_dictionary["upper_quantiles"].to_pickle( + # save_path / str(self.model_filename + "_upper_quantiles.pkl") + # ) + + # self.data_dictionary["lower_quantiles"].to_pickle( + # save_path / str(self.model_filename + "_lower_quantiles.pkl") + # ) + + return + + def load_data(self, coin: str = "", keras_model=False) -> Any: + """ + loads all data required to make a prediction on a sub-train time range + :returns: + :model: User trained model which can be inferenced for new predictions + """ + + if not self.dd.pair_dict[coin]["model_filename"]: + return None + + if self.live: + self.model_filename = self.dd.pair_dict[coin]["model_filename"] + self.data_path = Path(self.dd.pair_dict[coin]["data_path"]) + if self.freqai_config.get("follow_mode", False): + # follower can be on a different system which is rsynced to the leader: + self.data_path = Path( + self.config["user_data_dir"] + / "models" + / self.data_path.parts[-2] + / self.data_path.parts[-1] + ) + + with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp: + self.data = json.load(fp) + self.training_features_list = self.data["training_features_list"] + self.label_list = self.data["label_list"] + + self.data_dictionary["train_features"] = pd.read_pickle( + self.data_path / str(self.model_filename + "_trained_df.pkl") + ) + + # TODO add a helper function to let user save/load any data they are custom adding. We + # do not want them having to edit the default save/load methods here. Below is an example + # of what we do NOT want. 
+ + # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'): + # self.data_dictionary["upper_quantiles"] = pd.read_pickle( + # self.data_path / str(self.model_filename + "_upper_quantiles.pkl") + # ) + + # self.data_dictionary["lower_quantiles"] = pd.read_pickle( + # self.data_path / str(self.model_filename + "_lower_quantiles.pkl") + # ) + + # self.data_path = Path(self.data["data_path"]) + # self.model_filename = self.data["model_filename"] + + # try to access model in memory instead of loading object from disk to save time + if self.live and self.model_filename in self.dd.model_dictionary: + model = self.dd.model_dictionary[self.model_filename] + elif not keras_model: + model = load(self.data_path / str(self.model_filename + "_model.joblib")) + else: + from tensorflow import keras + + model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5")) + + if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists(): + self.svm_model = load(self.data_path / str(self.model_filename + "_svm_model.joblib")) + + if not model: + raise OperationalException( + f"Unable to load model, ensure model exists at " f"{self.data_path} " + ) + + if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: + self.pca = pk.load( + open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb") + ) + + return model + + def make_train_test_datasets( + self, filtered_dataframe: DataFrame, labels: DataFrame + ) -> Dict[Any, Any]: + """ + Given the dataframe for the full history for training, split the data into + training and test data according to user specified parameters in configuration + file. + :filtered_dataframe: cleaned dataframe ready to be split. + :labels: cleaned labels ready to be split. + """ + + weights: npt.ArrayLike + if self.freqai_config["feature_parameters"].get("weight_factor", 0) > 0: + weights = self.set_weights_higher_recent(len(filtered_dataframe)) + else: + weights = np.ones(len(filtered_dataframe)) + + if self.freqai_config["feature_parameters"].get("stratify", 0) > 0: + stratification = np.zeros(len(filtered_dataframe)) + for i in range(1, len(stratification)): + if i % self.freqai_config.get("feature_parameters", {}).get("stratify", 0) == 0: + stratification[i] = 1 + else: + stratification = None + + ( + train_features, + test_features, + train_labels, + test_labels, + train_weights, + test_weights, + ) = train_test_split( + filtered_dataframe[: filtered_dataframe.shape[0]], + labels, + weights, + stratify=stratification, + # shuffle=False, + **self.config["freqai"]["data_split_parameters"], + ) + + return self.build_data_dictionary( + train_features, test_features, train_labels, test_labels, train_weights, test_weights + ) + + def filter_features( + self, + unfiltered_dataframe: DataFrame, + training_feature_list: List, + label_list: List = list(), + # labels: DataFrame = pd.DataFrame(), + training_filter: bool = True, + ) -> Tuple[DataFrame, DataFrame]: + """ + Filter the unfiltered dataframe to extract the user requested features/labels and properly + remove all NaNs. Any row with a NaN is removed from training dataset or replaced with + 0s in the prediction dataset. However, prediction dataset do_predict will reflect any + row that had a NaN and will shield user from that prediction. 
+ :params: + :unfiltered_dataframe: the full dataframe for the present training period + :training_feature_list: list, the training feature list constructed by + self.build_feature_list() according to user specified parameters in the configuration file. + :labels: the labels for the dataset + :training_filter: boolean which lets the function know if it is training data or + prediction data to be filtered. + :returns: + :filtered_dataframe: dataframe cleaned of NaNs and only containing the user + requested feature set. + :labels: labels cleaned of NaNs. + """ + filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1) + filtered_dataframe = filtered_dataframe.replace([np.inf, -np.inf], np.nan) + + drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs, + drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement. + if ( + training_filter + ): # we don't care about total row number (total no. datapoints) in training, we only care + # about removing any row with NaNs + # if labels has multiple columns (user wants to train multiple models), we detect here + labels = unfiltered_dataframe.filter(label_list, axis=1) + drop_index_labels = pd.isnull(labels).any(1) + drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0) + filtered_dataframe = filtered_dataframe[ + (drop_index == 0) & (drop_index_labels == 0) + ] # dropping values + labels = labels[ + (drop_index == 0) & (drop_index_labels == 0) + ] # assuming the labels depend entirely on the dataframe here. + logger.info( + f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points" + f" due to NaNs in populated dataset {len(unfiltered_dataframe)}." + ) + if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live: + logger.warning( + f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent" + " of training data dropped due to NaNs, model may perform inconsistent" + "with expectations" + ) + self.data["filter_drop_index_training"] = drop_index + + else: + # we are backtesting so we need to preserve row number to send back to strategy, + # so now we use do_predict to avoid any prediction based on a NaN + drop_index = pd.isnull(filtered_dataframe).any(1) + self.data["filter_drop_index_prediction"] = drop_index + filtered_dataframe.fillna(0, inplace=True) + # replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction + # that was based on a single NaN is ultimately protected from buys with do_predict + drop_index = ~drop_index + self.do_predict = np.array(drop_index.replace(True, 1).replace(False, 0)) + if (len(self.do_predict) - self.do_predict.sum()) > 0: + logger.info( + "dropped %s of %s prediction data points due to NaNs.", + len(self.do_predict) - self.do_predict.sum(), + len(filtered_dataframe), + ) + labels = [] + + return filtered_dataframe, labels + + def build_data_dictionary( + self, + train_df: DataFrame, + test_df: DataFrame, + train_labels: DataFrame, + test_labels: DataFrame, + train_weights: Any, + test_weights: Any, + ) -> Dict: + + self.data_dictionary = { + "train_features": train_df, + "test_features": test_df, + "train_labels": train_labels, + "test_labels": test_labels, + "train_weights": train_weights, + "test_weights": test_weights, + } + + return self.data_dictionary + + def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + """ + Normalize all data in the data_dictionary according to the training dataset + :params: + :data_dictionary: 
dictionary containing the cleaned and split training/test data/labels + :returns: + :data_dictionary: updated dictionary with standardized values. + """ + # standardize the data by training stats + train_max = data_dictionary["train_features"].max() + train_min = data_dictionary["train_features"].min() + data_dictionary["train_features"] = ( + 2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1 + ) + data_dictionary["test_features"] = ( + 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1 + ) + + train_labels_max = data_dictionary["train_labels"].max() + train_labels_min = data_dictionary["train_labels"].min() + data_dictionary["train_labels"] = ( + 2 + * (data_dictionary["train_labels"] - train_labels_min) + / (train_labels_max - train_labels_min) + - 1 + ) + data_dictionary["test_labels"] = ( + 2 + * (data_dictionary["test_labels"] - train_labels_min) + / (train_labels_max - train_labels_min) + - 1 + ) + + for item in train_max.keys(): + self.data[item + "_max"] = train_max[item] + self.data[item + "_min"] = train_min[item] + + self.data["labels_max"] = train_labels_max.to_dict() + self.data["labels_min"] = train_labels_min.to_dict() + + return data_dictionary + + def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: + """ + Normalize a set of data using the mean and standard deviation from + the associated training data. + :params: + :df: Dataframe to be standardized + """ + + for item in df.keys(): + df[item] = ( + 2 + * (df[item] - self.data[item + "_min"]) + / (self.data[item + "_max"] - self.data[item + "_min"]) + - 1 + ) + + return df + + def split_timerange( + self, tr: str, train_split: int = 28, bt_split: int = 7 + ) -> Tuple[list, list]: + """ + Function which takes a single time range (tr) and splits it + into sub timeranges to train and backtest on based on user input + tr: str, full timerange to train on + train_split: the period length for the each training (days). Specified in user + configuration file + bt_split: the backtesting length (dats). 
Specified in user configuration file + """ + + train_period = train_split * SECONDS_IN_DAY + bt_period = bt_split * SECONDS_IN_DAY + + full_timerange = TimeRange.parse_timerange(tr) + config_timerange = TimeRange.parse_timerange(self.config["timerange"]) + if config_timerange.stopts == 0: + config_timerange.stopts = int( + datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + ) + timerange_train = copy.deepcopy(full_timerange) + timerange_backtest = copy.deepcopy(full_timerange) + + tr_training_list = [] + tr_backtesting_list = [] + tr_training_list_timerange = [] + tr_backtesting_list_timerange = [] + first = True + # within_config_timerange = True + while True: + if not first: + timerange_train.startts = timerange_train.startts + bt_period + timerange_train.stopts = timerange_train.startts + train_period + + first = False + start = datetime.datetime.utcfromtimestamp(timerange_train.startts) + stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts) + tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) + tr_training_list_timerange.append(copy.deepcopy(timerange_train)) + + # associated backtest period + + timerange_backtest.startts = timerange_train.stopts + + timerange_backtest.stopts = timerange_backtest.startts + bt_period + + if timerange_backtest.stopts > config_timerange.stopts: + timerange_backtest.stopts = config_timerange.stopts + + start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts) + stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts) + tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) + tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) + + # ensure we are predicting on exactly same amount of data as requested by user defined + # --timerange + if timerange_backtest.stopts == config_timerange.stopts: + break + + # print(tr_training_list, tr_backtesting_list) + return tr_training_list_timerange, tr_backtesting_list_timerange + + def slice_dataframe(self, timerange: TimeRange, df: DataFrame) -> DataFrame: + """ + Given a full dataframe, extract the user desired window + :params: + :tr: timerange string that we wish to extract from df + :df: Dataframe containing all candles to run the entire backtest. Here + it is sliced down to just the present training period. + """ + # timerange = TimeRange.parse_timerange(tr) + start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) + stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) + df = df.loc[df["date"] >= start, :] + df = df.loc[df["date"] <= stop, :] + + return df + + def principal_component_analysis(self) -> None: + """ + Performs Principal Component Analysis on the data for dimensionality reduction + and outlier detection (see self.remove_outliers()) + No parameters or returns, it acts on the data_dictionary held by the DataHandler. 
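+        The number of retained components is chosen as sketched below: the
+        smallest count whose cumulative explained variance ratio reaches
+        ~0.999 of the training set variance, i.e.
+            n_keep = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999)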
+ """ + + from sklearn.decomposition import PCA # avoid importing if we dont need it + + n_components = self.data_dictionary["train_features"].shape[1] + pca = PCA(n_components=n_components) + pca = pca.fit(self.data_dictionary["train_features"]) + n_keep_components = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999) + pca2 = PCA(n_components=n_keep_components) + self.data["n_kept_components"] = n_keep_components + pca2 = pca2.fit(self.data_dictionary["train_features"]) + logger.info("reduced feature dimension by %s", n_components - n_keep_components) + logger.info("explained variance %f", np.sum(pca2.explained_variance_ratio_)) + train_components = pca2.transform(self.data_dictionary["train_features"]) + test_components = pca2.transform(self.data_dictionary["test_features"]) + + self.data_dictionary["train_features"] = pd.DataFrame( + data=train_components, + columns=["PC" + str(i) for i in range(0, n_keep_components)], + index=self.data_dictionary["train_features"].index, + ) + + # keeping a copy of the non-transformed features so we can check for errors during + # model load from disk + self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list) + self.training_features_list = self.data_dictionary["train_features"].columns + + self.data_dictionary["test_features"] = pd.DataFrame( + data=test_components, + columns=["PC" + str(i) for i in range(0, n_keep_components)], + index=self.data_dictionary["test_features"].index, + ) + + self.data["n_kept_components"] = n_keep_components + self.pca = pca2 + + logger.info(f"PCA reduced total features from {n_components} to {n_keep_components}") + + if not self.data_path.is_dir(): + self.data_path.mkdir(parents=True, exist_ok=True) + + return None + + def pca_transform(self, filtered_dataframe: DataFrame) -> None: + """ + Use an existing pca transform to transform data into components + :params: + filtered_dataframe: DataFrame = the cleaned dataframe + """ + pca_components = self.pca.transform(filtered_dataframe) + self.data_dictionary["prediction_features"] = pd.DataFrame( + data=pca_components, + columns=["PC" + str(i) for i in range(0, self.data["n_kept_components"])], + index=filtered_dataframe.index, + ) + + def compute_distances(self) -> float: + """ + Compute distances between each training point and every other training + point. 
This metric defines the neighborhood of trained data and is used + for prediction confidence in the Dissimilarity Index + """ + logger.info("computing average mean distance for all training points") + tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1) + pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc) + avg_mean_dist = pairwise.mean(axis=1).mean() + logger.info(f"avg_mean_dist {avg_mean_dist:.2f}") + + return avg_mean_dist + + def use_SVM_to_remove_outliers(self, predict: bool) -> None: + """ + Build/inference a Support Vector Machine to detect outliers + in training data and prediction + :params: + predict: bool = If true, inference an existing SVM model, else construct one + """ + + if predict: + assert self.svm_model, "No svm model available for outlier removal" + y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) + do_predict = np.where(y_pred == -1, 0, y_pred) + + if (len(do_predict) - do_predict.sum()) > 0: + logger.info( + f"svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions" + ) + self.do_predict += do_predict + self.do_predict -= 1 + + else: + # use SGDOneClassSVM to increase speed? + nu = self.freqai_config.get("feature_parameters", {}).get("svm_nu", 0.2) + self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit( + self.data_dictionary["train_features"] + ) + y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) + dropped_points = np.where(y_pred == -1, 0, y_pred) + # keep_index = np.where(y_pred == 1) + self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ + (y_pred == 1) + ] + self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ + (y_pred == 1) + ] + self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ + (y_pred == 1) + ] + + logger.info( + f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}" + f" train points from {len(y_pred)}" + ) + + # same for test data + y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) + dropped_points = np.where(y_pred == -1, 0, y_pred) + self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ + (y_pred == 1) + ] + self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)] + self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ + (y_pred == 1) + ] + + logger.info( + f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}" + f" test points from {len(y_pred)}" + ) + + return + + def find_features(self, dataframe: DataFrame) -> None: + """ + Find features in the strategy provided dataframe + :params: + dataframe: DataFrame = strategy provided dataframe + :returns: + features: list = the features to be used for training/prediction + """ + column_names = dataframe.columns + features = [c for c in column_names if "%" in c] + labels = [c for c in column_names if "&" in c] + if not features: + raise OperationalException("Could not find any features!") + + self.training_features_list = features + self.label_list = labels + # return features, labels + + def check_if_pred_in_training_spaces(self) -> None: + """ + Compares the distance from each prediction point to each training data + point. It uses this information to estimate a Dissimilarity Index (DI) + and avoid making predictions on any points that are too far away + from the training data set. 
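+        Roughly, for each prediction point p:
+            DI(p) = min distance(p, train_features) / avg_mean_dist
+        and do_predict is zeroed wherever DI(p) exceeds the user defined
+        DI_threshold.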
+ """ + + distance = pairwise_distances( + self.data_dictionary["train_features"], + self.data_dictionary["prediction_features"], + n_jobs=-1, + ) + + self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] + + do_predict = np.where( + self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"), + 1, + 0, + ) + + if (len(do_predict) - do_predict.sum()) > 0: + logger.info( + f"DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for " + "being too far from training data" + ) + + self.do_predict += do_predict + self.do_predict -= 1 + + def set_weights_higher_recent(self, num_weights: int) -> npt.ArrayLike: + """ + Set weights so that recent data is more heavily weighted during + training than older data. + """ + + weights = np.zeros(num_weights) + for i in range(1, len(weights)): + weights[len(weights) - i] = np.exp( + -i / (self.config["freqai"]["feature_parameters"]["weight_factor"] * num_weights) + ) + return weights + + def append_predictions(self, predictions, do_predict, len_dataframe): + """ + Append backtest prediction from current backtest period to all previous periods + """ + + ones = np.ones(len(predictions)) + target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"] + + self.full_predictions = np.append(self.full_predictions, predictions) + self.full_do_predict = np.append(self.full_do_predict, do_predict) + if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + self.full_DI_values = np.append(self.full_DI_values, self.DI_values) + self.full_target_mean = np.append(self.full_target_mean, target_mean) + self.full_target_std = np.append(self.full_target_std, target_std) + + return + + def fill_predictions(self, len_dataframe): + """ + Back fill values to before the backtesting range so that the dataframe matches size + when it goes back to the strategy. These rows are not included in the backtest. 
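+        E.g. for a strategy dataframe of length N and M backtested predictions,
+        N - M zeros are prepended to each returned array.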
+ """ + + filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count + self.full_predictions = np.append(filler, self.full_predictions) + self.full_do_predict = np.append(filler, self.full_do_predict) + if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + self.full_DI_values = np.append(filler, self.full_DI_values) + self.full_target_mean = np.append(filler, self.full_target_mean) + self.full_target_std = np.append(filler, self.full_target_std) + + return + + def create_fulltimerange(self, backtest_tr: str, backtest_period: int) -> str: + backtest_timerange = TimeRange.parse_timerange(backtest_tr) + + if backtest_timerange.stopts == 0: + backtest_timerange.stopts = int( + datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + ) + + backtest_timerange.startts = backtest_timerange.startts - backtest_period * SECONDS_IN_DAY + start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts) + stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts) + full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") + + self.full_path = Path( + self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier")) + ) + + config_path = Path(self.config["config_files"][0]) + + if not self.full_path.is_dir(): + self.full_path.mkdir(parents=True, exist_ok=True) + shutil.copy( + config_path.resolve(), + Path(self.full_path / config_path.parts[-1]), + ) + + return full_timerange + + def check_if_model_expired(self, trained_timestamp: int) -> bool: + """ + A model age checker to determine if the model is trustworthy based on user defined + `expiration_hours` in the configuration file. + :params: + trained_timestamp: int = The time of training for the most recent model. + :returns: + bool = If the model is expired or not. 
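+        E.g. with `expiration_hours: 1` in the config, a model trained two
+        hours ago is reported expired; the default of 0 disables the check.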
+ """ + time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + elapsed_time = (time - trained_timestamp) / 3600 # hours + max_time = self.freqai_config.get("expiration_hours", 0) + if max_time > 0: + return elapsed_time > max_time + else: + return False + + def check_if_new_training_required( + self, trained_timestamp: int + ) -> Tuple[bool, TimeRange, TimeRange]: + + time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + trained_timerange = TimeRange() + data_load_timerange = TimeRange() + + # find the max indicator length required + max_timeframe_chars = self.freqai_config.get("timeframes")[-1] + max_period = self.freqai_config.get("feature_parameters", {}).get( + "indicator_max_period", 50 + ) + additional_seconds = 0 + if max_timeframe_chars[-1] == "d": + additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2]) + elif max_timeframe_chars[-1] == "h": + additional_seconds = max_period * 3600 * int(max_timeframe_chars[-2]) + elif max_timeframe_chars[-1] == "m": + if len(max_timeframe_chars) == 2: + additional_seconds = max_period * 60 * int(max_timeframe_chars[-2]) + elif len(max_timeframe_chars) == 3: + additional_seconds = max_period * 60 * int(float(max_timeframe_chars[0:2])) + else: + logger.warning( + "FreqAI could not detect max timeframe and therefore may not " + "download the proper amount of data for training" + ) + + # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days') + + if trained_timestamp != 0: + elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY + retrain = elapsed_time > self.freqai_config.get("backtest_period") + if retrain: + trained_timerange.startts = int( + time - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY + ) + trained_timerange.stopts = int(time) + # we want to load/populate indicators on more data than we plan to train on so + # because most of the indicators have a rolling timeperiod, and are thus NaNs + # unless they have data further back in time before the start of the train period + data_load_timerange.startts = int( + time + - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY + - additional_seconds + ) + data_load_timerange.stopts = int(time) + else: # user passed no live_trained_timerange in config + trained_timerange.startts = int( + time - self.freqai_config.get("train_period") * SECONDS_IN_DAY + ) + trained_timerange.stopts = int(time) + + data_load_timerange.startts = int( + time + - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY + - additional_seconds + ) + data_load_timerange.stopts = int(time) + retrain = True + + return retrain, trained_timerange, data_load_timerange + + def set_new_model_names(self, pair: str, trained_timerange: TimeRange): + + coin, _ = pair.split("/") + # set the new data_path + self.data_path = Path( + self.full_path + / str("sub-train" + "-" + pair.split("/")[0] + str(int(trained_timerange.stopts))) + ) + + self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts)) + + # self.freqai_config['live_trained_timerange'] = str(int(trained_timerange.stopts)) + # enables persistence, but not fully implemented into save/load data yer + # self.data['live_trained_timerange'] = str(int(trained_timerange.stopts)) + + # SUPERCEDED + # def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict, + # strategy: IStrategy) -> None: + + # exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], + # self.config, validate=False, freqai=True) + # # 
exchange = strategy.dp._exchange # closes ccxt session + # pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', [])) + # if str(metadata['pair']) not in pairs: + # pairs.append(str(metadata['pair'])) + + # refresh_backtest_ohlcv_data( + # exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'), + # datadir=self.config['datadir'], timerange=timerange, + # new_pairs_days=self.config['new_pairs_days'], + # erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'), + # trading_mode=self.config.get('trading_mode', 'spot'), + # prepend=self.config.get('prepend_data', False) + # ) + + def download_all_data_for_training(self, timerange: TimeRange) -> None: + """ + Called only once upon start of bot to download the necessary data for + populating indicators and training the model. + :params: + timerange: TimeRange = The full data timerange for populating the indicators + and training the model. + """ + exchange = ExchangeResolver.load_exchange( + self.config["exchange"]["name"], self.config, validate=False, freqai=True + ) + + new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY) + + refresh_backtest_ohlcv_data( + exchange, + pairs=self.all_pairs, + timeframes=self.freqai_config.get("timeframes"), + datadir=self.config["datadir"], + timerange=timerange, + new_pairs_days=new_pairs_days, + erase=False, + data_format=self.config.get("dataformat_ohlcv", "json"), + trading_mode=self.config.get("trading_mode", "spot"), + prepend=self.config.get("prepend_data", False), + ) + + def update_historic_data(self, strategy: IStrategy) -> None: + """ + Append new candles to our stores historic data (in memory) so that + we do not need to load candle history from disk and we dont need to + pinging exchange multiple times for the same candle. + :params: + dataframe: DataFrame = strategy provided dataframe + """ + + with self.dd.history_lock: + history_data = self.dd.historic_data + + for pair in self.all_pairs: + for tf in self.freqai_config.get("timeframes"): + + # check if newest candle is already appended + df_dp = strategy.dp.get_pair_dataframe(pair, tf) + if len(df_dp.index) == 0: + continue + if str(history_data[pair][tf].iloc[-1]["date"]) == str( + df_dp.iloc[-1:]["date"].iloc[-1] + ): + continue + + index = ( + df_dp.loc[df_dp["date"] == history_data[pair][tf].iloc[-1]["date"]].index[0] + + 1 + ) + history_data[pair][tf] = pd.concat( + [ + history_data[pair][tf], + strategy.dp.get_pair_dataframe(pair, tf).iloc[index:], + ], + ignore_index=True, + axis=0, + ) + + # logger.info(f'Length of history data {len(history_data[pair][tf])}') + + def set_all_pairs(self) -> None: + + self.all_pairs = copy.deepcopy(self.freqai_config.get("corr_pairlist", [])) + for pair in self.config.get("exchange", "").get("pair_whitelist"): + if pair not in self.all_pairs: + self.all_pairs.append(pair) + + def load_all_pair_histories(self, timerange: TimeRange) -> None: + """ + Load pair histories for all whitelist and corr_pairlist pairs. + Only called once upon startup of bot. 
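+        Histories are held in memory in self.dd.historic_data, keyed first by
+        pair and then by timeframe, e.g. historic_data["BTC/USDT"]["5m"].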
+ :params: + timerange: TimeRange = full timerange required to populate all indicators + for training according to user defined train_period + """ + history_data = self.dd.historic_data + + for pair in self.all_pairs: + if pair not in history_data: + history_data[pair] = {} + for tf in self.freqai_config.get("timeframes"): + history_data[pair][tf] = load_pair_history( + datadir=self.config["datadir"], + timeframe=tf, + pair=pair, + timerange=timerange, + data_format=self.config.get("dataformat_ohlcv", "json"), + candle_type=self.config.get("trading_mode", "spot"), + ) + + def get_base_and_corr_dataframes( + self, timerange: TimeRange, pair: str + ) -> Tuple[Dict[Any, Any], Dict[Any, Any]]: + """ + Searches through our historic_data in memory and returns the dataframes relevant + to the present pair. + :params: + timerange: TimeRange = full timerange required to populate all indicators + for training according to user defined train_period + metadata: dict = strategy furnished pair metadata + """ + + with self.dd.history_lock: + corr_dataframes: Dict[Any, Any] = {} + base_dataframes: Dict[Any, Any] = {} + historic_data = self.dd.historic_data + pairs = self.freqai_config.get("corr_pairlist", []) + + for tf in self.freqai_config.get("timeframes"): + base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf]) + if pairs: + for p in pairs: + if pair in p: + continue # dont repeat anything from whitelist + if p not in corr_dataframes: + corr_dataframes[p] = {} + corr_dataframes[p][tf] = self.slice_dataframe( + timerange, historic_data[p][tf] + ) + + return corr_dataframes, base_dataframes + + # SUPERCEDED + # def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any], + # DataFrame]: + # corr_dataframes: Dict[Any, Any] = {} + # base_dataframes: Dict[Any, Any] = {} + # pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']] + # # timerange = TimeRange.parse_timerange(new_timerange) + + # for tf in self.freqai_config.get('timeframes'): + # base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], + # timeframe=tf, + # pair=metadata['pair'], timerange=timerange, + # data_format=self.config.get( + # 'dataformat_ohlcv', 'json'), + # candle_type=self.config.get( + # 'trading_mode', 'spot')) + # if pairs: + # for p in pairs: + # if metadata['pair'] in p: + # continue # dont repeat anything from whitelist + # if p not in corr_dataframes: + # corr_dataframes[p] = {} + # corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'], + # timeframe=tf, + # pair=p, timerange=timerange, + # data_format=self.config.get( + # 'dataformat_ohlcv', 'json'), + # candle_type=self.config.get( + # 'trading_mode', 'spot')) + + # return corr_dataframes, base_dataframes + + def use_strategy_to_populate_indicators( + self, strategy: IStrategy, corr_dataframes: dict, base_dataframes: dict, pair: str + ) -> DataFrame: + """ + Use the user defined strategy for populating indicators during + retrain + :params: + strategy: IStrategy = user defined strategy object + corr_dataframes: dict = dict containing the informative pair dataframes + (for user defined timeframes) + base_dataframes: dict = dict containing the current pair dataframes + (for user defined timeframes) + metadata: dict = strategy furnished pair metadata + :returns: + dataframe: DataFrame = dataframe containing populated indicators + """ + dataframe = base_dataframes[self.config["timeframe"]].copy() + pairs = self.freqai_config.get("corr_pairlist", []) + sgi = True + 
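# set_generalized_indicators (sgi) is only True on the first timeframe
+        # iteration so that generalized indicators/labels created inside
+        # populate_any_indicators() are added exactly once
+        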
for tf in self.freqai_config.get("timeframes"): + dataframe = strategy.populate_any_indicators( + pair, + pair, + dataframe.copy(), + tf, + base_dataframes[tf], + coin=pair.split("/")[0] + "-", + set_generalized_indicators=sgi, + ) + sgi = False + if pairs: + for i in pairs: + if pair in i: + continue # dont repeat anything from whitelist + dataframe = strategy.populate_any_indicators( + pair, + i, + dataframe.copy(), + tf, + corr_dataframes[i][tf], + coin=i.split("/")[0] + "-", + ) + + return dataframe + + def fit_labels(self) -> None: + """ + Fit the labels with a gaussian distribution + """ + import scipy as spy + + self.data["labels_mean"], self.data["labels_std"] = {}, {} + for label in self.label_list: + f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label]) + self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1] + + # KEEPME incase we want to let user start to grab quantiles. + # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][ + # 'target_quantile'], *f) + # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][ + # 'target_quantile'], *f) + # self.data["upper_quantile"] = upper_q + # self.data["lower_quantile"] = lower_q + return + + def np_encoder(self, object): + if isinstance(object, np.generic): + return object.item() + + # Functions containing useful data manpulation examples. but not actively in use. + + # def build_feature_list(self, config: dict, metadata: dict) -> list: + # """ + # SUPERCEDED BY self.find_features() + # Build the list of features that will be used to filter + # the full dataframe. Feature list is construced from the + # user configuration file. + # :params: + # :config: Canonical freqtrade config file containing all + # user defined input in config['freqai] dictionary. + # """ + # features = [] + # for tf in config["freqai"]["timeframes"]: + # for ft in config["freqai"]["base_features"]: + # for n in range(config["freqai"]["feature_parameters"]["shift"] + 1): + # shift = "" + # if n > 0: + # shift = "_shift-" + str(n) + # features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf) + # for p in config["freqai"]["corr_pairlist"]: + # if metadata['pair'] in p: + # continue # avoid duplicate features + # features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf) + + # # logger.info("number of features %s", len(features)) + # return features + + # Possibly phasing these outlier removal methods below out in favor of + # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance). + # But these have good data manipulation examples, so keep them commented here for now. 
+ + # def determine_statistical_distributions(self) -> None: + # from fitter import Fitter + + # logger.info('Determining best model for all features, may take some time') + + # def compute_quantiles(ft): + # f = Fitter(self.data_dictionary["train_features"][ft], + # distributions=['gamma', 'cauchy', 'laplace', + # 'beta', 'uniform', 'lognorm']) + # f.fit() + # # f.summary() + # dist = list(f.get_best().items())[0][0] + # params = f.get_best()[dist] + # upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params) + # lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params) + + # return ft, upper_q, lower_q, dist + + # quantiles_tuple = Parallel(n_jobs=-1)( + # delayed(compute_quantiles)(ft) for ft in self.data_dictionary[ + # 'train_features'].columns) + + # df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles', + # 'lower_quantiles', 'dist']) + # self.data_dictionary['upper_quantiles'] = df['upper_quantiles'] + # self.data_dictionary['lower_quantiles'] = df['lower_quantiles'] + + # return + + # def remove_outliers(self, predict: bool) -> None: + # """ + # Remove data that looks like an outlier based on the distribution of each + # variable. + # :params: + # :predict: boolean which tells the function if this is prediction data or + # training data coming in. + # """ + + # lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy() + # upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy() + + # if predict: + + # df = self.data_dictionary["prediction_features"][ + # (self.data_dictionary["prediction_features"] < upper_quantile) + # & (self.data_dictionary["prediction_features"] > lower_quantile) + # ] + # drop_index = pd.isnull(df).any(1) + # self.data_dictionary["prediction_features"].fillna(0, inplace=True) + # drop_index = ~drop_index + # do_predict = np.array(drop_index.replace(True, 1).replace(False, 0)) + + # logger.info( + # "remove_outliers() tossed %s predictions", + # len(do_predict) - do_predict.sum(), + # ) + # self.do_predict += do_predict + # self.do_predict -= 1 + + # else: + + # filter_train_df = self.data_dictionary["train_features"][ + # (self.data_dictionary["train_features"] < upper_quantile) + # & (self.data_dictionary["train_features"] > lower_quantile) + # ] + # drop_index = pd.isnull(filter_train_df).any(1) + # drop_index = drop_index.replace(True, 1).replace(False, 0) + # self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ + # (drop_index == 0) + # ] + # self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ + # (drop_index == 0) + # ] + # self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ + # (drop_index == 0) + # ] + + # logger.info( + # f'remove_outliers() tossed {drop_index.sum()}' + # f' training points from {len(filter_train_df)}' + # ) + + # # do the same for the test data + # filter_test_df = self.data_dictionary["test_features"][ + # (self.data_dictionary["test_features"] < upper_quantile) + # & (self.data_dictionary["test_features"] > lower_quantile) + # ] + # drop_index = pd.isnull(filter_test_df).any(1) + # drop_index = drop_index.replace(True, 1).replace(False, 0) + # self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][ + # (drop_index == 0) + # ] + # self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ + # (drop_index == 0) + # ] + # self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ + # (drop_index == 0) + # ] + 
+ # logger.info( + # f'remove_outliers() tossed {drop_index.sum()}' + # f' test points from {len(filter_test_df)}' + # ) + + # return + + # def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: + # """ + # standardize all data in the data_dictionary according to the training dataset + # :params: + # :data_dictionary: dictionary containing the cleaned and split training/test data/labels + # :returns: + # :data_dictionary: updated dictionary with standardized values. + # """ + # # standardize the data by training stats + # train_mean = data_dictionary["train_features"].mean() + # train_std = data_dictionary["train_features"].std() + # data_dictionary["train_features"] = ( + # data_dictionary["train_features"] - train_mean + # ) / train_std + # data_dictionary["test_features"] = ( + # data_dictionary["test_features"] - train_mean + # ) / train_std + + # train_labels_std = data_dictionary["train_labels"].std() + # train_labels_mean = data_dictionary["train_labels"].mean() + # data_dictionary["train_labels"] = ( + # data_dictionary["train_labels"] - train_labels_mean + # ) / train_labels_std + # data_dictionary["test_labels"] = ( + # data_dictionary["test_labels"] - train_labels_mean + # ) / train_labels_std + + # for item in train_std.keys(): + # self.data[item + "_std"] = train_std[item] + # self.data[item + "_mean"] = train_mean[item] + + # self.data["labels_std"] = train_labels_std + # self.data["labels_mean"] = train_labels_mean + + # return data_dictionary + + # def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: + # """ + # Normalizes a set of data using the mean and standard deviation from + # the associated training data. + # :params: + # :df: Dataframe to be standardized + # """ + + # for item in df.keys(): + # df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"] + + # return df diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py new file mode 100644 index 000000000..b5ae78549 --- /dev/null +++ b/freqtrade/freqai/freqai_interface.py @@ -0,0 +1,559 @@ +# import contextlib +import datetime +import gc +import logging +import shutil +import threading +import time +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, Tuple + +import numpy as np +import numpy.typing as npt +import pandas as pd +from pandas import DataFrame + +from freqtrade.configuration import TimeRange +from freqtrade.enums import RunMode +from freqtrade.exceptions import OperationalException +from freqtrade.freqai.data_drawer import FreqaiDataDrawer +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.strategy.interface import IStrategy + + +pd.options.mode.chained_assignment = None +logger = logging.getLogger(__name__) + + +def threaded(fn): + def wrapper(*args, **kwargs): + threading.Thread(target=fn, args=args, kwargs=kwargs).start() + + return wrapper + + +class IFreqaiModel(ABC): + """ + Class containing all tools for training and prediction in the strategy. + User models should inherit from this class as shown in + templates/ExamplePredictionModel.py where the user overrides + train(), predict(), fit(), and make_labels(). 
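+
+    A minimal subclass sketch (illustrative only; the class name is hypothetical):
+
+        class MyPredictionModel(IFreqaiModel):
+            def train(self, unfiltered_dataframe, pair, dk): ...
+            def fit(self, data_dictionary): ...
+            def predict(self, dataframe, dk, first=True): ...
+            def return_values(self, dataframe, dk): ...
+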
+    Author: Robert Caulk, rob.caulk@gmail.com
+    """
+
+    def __init__(self, config: Dict[str, Any]) -> None:
+
+        self.config = config
+        self.assert_config(self.config)
+        self.freqai_info = config["freqai"]
+        self.data_split_parameters = config.get("freqai", {}).get("data_split_parameters")
+        self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters")
+        self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
+        self.time_last_trained = None
+        self.current_time = None
+        self.model = None
+        self.predictions = None
+        self.training_on_separate_thread = False
+        self.retrain = False
+        self.first = True
+        self.update_historic_data = 0
+        self.set_full_path()
+        self.follow_mode = self.freqai_info.get("follow_mode", False)
+        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
+        self.lock = threading.Lock()
+        self.identifier = self.freqai_info.get("identifier", "no_id_provided")
+        self.scanning = False
+        self.ready_to_scan = False
+        self.keras = self.freqai_info.get("keras", False)
+        self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
+
+    def assert_config(self, config: Dict[str, Any]) -> None:
+
+        if not config.get("freqai", {}):
+            raise OperationalException("No freqai parameters found in configuration file.")
+
+    def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
+        """
+        Entry point to the FreqaiModel from a specific pair; it will train a new model if
+        necessary before making the prediction.
+
+        :params:
+        :dataframe: Full dataframe coming from strategy - it contains entire
+        backtesting timerange + additional historical data necessary to train
+        the model.
+        :metadata: pair metadata coming from strategy.
+        """
+
+        self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
+        self.dd.set_pair_dict_info(metadata)
+
+        if self.live:
+            self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
+            dk = self.start_live(dataframe, metadata, strategy, self.dk)
+
+        # For backtesting, each pair enters and then gets trained for each window along the
+        # sliding window defined by "train_period" (training window) and "backtest_period"
+        # (backtest window, i.e. window immediately following the training window).
+        # FreqAI slides the window and sequentially builds the backtesting results before returning
+        # the concatenated results for the full backtesting period back to the strategy.
+        elif not self.follow_mode:
+            self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
+            logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
+            dk = self.start_backtesting(dataframe, metadata, self.dk)
+
+        dataframe = self.remove_features_from_df(dk.return_dataframe)
+        return self.return_values(dataframe, dk)
+
+    @threaded
+    def start_scanning(self, strategy: IStrategy) -> None:
+        """
+        Function designed to constantly scan pairs for retraining on a separate thread (intracandle)
+        to improve model youth. This function is agnostic to data preparation/collection/storage;
+        it simply trains on whatever data is available in the self.dd. 
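+        Only the pair currently holding priority 1 in the pair dictionary is
+        considered for retraining on each pass of the loop.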
+ :params: + strategy: IStrategy = The user defined strategy class + """ + while 1: + time.sleep(1) + for pair in self.config.get("exchange", {}).get("pair_whitelist"): + + (_, trained_timestamp, _, _) = self.dd.get_pair_dict_info(pair) + + if self.dd.pair_dict[pair]["priority"] != 1: + continue + dk = FreqaiDataKitchen(self.config, self.dd, self.live, pair) + + # file_exists = False + + dk.set_paths(pair, trained_timestamp) + # file_exists = self.model_exists(pair, + # dk, + # trained_timestamp=trained_timestamp, + # model_filename=model_filename, + # scanning=True) + + ( + retrain, + new_trained_timerange, + data_load_timerange, + ) = dk.check_if_new_training_required(trained_timestamp) + dk.set_paths(pair, new_trained_timerange.stopts) + + if retrain: # or not file_exists: + self.train_model_in_series( + new_trained_timerange, pair, strategy, dk, data_load_timerange + ) + + def start_backtesting( + self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen + ) -> FreqaiDataKitchen: + """ + The main broad execution for backtesting. For backtesting, each pair enters and then gets + trained for each window along the sliding window defined by "train_period" (training window) + and "backtest_period" (backtest window, i.e. window immediately following the + training window). FreqAI slides the window and sequentially builds the backtesting results + before returning the concatenated results for the full backtesting period back to the + strategy. + :params: + dataframe: DataFrame = strategy passed dataframe + metadata: Dict = pair metadata + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + :returns: + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + """ + + # Loop enforcing the sliding window training/backtesting paradigm + # tr_train is the training time range e.g. 1 historical month + # tr_backtest is the backtesting time range e.g. the week directly + # following tr_train. 
Both of these windows slide through the + # entire backtest + for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges): + (_, _, _, _) = self.dd.get_pair_dict_info(metadata["pair"]) + gc.collect() + dk.data = {} # clean the pair specific data between training window sliding + self.training_timerange = tr_train + # self.training_timerange_timerange = tr_train + dataframe_train = dk.slice_dataframe(tr_train, dataframe) + dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) + + trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train) + tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime( + "%Y-%m-%d %H:%M:%S" + ) + tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime( + "%Y-%m-%d %H:%M:%S" + ) + logger.info("Training %s", metadata["pair"]) + logger.info(f"Training {tr_train_startts_str} to {tr_train_stopts_str}") + + dk.data_path = Path( + dk.full_path + / str( + "sub-train" + + "-" + + metadata["pair"].split("/")[0] + + str(int(trained_timestamp.stopts)) + ) + ) + if not self.model_exists( + metadata["pair"], dk, trained_timestamp=trained_timestamp.stopts + ): + self.model = self.train(dataframe_train, metadata["pair"], dk) + self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = trained_timestamp.stopts + dk.set_new_model_names(metadata["pair"], trained_timestamp) + dk.save_data(self.model, metadata["pair"], keras_model=self.keras) + else: + self.model = dk.load_data(metadata["pair"], keras_model=self.keras) + + self.check_if_feature_list_matches_strategy(dataframe_train, dk) + + preds, do_preds = self.predict(dataframe_backtest, dk) + + dk.append_predictions(preds, do_preds, len(dataframe_backtest)) + print("predictions", len(dk.full_predictions), "do_predict", len(dk.full_do_predict)) + + dk.fill_predictions(len(dataframe)) + + return dk + + def start_live( + self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen + ) -> FreqaiDataKitchen: + """ + The main broad execution for dry/live. This function will check if a retraining should be + performed, and if so, retrain and reset the model. + :params: + dataframe: DataFrame = strategy passed dataframe + metadata: Dict = pair metadata + strategy: IStrategy = currently employed strategy + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + :returns: + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + """ + + # update follower + if self.follow_mode: + self.dd.update_follower_metadata() + + # get the model metadata associated with the current pair + (_, trained_timestamp, _, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"]) + + # if the metadata doesnt exist, the follower returns null arrays to strategy + if self.follow_mode and return_null_array: + logger.info("Returning null array from follower to strategy") + self.dd.return_null_values_to_strategy(dataframe, dk) + return dk + + # append the historic data once per round + if self.dd.historic_data: + dk.update_historic_data(strategy) + logger.debug(f'Updating historic data on pair {metadata["pair"]}') + + # if trainable, check if model needs training, if so compute new timerange, + # then save model and metadata. 
+        # if not trainable, load existing data
+        if not self.follow_mode:
+
+            (_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
+                trained_timestamp
+            )
+            dk.set_paths(metadata["pair"], new_trained_timerange.stopts)
+
+            # download candle history if it is not already in memory
+            if not self.dd.historic_data:
+                logger.info(
+                    "Downloading all training data for all pairs in whitelist and "
+                    "corr_pairlist, this may take a while if you do not have the "
+                    "data saved"
+                )
+                dk.download_all_data_for_training(data_load_timerange)
+                dk.load_all_pair_histories(data_load_timerange)
+
+            if not self.scanning:
+                self.scanning = True
+                self.start_scanning(strategy)
+
+        elif self.follow_mode:
+            dk.set_paths(metadata["pair"], trained_timestamp)
+            logger.info(
+                "FreqAI instance set to follow_mode, finding existing pair "
+                f"using {self.identifier}"
+            )
+
+        # load the model and associated data into the data kitchen
+        self.model = dk.load_data(coin=metadata["pair"], keras_model=self.keras)
+
+        if not self.model:
+            logger.warning("No model ready, returning null values to strategy.")
+            self.dd.return_null_values_to_strategy(dataframe, dk)
+            return dk
+
+        # ensure user is feeding the correct indicators to the model
+        self.check_if_feature_list_matches_strategy(dataframe, dk)
+
+        self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
+
+        return dk
+
+    def build_strategy_return_arrays(
+        self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
+    ) -> None:
+
+        # hold the historical predictions in memory so we are sending back
+        # the correct array to the strategy
+
+        if pair not in self.dd.model_return_values:
+            pred_df, do_preds = self.predict(dataframe, dk)
+            self.dd.set_initial_return_values(pair, dk, pred_df, do_preds)
+            dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
+            return
+        elif self.dk.check_if_model_expired(trained_timestamp):
+            pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
+            do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2)
+            logger.warning(
+                "Model expired, returning null values to strategy. Strategy "
+                "construction should take care to consider this event with "
+                "prediction == 0 and do_predict == 2"
+            )
+        else:
+            # Only feed in the most recent candle for prediction in the live scenario
+            pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False)
+
+        self.dd.append_model_predictions(pair, pred_df, do_preds, dk, len(dataframe))
+        dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
+
+        return
+
+    def check_if_feature_list_matches_strategy(
+        self, dataframe: DataFrame, dk: FreqaiDataKitchen
+    ) -> None:
+        """
+        Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
+        to a folder holding existing models.
+        :params:
+        dataframe: DataFrame = strategy provided dataframe
+        dk: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop
+        """
+        dk.find_features(dataframe)
+        if "training_features_list_raw" in dk.data:
+            feature_list = dk.data["training_features_list_raw"]
+        else:
+            feature_list = dk.training_features_list
+        if dk.training_features_list != feature_list:
+            raise OperationalException(
+                "Trying to access pretrained model with `identifier` "
+                "but found different features furnished by the current strategy. "
+                "Change `identifier` to train from scratch, or ensure the "
+                "strategy is furnishing the same features as the pretrained "
+                "model"
+            )
+
+    def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
+        """
+        Base data cleaning method for train.
+        Any function inside this method should drop training data points from the
+        filtered_dataframe based on user decided logic. See
+        FreqaiDataKitchen::remove_outliers() for an example of how outlier data points
+        are dropped from the dataframe used for training.
+        """
+
+        if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
+            dk.principal_component_analysis()
+
+        if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"):
+            dk.use_SVM_to_remove_outliers(predict=False)
+
+        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"):
+            dk.data["avg_mean_dist"] = dk.compute_distances()
+
+        # if self.feature_parameters["determine_statistical_distributions"]:
+        #     dk.determine_statistical_distributions()
+        # if self.feature_parameters["remove_outliers"]:
+        #     dk.remove_outliers(predict=False)
+
+    def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
+        """
+        Base data cleaning method for predict.
+        These functions each modify dk.do_predict, an array with length equal to the
+        number of candles coming from and returning to the strategy. Inside do_predict,
+        1 allows prediction and < 0 signals to the strategy that the model is not confident in
+        the prediction.
+        See FreqaiDataKitchen::remove_outliers() for an example
+        of how the do_predict vector is modified. do_predict is ultimately passed back to the
+        strategy for buy signals.
+        """
+        if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
+            dk.pca_transform(dataframe)
+
+        if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"):
+            dk.use_SVM_to_remove_outliers(predict=True)
+
+        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"):
+            dk.check_if_pred_in_training_spaces()
+
+        # if self.feature_parameters["determine_statistical_distributions"]:
+        #     dk.determine_statistical_distributions()
+        # if self.feature_parameters["remove_outliers"]:
+        #     dk.remove_outliers(predict=True)  # creates dropped index
+
+    def model_exists(
+        self,
+        pair: str,
+        dk: FreqaiDataKitchen,
+        trained_timestamp: int = None,
+        model_filename: str = "",
+        scanning: bool = False,
+    ) -> bool:
+        """
+        Given a pair and path, check if a model already exists
+        :param pair: pair e.g. 
BTC/USD + :param path: path to model + """ + coin, _ = pair.split("/") + + if not self.live: + dk.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp) + + path_to_modelfile = Path(dk.data_path / str(model_filename + "_model.joblib")) + file_exists = path_to_modelfile.is_file() + if file_exists and not scanning: + logger.info("Found model at %s", dk.data_path / dk.model_filename) + elif not scanning: + logger.info("Could not find model at %s", dk.data_path / dk.model_filename) + return file_exists + + def set_full_path(self) -> None: + self.full_path = Path( + self.config["user_data_dir"] / "models" / str(self.freqai_info.get("identifier")) + ) + self.full_path.mkdir(parents=True, exist_ok=True) + shutil.copy( + self.config["config_files"][0], + Path(self.full_path, Path(self.config["config_files"][0]).name), + ) + + def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame: + """ + Remove the features from the dataframe before returning it to strategy. This keeps it + compact for Frequi purposes. + """ + to_keep = [ + col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%") + ] + return dataframe[to_keep] + + def train_model_in_series( + self, + new_trained_timerange: TimeRange, + pair: str, + strategy: IStrategy, + dk: FreqaiDataKitchen, + data_load_timerange: TimeRange, + ): + """ + Retreive data and train model in single threaded mode (only used if model directory is empty + upon startup for dry/live ) + :params: + new_trained_timerange: TimeRange = the timerange to train the model on + metadata: dict = strategy provided metadata + strategy: IStrategy = user defined strategy object + dk: FreqaiDataKitchen = non-persistent data container for current coin/loop + data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators + (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs) + """ + + corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes( + data_load_timerange, pair + ) + + unfiltered_dataframe = dk.use_strategy_to_populate_indicators( + strategy, corr_dataframes, base_dataframes, pair + ) + + unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe) + + # find the features indicated by strategy and store in datakitchen + dk.find_features(unfiltered_dataframe) + + model = self.train(unfiltered_dataframe, pair, dk) + + self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts + dk.set_new_model_names(pair, new_trained_timerange) + self.dd.pair_dict[pair]["first"] = False + if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning: + with self.lock: + self.dd.pair_to_end_of_training_queue(pair) + dk.save_data(model, coin=pair, keras_model=self.keras) + + if self.freqai_info.get("purge_old_models", False): + self.dd.purge_old_models() + # self.retrain = False + + # Following methods which are overridden by user made prediction models. + # See freqai/prediction_models/CatboostPredictionModlel.py for an example. + + @abstractmethod + def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any: + """ + Filter the training data and train a model to it. Train makes heavy use of the datahandler + for storing, saving, loading, and analyzing the data. + :params: + :unfiltered_dataframe: Full dataframe for the current training period + :metadata: pair metadata from strategy. 
+ :returns: + :model: Trained model which can be used to inference (self.predict) + """ + + @abstractmethod + def fit(self) -> Any: + """ + Most regressors use the same function names and arguments e.g. user + can drop in LGBMRegressor in place of CatBoostRegressor and all data + management will be properly handled by Freqai. + :params: + data_dictionary: Dict = the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + return + + @abstractmethod + def predict( + self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True + ) -> Tuple[DataFrame, npt.ArrayLike]: + """ + Filter the prediction features data and predict with it. + :param: + unfiltered_dataframe: Full dataframe for the current backtest period. + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + :return: + :predictions: np.array of predictions + :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove + data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index) + """ + + def make_labels(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User defines the labels here (target values). + :params: + dataframe: DataFrame = the full dataframe for the present training period + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + """ + + return + + @abstractmethod + def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User defines the dataframe to be returned to strategy here. + :params: + dataframe: DataFrame = the full dataframe for the current prediction (live) + or --timerange (backtesting) + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + :returns: + dataframe: DataFrame = dataframe filled with user defined data + """ + + return diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py new file mode 100644 index 000000000..7b0192fb8 --- /dev/null +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -0,0 +1,154 @@ +import logging +from typing import Any, Dict, Tuple + +from catboost import CatBoostRegressor, Pool +from pandas import DataFrame + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.freqai_interface import IFreqaiModel + + +logger = logging.getLogger(__name__) + + +class CatboostPredictionModel(IFreqaiModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User uses this function to add any additional return values to the dataframe. + e.g. + dataframe['volatility'] = dk.volatility_values + """ + + return dataframe + + def make_labels(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User defines the labels here (target values). 
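+        The example below labels each candle with the mean of the next
+        `period` closes relative to the current close (a future percent-change
+        target).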
+ :params: + :dataframe: the full dataframe for the present training period + """ + + dataframe["s"] = ( + dataframe["close"] + .shift(-self.feature_parameters["period"]) + .rolling(self.feature_parameters["period"]) + .mean() + / dataframe["close"] + - 1 + ) + + return dataframe["s"] + + def train( + self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + ) -> Tuple[DataFrame, DataFrame]: + """ + Filter the training data and train a model to it. Train makes heavy use of the datahkitchen + for storing, saving, loading, and analyzing the data. + :params: + :unfiltered_dataframe: Full dataframe for the current training period + :metadata: pair metadata from strategy. + :returns: + :model: Trained model which can be used to inference (self.predict) + """ + + logger.info("--------------------Starting training " f"{pair} --------------------") + + # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) + # filter the features requested by user in the configuration file and elegantly handle NaNs + features_filtered, labels_filtered = dk.filter_features( + unfiltered_dataframe, + dk.training_features_list, + dk.label_list, + training_filter=True, + ) + + # split data into train/test data. + data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy + # normalize all data based on train_dataset only + data_dictionary = dk.normalize_data(data_dictionary) + + # optional additional data cleaning/analysis + self.data_cleaning_train(dk) + + logger.info( + f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features" + ) + logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') + + model = self.fit(data_dictionary) + + logger.info(f"--------------------done training {pair}--------------------") + + return model + + def fit(self, data_dictionary: Dict) -> Any: + """ + User sets up the training and test data to fit their desired model here + :params: + :data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + train_data = Pool( + data=data_dictionary["train_features"], + label=data_dictionary["train_labels"], + weight=data_dictionary["train_weights"], + ) + + test_data = Pool( + data=data_dictionary["test_features"], + label=data_dictionary["test_labels"], + weight=data_dictionary["test_weights"], + ) + + model = CatBoostRegressor( + allow_writing_files=False, + verbose=100, + early_stopping_rounds=400, + **self.model_training_parameters, + ) + model.fit(X=train_data, eval_set=test_data) + + return model + + def predict( + self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False + ) -> Tuple[DataFrame, DataFrame]: + """ + Filter the prediction features data and predict with it. + :param: unfiltered_dataframe: Full dataframe for the current backtest period. 
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        # labels were normalized to [-1, 1] during training, so the raw predictions
+        # are mapped back to the original label range here
+        for label in dk.label_list:
+            pred_df[label] = (
+                (pred_df[label] + 1)
+                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
+                / 2
+            ) + dk.data["labels_min"][label]
+
+        return (pred_df, dk.do_predict)
diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
new file mode 100644
index 000000000..c4d92d7bb
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py
@@ -0,0 +1,133 @@
+import logging
+from typing import Any, Dict, Tuple
+
+from catboost import CatBoostRegressor  # , Pool
+from pandas import DataFrame
+from sklearn.multioutput import MultiOutputRegressor
+
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from freqtrade.freqai.freqai_interface import IFreqaiModel
+
+
+logger = logging.getLogger(__name__)
+
+
+class CatboostPredictionMultiModel(IFreqaiModel):
+    """
+    User created prediction model. The class needs to override three necessary
+    functions: predict(), train() and fit(). The class inherits IFreqaiModel, which
+    manages data loading, saving, and analysis through its own FreqaiDataKitchen.
+    """
+
+    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
+        """
+        User uses this function to add any additional return values to the dataframe.
+        e.g.
+        dataframe['volatility'] = dk.volatility_values
+        """
+
+        return dataframe
+
+    def train(
+        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the training data and train a model on it. Train makes heavy use of the
+        data kitchen (FreqaiDataKitchen) for storing, saving, loading, and analyzing the data.
+        :params:
+        :unfiltered_dataframe: Full dataframe for the current training period
+        :pair: the pair currently being trained on
+        :dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
+        :returns:
+        :model: Trained model which can be used for inference (self.predict)
+        """
+
+        logger.info(f"--------------------Starting training {pair} --------------------")
+
+        # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
+        # filter the features requested by the user in the configuration file and elegantly
+        # handle NaNs
+        features_filtered, labels_filtered = dk.filter_features(
+            unfiltered_dataframe,
+            dk.training_features_list,
+            dk.label_list,
+            training_filter=True,
+        )
+
+        # split data into train/test data.
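+        # (the split is presumably controlled by config["freqai"]["data_split_parameters"],
+        # e.g. test_size and random_state)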
+        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        dk.fit_labels()  # fit labels to a Cauchy distribution so we know what to expect in strategy
+        # normalize all data based on train_dataset only
+        data_dictionary = dk.normalize_data(data_dictionary)
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_train(dk)
+
+        logger.info(
+            f'Training model on {len(dk.data_dictionary["train_features"].columns)} features'
+        )
+        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
+
+        model = self.fit(data_dictionary)
+
+        logger.info(f"--------------------done training {pair}--------------------")
+
+        return model
+
+    def fit(self, data_dictionary: Dict) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :params:
+        :data_dictionary: the dictionary constructed by the FreqaiDataKitchen to hold
+        all the training and test data/labels.
+        """
+
+        cbr = CatBoostRegressor(
+            allow_writing_files=False,
+            gpu_ram_part=0.5,
+            verbose=100,
+            early_stopping_rounds=400,
+            **self.model_training_parameters,
+        )
+
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+        # eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        sample_weight = data_dictionary["train_weights"]
+
+        # MultiOutputRegressor fits one CatBoostRegressor per label, which allows
+        # several targets to be predicted from the same feature set
+        model = MultiOutputRegressor(estimator=cbr)
+        model.fit(X=X, y=y, sample_weight=sample_weight)  # , eval_set=eval_set)
+
+        return model
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
+        :return:
+        :pred_df: dataframe containing the predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, dk.training_features_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        # map the normalized predictions back to the original label range
+        for label in dk.label_list:
+            pred_df[label] = (
+                (pred_df[label] + 1)
+                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
+                / 2
+            ) + dk.data["labels_min"][label]
+
+        return (pred_df, dk.do_predict)
diff --git a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
new file mode 100644
index 000000000..17700a97f
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py
@@ -0,0 +1,127 @@
+import logging
+from typing import Any, Dict, Tuple
+
+from lightgbm import LGBMRegressor
+from pandas import DataFrame
+
+from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
+from freqtrade.freqai.freqai_interface import IFreqaiModel
+
+
+logger = logging.getLogger(__name__)
+
+
+class LightGBMPredictionModel(IFreqaiModel):
+    """
+    User created prediction model. The class needs to override three necessary
+    functions: predict(), train() and fit(). The class inherits IFreqaiModel, which
+    manages data loading, saving, and analysis through its own FreqaiDataKitchen.
+    """
+
+    def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
+        """
+        User uses this function to add any additional return values to the dataframe.
+        e.g.
+        dataframe['volatility'] = dk.volatility_values
+        """
+
+        return dataframe
+
+    def train(
+        self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the training data and train a model on it. Train makes heavy use of the
+        data kitchen (FreqaiDataKitchen) for storing, saving, loading, and analyzing the data.
+        :params:
+        :unfiltered_dataframe: Full dataframe for the current training period
+        :pair: the pair currently being trained on
+        :dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
+        :returns:
+        :model: Trained model which can be used for inference (self.predict)
+        """
+
+        logger.info(f"--------------------Starting training {pair} --------------------")
+
+        # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
+        # filter the features requested by the user in the configuration file and elegantly
+        # handle NaNs
+        features_filtered, labels_filtered = dk.filter_features(
+            unfiltered_dataframe,
+            dk.training_features_list,
+            dk.label_list,
+            training_filter=True,
+        )
+
+        # split data into train/test data.
+        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
+        dk.fit_labels()  # fit labels to a Cauchy distribution so we know what to expect in strategy
+        # normalize all data based on train_dataset only
+        data_dictionary = dk.normalize_data(data_dictionary)
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_train(dk)
+
+        logger.info(
+            f'Training model on {len(dk.data_dictionary["train_features"].columns)} features'
+        )
+        logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
+
+        model = self.fit(data_dictionary)
+
+        logger.info(f"--------------------done training {pair}--------------------")
+
+        return model
+
+    def fit(self, data_dictionary: Dict) -> Any:
+        """
+        Most regressors use the same function names and arguments, e.g. the user
+        can drop in LGBMRegressor in place of CatBoostRegressor and all data
+        management will be properly handled by Freqai.
+        :params:
+        :data_dictionary: the dictionary constructed by the FreqaiDataKitchen to hold
+        all the training and test data/labels.
+        """
+
+        eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+
+        model = LGBMRegressor(seed=42, n_estimators=2000, verbosity=1, force_col_wise=True)
+        model.fit(X=X, y=y, eval_set=eval_set)
+
+        return model
+
+    def predict(
+        self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen
+    ) -> Tuple[DataFrame, DataFrame]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
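+        dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only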
+        :return:
+        :predictions: np.array of predictions
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        # logger.info("--------------------Starting prediction--------------------")
+
+        original_feature_list = dk.find_features(unfiltered_dataframe)
+        filtered_dataframe, _ = dk.filter_features(
+            unfiltered_dataframe, original_feature_list, training_filter=False
+        )
+        filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
+        dk.data_dictionary["prediction_features"] = filtered_dataframe
+
+        # optional additional data cleaning/analysis
+        self.data_cleaning_predict(dk, filtered_dataframe)
+
+        predictions = self.model.predict(dk.data_dictionary["prediction_features"])
+        pred_df = DataFrame(predictions, columns=dk.label_list)
+
+        # map the normalized predictions back to the original label range
+        for label in dk.label_list:
+            pred_df[label] = (
+                (pred_df[label] + 1)
+                * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
+                / 2
+            ) + dk.data["labels_min"][label]
+
+        return (pred_df, dk.do_predict)
diff --git a/freqtrade/freqai/strategy_bridge.py b/freqtrade/freqai/strategy_bridge.py
new file mode 100644
index 000000000..bb43084a0
--- /dev/null
+++ b/freqtrade/freqai/strategy_bridge.py
@@ -0,0 +1,12 @@
+from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver
+
+
+class CustomModel:
+    """
+    A bridge between the user-defined IFreqaiModel class
+    and the strategy.
+    """
+
+    def __init__(self, config):
+
+        self.bridge = FreqaiModelResolver.load_freqaimodel(config)
diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py
index 030d7bdf0..37078b85a 100755
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -206,6 +206,11 @@ class Backtesting:
         """
         self.progress.init_step(BacktestState.DATALOAD, 1)
 
+        if self.config.get('freqai') is not None:
+            # freqai needs extra history for its first training, so extend the startup period
+            self.required_startup += int(self.config.get('freqai', {}).get('startup_candles', 1000))
+            logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}')
+            self.config['startup_candle_count'] = self.required_startup
+
         data = history.load_data(
             datadir=self.config['datadir'],
             pairs=self.pairlists.whitelist,
diff --git a/freqtrade/plugins/pairlist/pairlist_helpers.py b/freqtrade/plugins/pairlist/pairlist_helpers.py
index 1de27fcbd..23233481a 100644
--- a/freqtrade/plugins/pairlist/pairlist_helpers.py
+++ b/freqtrade/plugins/pairlist/pairlist_helpers.py
@@ -40,3 +40,14 @@ def expand_pairlist(wildcardpl: List[str], available_pairs: List[str],
         except re.error as err:
             raise ValueError(f"Wildcard error in {pair_wc}, {err}")
     return result
+
+
+def dynamic_expand_pairlist(config: dict, markets: list) -> List[str]:
+    # expand the configured pairs, adding any freqai corr_pairlist pairs
+    # that are not already in the whitelist
+    if config.get('freqai', {}):
+        full_pairs = config['pairs'] + [pair for pair in config['freqai']['corr_pairlist']
+                                        if pair not in config['pairs']]
+        expanded_pairs = expand_pairlist(full_pairs, markets)
+    else:
+        expanded_pairs = expand_pairlist(config['pairs'], markets)
+
+    return expanded_pairs
diff --git a/freqtrade/resolvers/exchange_resolver.py b/freqtrade/resolvers/exchange_resolver.py
index 4dfbf445b..c1ec8b69c 100644
--- a/freqtrade/resolvers/exchange_resolver.py
+++ b/freqtrade/resolvers/exchange_resolver.py
@@ -18,7 +18,8 @@ class ExchangeResolver(IResolver):
     object_type = Exchange
 
     @staticmethod
-    def load_exchange(exchange_name: str, config: dict, validate: bool = True) -> Exchange:
+    def load_exchange(exchange_name: str, config: dict, validate: bool = True,
+                      freqai: bool = False) -> Exchange:
         """
         Load the custom class from config parameter
         :param exchange_name: name of the Exchange to load
@@ -31,7 +32,8 @@ class ExchangeResolver(IResolver):
         try:
             exchange = ExchangeResolver._load_exchange(exchange_name,
                                                        kwargs={'config': config,
-                                                               'validate': validate})
+                                                               'validate': validate,
+                                                               'freqai': freqai})
         except ImportError:
             logger.info(
                 f"No {exchange_name} specific subclass found. Using the generic class instead.")
diff --git a/freqtrade/resolvers/freqaimodel_resolver.py b/freqtrade/resolvers/freqaimodel_resolver.py
new file mode 100644
index 000000000..e666b462c
--- /dev/null
+++ b/freqtrade/resolvers/freqaimodel_resolver.py
@@ -0,0 +1,50 @@
+# pragma pylint: disable=attribute-defined-outside-init
+
+"""
+This module loads a custom model for freqai
+"""
+import logging
+from pathlib import Path
+from typing import Dict
+
+from freqtrade.constants import USERPATH_FREQAIMODELS
+from freqtrade.exceptions import OperationalException
+from freqtrade.freqai.freqai_interface import IFreqaiModel
+from freqtrade.resolvers import IResolver
+
+
+logger = logging.getLogger(__name__)
+
+
+class FreqaiModelResolver(IResolver):
+    """
+    This class contains all the logic to load a custom IFreqaiModel class
+    """
+
+    object_type = IFreqaiModel
+    object_type_str = "FreqaiModel"
+    user_subdir = USERPATH_FREQAIMODELS
+    initial_search_path = Path(__file__).parent.parent.joinpath(
+        "freqai/prediction_models").resolve()
+
+    @staticmethod
+    def load_freqaimodel(config: Dict) -> IFreqaiModel:
+        """
+        Load the custom class from config parameter
+        :param config: configuration dictionary
+        """
+
+        freqaimodel_name = config.get("freqaimodel")
+        if not freqaimodel_name:
+            raise OperationalException(
+                "No freqaimodel set. Please use `--freqaimodel` to "
+                "specify the FreqaiModel class to use.\n"
+            )
+        freqaimodel = FreqaiModelResolver.load_object(
+            freqaimodel_name,
+            config,
+            kwargs={"config": config},
+            extra_dir=config.get("freqaimodel_path"),
+        )
+
+        return freqaimodel
diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py
index d4ccfc5db..11cec1fae 100644
--- a/freqtrade/strategy/interface.py
+++ b/freqtrade/strategy/interface.py
@@ -546,6 +546,23 @@ class IStrategy(ABC, HyperStrategyMixin):
         """
         return None
 
+    def populate_any_indicators(self, basepair: str, pair: str, df: DataFrame, tf: str,
+                                informative: DataFrame = None, coin: str = "",
+                                set_generalized_indicators: bool = False) -> DataFrame:
+        """
+        Function designed to automatically generate, name, and merge features
+        from user-indicated timeframes in the configuration file. The user can add
+        additional features here, but must follow the naming convention.
+        Defined in IStrategy because Freqai needs to know it exists.
+        :params:
+        :pair: pair to be used as informative
+        :df: strategy dataframe which will receive merges from informatives
+        :tf: timeframe of the dataframe which will modify the feature names
+        :informative: the dataframe associated with the informative pair
+        :coin: the name of the coin which will modify the feature names.
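+        :set_generalized_indicators: whether to also set indicators/targets that are defined
+        only once (e.g. day of week) rather than once per pair and timeframe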
+ """ + return df + ### # END - Intended to be overridden by strategy ### diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py new file mode 100644 index 000000000..1140ba383 --- /dev/null +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -0,0 +1,342 @@ +import logging +from functools import reduce + +import pandas as pd +import talib.abstract as ta +from pandas import DataFrame +from technical import qtpylib + +from freqtrade.exchange import timeframe_to_prev_date +from freqtrade.freqai.strategy_bridge import CustomModel +from freqtrade.persistence import Trade +from freqtrade.strategy import DecimalParameter, IntParameter, merge_informative_pair +from freqtrade.strategy.interface import IStrategy + + +logger = logging.getLogger(__name__) + + +class FreqaiExampleStrategy(IStrategy): + """ + Example strategy showing how the user connects their own + IFreqaiModel to the strategy. Namely, the user uses: + self.model = CustomModel(self.config) + self.model.bridge.start(dataframe, metadata) + + to make predictions on their data. populate_any_indicators() automatically + generates the variety of features indicated by the user in the + canonical freqtrade configuration file under config['freqai']. + """ + + minimal_roi = {"0": 0.1, "240": -1} + + plot_config = { + "main_plot": {}, + "subplots": { + "prediction": {"prediction": {"color": "blue"}}, + "target_roi": { + "target_roi": {"color": "brown"}, + }, + "do_predict": { + "do_predict": {"color": "brown"}, + }, + }, + } + + process_only_new_candles = True + stoploss = -0.05 + use_exit_signal = True + startup_candle_count: int = 300 + can_short = False + + linear_roi_offset = DecimalParameter( + 0.00, 0.02, default=0.005, space="sell", optimize=False, load=True + ) + max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True) + + def informative_pairs(self): + whitelist_pairs = self.dp.current_whitelist() + corr_pairs = self.config["freqai"]["corr_pairlist"] + informative_pairs = [] + for tf in self.config["freqai"]["timeframes"]: + for pair in whitelist_pairs: + informative_pairs.append((pair, tf)) + for pair in corr_pairs: + if pair in whitelist_pairs: + continue # avoid duplication + informative_pairs.append((pair, tf)) + return informative_pairs + + def bot_start(self): + self.model = CustomModel(self.config) + + def populate_any_indicators( + self, metadata, pair, df, tf, informative=None, coin="", set_generalized_indicators=False + ): + """ + Function designed to automatically generate, name and merge features + from user indicated timeframes in the configuration file. User controls the indicators + passed to the training/prediction by prepending indicators with `'%-' + coin ` + (see convention below). I.e. user should not prepend any supporting metrics + (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the + model. + :params: + :pair: pair to be used as informative + :df: strategy dataframe which will receive merges from informatives + :tf: timeframe of the dataframe which will modify the feature names + :informative: the dataframe associated with the informative pair + :coin: the name of the coin which will modify the feature names. 
+ """ + + with self.model.bridge.lock: + if informative is None: + informative = self.dp.get_pair_dataframe(pair, tf) + + # first loop is automatically duplicating indicators for time periods + for t in self.freqai_info["feature_parameters"]["indicator_periods"]: + + t = int(t) + informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) + informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) + informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t) + informative[f"{coin}20sma-period_{t}"] = ta.SMA(informative, timeperiod=t) + informative[f"{coin}21ema-period_{t}"] = ta.EMA(informative, timeperiod=t) + informative[f"%-{coin}close_over_20sma-period_{t}"] = ( + informative["close"] / informative[f"{coin}20sma-period_{t}"] + ) + + informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) + + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(informative), window=t, stds=2.2 + ) + informative[f"{coin}bb_lowerband-period_{t}"] = bollinger["lower"] + informative[f"{coin}bb_middleband-period_{t}"] = bollinger["mid"] + informative[f"{coin}bb_upperband-period_{t}"] = bollinger["upper"] + + informative[f"%-{coin}bb_width-period_{t}"] = ( + informative[f"{coin}bb_upperband-period_{t}"] + - informative[f"{coin}bb_lowerband-period_{t}"] + ) / informative[f"{coin}bb_middleband-period_{t}"] + informative[f"%-{coin}close-bb_lower-period_{t}"] = ( + informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"] + ) + + informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t) + macd = ta.MACD(informative, timeperiod=t) + informative[f"%-{coin}macd-period_{t}"] = macd["macd"] + + informative[f"%-{coin}relative_volume-period_{t}"] = ( + informative["volume"] / informative["volume"].rolling(t).mean() + ) + + informative[f"%-{coin}pct-change"] = informative["close"].pct_change() + informative[f"%-{coin}raw_volume"] = informative["volume"] + informative[f"%-{coin}raw_price"] = informative["close"] + + indicators = [col for col in informative if col.startswith("%")] + # This loop duplicates and shifts all indicators to add a sense of recency to data + for n in range(self.freqai_info["feature_parameters"]["shift"] + 1): + if n == 0: + continue + informative_shift = informative[indicators].shift(n) + informative_shift = informative_shift.add_suffix("_shift-" + str(n)) + informative = pd.concat((informative, informative_shift), axis=1) + + df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) + skip_columns = [ + (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] + ] + df = df.drop(columns=skip_columns) + + # Add generalized indicators here (because in live, it will call this + # function to populate indicators during training). 
+            # add them multiple times
+            if set_generalized_indicators:
+                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
+                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
+
+                # user adds targets here by prepending them with &- (see convention below)
+                # If the user wishes to use multiple targets, a multioutput prediction model
+                # must be used, such as freqai/prediction_models/CatboostPredictionMultiModel.py
+                df["&-s_close"] = (
+                    df["close"]
+                    .shift(-self.freqai_info["feature_parameters"]["period"])
+                    .rolling(self.freqai_info["feature_parameters"]["period"])
+                    .mean()
+                    / df["close"]
+                    - 1
+                )
+
+        return df
+
+    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+
+        self.freqai_info = self.config["freqai"]
+        self.pair = metadata["pair"]
+        sgi = True
+        # the following loops are necessary for building the features
+        # indicated by the user in the configuration file.
+        # All indicators must be populated by populate_any_indicators() for live functionality
+        # to work correctly.
+        for tf in self.freqai_info["timeframes"]:
+            dataframe = self.populate_any_indicators(
+                metadata,
+                self.pair,
+                dataframe.copy(),
+                tf,
+                coin=self.pair.split("/")[0] + "-",
+                set_generalized_indicators=sgi,
+            )
+            sgi = False
+            for pair in self.freqai_info["corr_pairlist"]:
+                if metadata["pair"] in pair:
+                    continue  # do not include whitelisted pair twice if it is in corr_pairlist
+                dataframe = self.populate_any_indicators(
+                    metadata, pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
+                )
+
+        # the model returns the prediction for each label, a do_predict flag indicating
+        # whether the prediction should be accepted, and the target mean/std values from
+        # the labels used during each training period
+        dataframe = self.model.bridge.start(dataframe, metadata, self)
+
+        dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25
+        dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25
+        return dataframe
+
+    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+
+        enter_long_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"]]
+
+        if enter_long_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
+            ] = (1, "long")
+
+        enter_short_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"]]
+
+        if enter_short_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
+            ] = (1, "short")
+
+        return df
+
+    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+        exit_long_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"] * 0.25]
+        if exit_long_conditions:
+            df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1
+
+        exit_short_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"] * 0.25]
+        if exit_short_conditions:
+            df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1
+
+        return df
+
+    def get_ticker_indicator(self):
+        return int(self.config["timeframe"][:-1])
+
+    def custom_exit(
+        self, pair: str, trade: Trade, current_time, current_rate, current_profit, **kwargs
+    ):
+
+        dataframe, _ = self.dp.get_analyzed_dataframe(pair=pair, timeframe=self.timeframe)
+
+        trade_date = timeframe_to_prev_date(self.config["timeframe"], trade.open_date_utc)
+        trade_candle = dataframe.loc[(dataframe["date"] == trade_date)]
+
+        if trade_candle.empty:
+            return None
+        trade_candle = trade_candle.squeeze()
+
+        follow_mode = self.config.get("freqai", {}).get("follow_mode", False)
+
+        if not follow_mode:
+            pair_dict = self.model.bridge.dd.pair_dict
+        else:
+            pair_dict = self.model.bridge.dd.follower_dict
+
+        entry_tag = trade.enter_tag
+
+        if (
+            "prediction" + entry_tag not in pair_dict[pair]
+            or pair_dict[pair]["prediction" + entry_tag] > 0
+        ):
+            with self.model.bridge.lock:
+                pair_dict[pair]["prediction" + entry_tag] = abs(trade_candle["&-s_close"])
+                if not follow_mode:
+                    self.model.bridge.dd.save_drawer_to_disk()
+                else:
+                    self.model.bridge.dd.save_follower_dict_to_disk()
+
+        roi_price = pair_dict[pair]["prediction" + entry_tag]
+        roi_time = self.max_roi_time_long.value
+
+        # linearly decay the expected ROI toward zero over roi_time minutes
+        roi_decay = roi_price * (
+            1 - ((current_time - trade.open_date_utc).seconds) / (roi_time * 60)
+        )
+        if roi_decay < 0:
+            roi_decay = self.linear_roi_offset.value
+        else:
+            roi_decay += self.linear_roi_offset.value
+
+        if current_profit > roi_decay:
+            return "roi_custom_win"
+
+        if current_profit < -roi_decay:
+            return "roi_custom_loss"
+
+    def confirm_trade_exit(
+        self,
+        pair: str,
+        trade: Trade,
+        order_type: str,
+        amount: float,
+        rate: float,
+        time_in_force: str,
+        exit_reason: str,
+        current_time,
+        **kwargs,
+    ) -> bool:
+
+        entry_tag = trade.enter_tag
+        follow_mode = self.config.get("freqai", {}).get("follow_mode", False)
+        if not follow_mode:
+            pair_dict = self.model.bridge.dd.pair_dict
+        else:
+            pair_dict = self.model.bridge.dd.follower_dict
+
+        with self.model.bridge.lock:
+            pair_dict[pair]["prediction" + entry_tag] = 0
+            if not follow_mode:
+                self.model.bridge.dd.save_drawer_to_disk()
+            else:
+                self.model.bridge.dd.save_follower_dict_to_disk()
+
+        return True
+
+    def confirm_trade_entry(
+        self,
+        pair: str,
+        order_type: str,
+        amount: float,
+        rate: float,
+        time_in_force: str,
+        current_time,
+        entry_tag,
+        side: str,
+        **kwargs,
+    ) -> bool:
+
+        df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe)
+        last_candle = df.iloc[-1].squeeze()
+
+        # reject entries that are more than 0.25% away from the last close
+        if side == "long":
+            if rate > (last_candle["close"] * (1 + 0.0025)):
+                return False
+        else:
+            if rate < (last_candle["close"] * (1 - 0.0025)):
+                return False
+
+        return True
diff --git a/mkdocs.yml b/mkdocs.yml
index a43322f78..18744e0d5 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -35,6 +35,7 @@ nav:
         - Edge Positioning: edge.md
         - Advanced Strategy: strategy-advanced.md
         - Advanced Hyperopt: advanced-hyperopt.md
+        - Freqai: freqai.md
         - Sandbox Testing: sandbox-testing.md
         - FAQ: faq.md
         - SQL Cheat-sheet: sql_cheatsheet.md
diff --git a/requirements-freqai.txt b/requirements-freqai.txt
new file mode 100644
index 000000000..a06a41b96
--- /dev/null
+++ b/requirements-freqai.txt
@@ -0,0 +1,9 @@
+# Include all requirements to run the bot.
+-r requirements.txt
+
+# Required for freqai
+scikit-learn==1.0.2
+scikit-optimize==0.9.0
+joblib==1.1.0
+catboost==1.0.4
+lightgbm==3.3.2
diff --git a/user_data/freqaimodels/.gitkeep b/user_data/freqaimodels/.gitkeep
new file mode 100644
index 000000000..e69de29bb