fix bug for target_mean/std array merging in backtesting

This commit is contained in:
robcaulk 2022-05-26 21:07:50 +02:00
parent ff531c416f
commit 6193205012
6 changed files with 186 additions and 110 deletions

View File

@ -1,7 +1,7 @@
{
"max_open_trades": 1,
"stake_currency": "USDT",
"stake_amount": 800,
"stake_amount": 900,
"tradable_balance_ratio": 1,
"fiat_display_currency": "USD",
"dry_run": true,
@ -24,8 +24,7 @@
"rateLimit": 200
},
"pair_whitelist": [
"BTC/USDT",
"ETH/USDT"
"BTC/USDT"
],
"pair_blacklist": []
},
@ -55,7 +54,7 @@
"15m"
],
"train_period": 30,
"backtest_period": 10,
"backtest_period": 7,
"identifier": "example",
"live_trained_timestamp": 0,
"corr_pairlist": [
@ -64,16 +63,16 @@
"DOT/USDT"
],
"feature_parameters": {
"period": 12,
"period": 24,
"shift": 1,
"DI_threshold": 1,
"weight_factor": 0,
"DI_threshold": 0,
"weight_factor": 0.9,
"principal_component_analysis": false,
"use_SVM_to_remove_outliers": false,
"stratify": 0
"use_SVM_to_remove_outliers": true,
"stratify": 3
},
"data_split_parameters": {
"test_size": 0.25,
"test_size": 0.33,
"random_state": 1
},
"model_training_parameters": {

View File

@ -221,33 +221,43 @@ This way, the user can return to using any model they wish by simply changing th
### Building a freqai strategy
The Freqai strategy requires the user to include the following lines of code in `populate_any_indicators()`
The Freqai strategy requires the user to include the following lines of code in the strategy:
```python
from freqtrade.freqai.strategy_bridge import CustomModel
from freqtrade.freqai.strategy_bridge import CustomModel
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# the configuration file parameters are stored here
self.freqai_info = self.config['freqai']
def informative_pairs(self):
whitelist_pairs = self.dp.current_whitelist()
corr_pairs = self.config["freqai"]["corr_pairlist"]
informative_pairs = []
for tf in self.config["freqai"]["timeframes"]:
for pair in whitelist_pairs:
informative_pairs.append((pair, tf))
for pair in corr_pairs:
if pair in whitelist_pairs:
continue # avoid duplication
informative_pairs.append((pair, tf))
return informative_pairs
# the model is instantiated here
self.model = CustomModel(self.config)
def bot_start(self):
self.model = CustomModel(self.config)
print('Populating indicators...')
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
self.freqai_info = self.config['freqai']
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
for tf in self.freqai_info['timeframes']:
for i in self.freqai_info['corr_pairlist']:
dataframe = self.populate_any_indicators(i,
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
for tf in self.freqai_info['timeframes']:
for i in self.freqai_info['corr_pairlist']:
dataframe = self.populate_any_indicators(i,
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
# the model will return 4 values, its prediction, an indication of whether or not the prediction
# should be accepted, the target mean/std values from the labels used during each training period.
(dataframe['prediction'], dataframe['do_predict'],
dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)
# the model will return 4 values, its prediction, an indication of whether or not the prediction
# should be accepted, the target mean/std values from the labels used during each training period.
(dataframe['prediction'], dataframe['do_predict'],
dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)
return dataframe
return dataframe
```
The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py` which builds
@ -314,7 +324,7 @@ data point and all other training data points:
$$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$
where $d_{ab}$ is the distance between the standardized points $a$ and $b$. $p$
where $d_{ab}$ is the distance between the normalized points $a$ and $b$. $p$
is the number of features i.e. the length of the vector $X$. The
characteristic distance, $\overline{d}$ for a set of training data points is simply the mean
of the average distances:
@ -392,13 +402,63 @@ The user can stratify the training/testing data using:
which will split the data chronologically so that every X data points is a testing data point. In the
present example, the user is asking for every third data point in the dataframe to be used for
testing, the other points are used for training.
testing, the other points are used for training.
<!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training.
We can use this information to know if our current prediction is in the realm of what the model was trained on,
and if so, what is the statistical probability of the current prediction. With this information, we can
make more informed predictions.
FreqAI builds this label distribution and provides a quantile to the strategy, which can be optionally used as a
dynamic threshold. The `target_quantile: X` means that X% of the labels are below this value. So setting:
```json
"freqai": {
"feature_parameters" : {
"target_quantile": 0.9
}
}
```
This means the strategy receives the label threshold below which 90% of the training labels
fall. An example usage in the strategy may look something like:
```python
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# ... #
(
dataframe["prediction"],
dataframe["do_predict"],
dataframe["target_upper_quantile"],
dataframe["target_lower_quantile"],
) = self.model.bridge.start(dataframe, metadata, self)
return dataframe
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
buy_conditions = [
(dataframe["prediction"] > dataframe["target_upper_quantile"]) & (dataframe["do_predict"] == 1)
]
if buy_conditions:
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
return dataframe
``` -->
## Additional information
### Feature standardization
### Feature normalization
The feature set created by the user is automatically standardized to the training
The feature set created by the user is automatically normalized to the training
data only. This includes all test data and unseen prediction data (dry/live/backtest).
### File structure

View File

@ -141,9 +141,9 @@ class FreqaiDataKitchen:
:model: User trained model which can be inferenced for new predictions
"""
# if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp:
self.data = json.load(fp)
@ -329,42 +329,6 @@ class FreqaiDataKitchen:
:data_dictionary: updated dictionary with standardized values.
"""
# standardize the data by training stats
train_mean = data_dictionary["train_features"].mean()
train_std = data_dictionary["train_features"].std()
data_dictionary["train_features"] = (
data_dictionary["train_features"] - train_mean
) / train_std
data_dictionary["test_features"] = (
data_dictionary["test_features"] - train_mean
) / train_std
train_labels_std = data_dictionary["train_labels"].std()
train_labels_mean = data_dictionary["train_labels"].mean()
data_dictionary["train_labels"] = (
data_dictionary["train_labels"] - train_labels_mean
) / train_labels_std
data_dictionary["test_labels"] = (
data_dictionary["test_labels"] - train_labels_mean
) / train_labels_std
for item in train_std.keys():
self.data[item + "_std"] = train_std[item]
self.data[item + "_mean"] = train_mean[item]
self.data["labels_std"] = train_labels_std
self.data["labels_mean"] = train_labels_mean
return data_dictionary
def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
"""
Standardize all data in the data_dictionary according to the training dataset
:params:
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
:returns:
:data_dictionary: updated dictionary with standardized values.
"""
# standardize the data by training stats
train_max = data_dictionary["train_features"].max()
train_min = data_dictionary["train_features"].min()
data_dictionary["train_features"] = 2 * (
@ -392,9 +356,9 @@ class FreqaiDataKitchen:
return data_dictionary
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
"""
Standardizes a set of data using the mean and standard deviation from
Normalize a set of data using the mean and standard deviation from
the associated training data.
:params:
:df: Dataframe to be standardized
@ -406,19 +370,6 @@ class FreqaiDataKitchen:
return df
def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
    """
    Rescale every column of a dataframe with the per-column statistics
    stored in `self.data` under the keys `<column>_mean` and `<column>_std`.
    :params:
    :df: Dataframe whose columns are rescaled in place
    :returns:
    :df: the same dataframe object, with each column shifted by its stored
    mean and divided by its stored standard deviation
    """
    stored_stats = self.data
    for column in df.keys():
        # subtract the stored mean first, then scale by the stored std
        shifted = df[column] - stored_stats[column + "_mean"]
        df[column] = shifted / stored_stats[column + "_std"]

    return df
def split_timerange(
self, tr: str, train_split: int = 28, bt_split: int = 7
) -> Tuple[list, list]:
@ -657,12 +608,12 @@ class FreqaiDataKitchen:
"""
ones = np.ones(len_dataframe)
s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"]
target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"]
self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict)
self.full_target_mean = np.append(self.full_target_mean, s_mean)
self.full_target_std = np.append(self.full_target_std, s_std)
self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, target_std)
return
@ -827,6 +778,23 @@ class FreqaiDataKitchen:
return dataframe
def fit_labels(self) -> None:
    """
    Fit a normal (Gaussian) distribution to the training labels and store
    the fitted parameters so they can be handed back to the strategy.
    Reads `self.data_dictionary["train_labels"]` and writes
    `self.data["target_mean"]` and `self.data["target_std"]`.
    """
    # Import the submodule explicitly: a bare `import scipy` does not
    # guarantee that `scipy.stats` is loaded on every SciPy version.
    from scipy import stats

    # norm.fit returns the fitted (loc, scale), i.e. the mean and std.
    target_mean, target_std = stats.norm.fit(self.data_dictionary["train_labels"])

    # KEEPME incase we want to let user start to grab quantiles.
    # upper_q = stats.norm.ppf(self.freqai_config['feature_parameters'][
    #                          'target_quantile'], target_mean, target_std)
    # lower_q = stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
    #                          'target_quantile'], target_mean, target_std)
    self.data["target_mean"], self.data["target_std"] = target_mean, target_std
    # self.data["upper_quantile"] = upper_q
    # self.data["lower_quantile"] = lower_q
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()
@ -968,3 +936,52 @@ class FreqaiDataKitchen:
# )
# return
# def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
# """
# standardize all data in the data_dictionary according to the training dataset
# :params:
# :data_dictionary: dictionary containing the cleaned and split training/test data/labels
# :returns:
# :data_dictionary: updated dictionary with standardized values.
# """
# # standardize the data by training stats
# train_mean = data_dictionary["train_features"].mean()
# train_std = data_dictionary["train_features"].std()
# data_dictionary["train_features"] = (
# data_dictionary["train_features"] - train_mean
# ) / train_std
# data_dictionary["test_features"] = (
# data_dictionary["test_features"] - train_mean
# ) / train_std
# train_labels_std = data_dictionary["train_labels"].std()
# train_labels_mean = data_dictionary["train_labels"].mean()
# data_dictionary["train_labels"] = (
# data_dictionary["train_labels"] - train_labels_mean
# ) / train_labels_std
# data_dictionary["test_labels"] = (
# data_dictionary["test_labels"] - train_labels_mean
# ) / train_labels_std
# for item in train_std.keys():
# self.data[item + "_std"] = train_std[item]
# self.data[item + "_mean"] = train_mean[item]
# self.data["labels_std"] = train_labels_std
# self.data["labels_mean"] = train_labels_mean
# return data_dictionary
# def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
# """
# Normalizes a set of data using the mean and standard deviation from
# the associated training data.
# :params:
# :df: Dataframe to be standardized
# """
# for item in df.keys():
# df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
# return df

View File

@ -158,12 +158,7 @@ class IFreqaiModel(ABC):
else:
self.model = dh.load_data(metadata['pair'])
# strategy_provided_features = self.dh.find_features(dataframe_train)
# # FIXME doesnt work with PCA
# if strategy_provided_features != self.dh.training_features_list:
# logger.info("User changed input features, retraining model.")
# self.model = self.train(dataframe_train, metadata)
# self.dh.save_data(self.model)
self.check_if_feature_list_matches_strategy(dataframe_train, dh)
preds, do_preds = self.predict(dataframe_backtest, dh)
@ -220,16 +215,23 @@ class IFreqaiModel(ABC):
self.model = dh.load_data(coin=metadata['pair'])
# FIXME
# strategy_provided_features = dh.find_features(dataframe)
# if strategy_provided_features != dh.training_features_list:
# self.train_model_in_series(new_trained_timerange, metadata, strategy)
self.check_if_feature_list_matches_strategy(dataframe, dh)
preds, do_preds = self.predict(dataframe, dh)
dh.append_predictions(preds, do_preds, len(dataframe))
return dh
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
                                           dh: FreqaiDataKitchen) -> None:
    """
    Ensure the features found in the strategy dataframe are the same as the
    feature list stored on the data kitchen for the loaded model.
    :params:
    :dataframe: strategy dataframe passed to `dh.find_features()`
    :dh: FreqaiDataKitchen providing `find_features()` and
    `training_features_list`
    :raises:
    OperationalException when the two feature lists differ.
    """
    strategy_provided_features = dh.find_features(dataframe)
    if strategy_provided_features != dh.training_features_list:
        # Adjacent string literals are joined verbatim, so every fragment
        # must carry its own trailing space to keep the words separated.
        raise OperationalException(
            "Trying to access pretrained model with `identifier` "
            "but found different features furnished by current strategy. "
            "Change `identifier` to train from scratch, or ensure the "
            "strategy is furnishing the same features as the pretrained "
            "model"
        )
def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None:
"""
Base data cleaning method for train
@ -237,6 +239,7 @@ class IFreqaiModel(ABC):
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
of how outlier data points are dropped from the dataframe used for training.
"""
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
dh.principal_component_analysis()

View File

@ -33,10 +33,6 @@ class CatboostPredictionModel(IFreqaiModel):
/ dataframe["close"]
- 1
)
dh.data["s_mean"] = dataframe["s"].mean()
dh.data["s_std"] = dataframe["s"].std()
# logger.info("label mean", dh.data["s_mean"], "label std", dh.data["s_std"])
return dataframe["s"]
@ -68,8 +64,9 @@ class CatboostPredictionModel(IFreqaiModel):
# split data into train/test data.
data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered)
# standardize all data based on train_dataset only
data_dictionary = dh.standardize_data(data_dictionary)
dh.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy
# normalize all data based on train_dataset only
data_dictionary = dh.normalize_data(data_dictionary)
# optional additional data cleaning/analysis
self.data_cleaning_train(dh)
@ -128,7 +125,7 @@ class CatboostPredictionModel(IFreqaiModel):
filtered_dataframe, _ = dh.filter_features(
unfiltered_dataframe, original_feature_list, training_filter=False
)
filtered_dataframe = dh.standardize_data_from_metadata(filtered_dataframe)
filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe)
dh.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis
@ -136,7 +133,7 @@ class CatboostPredictionModel(IFreqaiModel):
predictions = self.model.predict(dh.data_dictionary["prediction_features"])
# compute the non-standardized predictions
# compute the non-normalized predictions
dh.predictions = (predictions + 1) * (dh.data["labels_max"] -
dh.data["labels_min"]) / 2 + dh.data["labels_min"]

View File

@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy):
dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]
return dataframe
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: