alleviate FutureWarning in sklearn about ensuring svm model features are passed with identical order

This commit is contained in:
robcaulk 2022-05-24 14:46:16 +02:00
parent 255d35976e
commit 31ae2b3060
4 changed files with 61 additions and 11 deletions

View File

@ -105,11 +105,11 @@ config setup includes:
### Building the feature set ### Building the feature set
Most of these parameters are controlling the feature data set. Features are added by the user Features are added by the user inside the `populate_any_indicators()` method of the strategy
inside the `populate_any_indicators()` method of the strategy by prepending indicators with `%`: by prepending indicators with `%`:
```python ```python
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""): def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
informative['%-' + coin + "rsi"] = ta.RSI(informative, timeperiod=14) informative['%-' + coin + "rsi"] = ta.RSI(informative, timeperiod=14)
informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25) informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
informative['%-' + coin + "adx"] = ta.ADX(informative, window=20) informative['%-' + coin + "adx"] = ta.ADX(informative, window=20)
@ -120,11 +120,46 @@ inside the `populate_any_indicators()` method of the strategy by prepending indi
informative['%-' + coin + "bb_width"] = ( informative['%-' + coin + "bb_width"] = (
informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"] informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
) / informative[coin + "bb_middleband"] ) / informative[coin + "bb_middleband"]
# The following code automatically adds features according to the `shift` parameter passed
# in the config. Do not remove
indicators = [col for col in informative if col.startswith('%')]
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
if n == 0:
continue
informative_shift = informative[indicators].shift(n)
informative_shift = informative_shift.add_suffix("_shift-" + str(n))
informative = pd.concat((informative, informative_shift), axis=1)
# The following code safely merges into the base timeframe.
# Do not remove.
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
df = df.drop(columns=skip_columns)
``` ```
The user of the present example does not want to pass the `bb_lowerband` as a feature to the model, The user of the present example does not want to pass the `bb_lowerband` as a feature to the model,
and has therefore not prepended it with `%`. The user does, however, wish to pass `bb_width` to the and has therefore not prepended it with `%`. The user does, however, wish to pass `bb_width` to the
model for training/prediction and has therefore prepended it with `%`._ model for training/prediction and has therefore prepended it with `%`._
Note: features **must** be defined in `populate_any_indicators()`. Defining features in `populate_indicators()`
will fail in live/dry mode. If the user wishes to add generalized features that are not associated with
a specific pair or timeframe, they should use the following structure inside `populate_any_indicators()`
(as exemplified in `freqtrade/templates/FreqaiExampleStrategy.py`):
```python
def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
# Add generalized indicators here (because in live, it will call only this function to populate
# indicators for retraining). Notice how we ensure not to add them multiple times by associating
# these generalized indicators to the basepair/timeframe
if pair == metadata['pair'] and tf == self.timeframe:
df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7
df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25
```
(Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`) (Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`)
The `timeframes` from the example config above are the timeframes of each `populate_any_indicators()` The `timeframes` from the example config above are the timeframes of each `populate_any_indicators()`

View File

@ -823,7 +823,9 @@ class FreqaiDataKitchen:
pairs = self.freqai_config.get("corr_pairlist", []) pairs = self.freqai_config.get("corr_pairlist", [])
for tf in self.freqai_config.get("timeframes"): for tf in self.freqai_config.get("timeframes"):
dataframe = strategy.populate_any_indicators(metadata['pair'], dataframe = strategy.populate_any_indicators(
metadata,
metadata['pair'],
dataframe.copy(), dataframe.copy(),
tf, tf,
base_dataframes[tf], base_dataframes[tf],
@ -833,7 +835,9 @@ class FreqaiDataKitchen:
for i in pairs: for i in pairs:
if metadata['pair'] in i: if metadata['pair'] in i:
continue # dont repeat anything from whitelist continue # dont repeat anything from whitelist
dataframe = strategy.populate_any_indicators(i, dataframe = strategy.populate_any_indicators(
metadata,
i,
dataframe.copy(), dataframe.copy(),
tf, tf,
corr_dataframes[i][tf], corr_dataframes[i][tf],

View File

@ -532,7 +532,7 @@ class IStrategy(ABC, HyperStrategyMixin):
""" """
return None return None
def populate_any_indicators(self, pair: str, df: DataFrame, tf: str, def populate_any_indicators(self, metadata: dict, pair: str, df: DataFrame, tf: str,
informative: DataFrame = None, coin: str = "") -> DataFrame: informative: DataFrame = None, coin: str = "") -> DataFrame:
""" """
Function designed to automatically generate, name and merge features Function designed to automatically generate, name and merge features

View File

@ -63,7 +63,7 @@ class FreqaiExampleStrategy(IStrategy):
def bot_start(self): def bot_start(self):
self.model = CustomModel(self.config) self.model = CustomModel(self.config)
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""): def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
""" """
Function designed to automatically generate, name and merge features Function designed to automatically generate, name and merge features
from user indicated timeframes in the configuration file. User controls the indicators from user indicated timeframes in the configuration file. User controls the indicators
@ -124,8 +124,9 @@ class FreqaiExampleStrategy(IStrategy):
informative[coin + "pct-change"] = informative["close"].pct_change() informative[coin + "pct-change"] = informative["close"].pct_change()
# The following code automatically adds features according to the `shift` parameter passed
# in the config. Do not remove
indicators = [col for col in informative if col.startswith('%')] indicators = [col for col in informative if col.startswith('%')]
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1): for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
if n == 0: if n == 0:
continue continue
@ -133,28 +134,38 @@ class FreqaiExampleStrategy(IStrategy):
informative_shift = informative_shift.add_suffix("_shift-" + str(n)) informative_shift = informative_shift.add_suffix("_shift-" + str(n))
informative = pd.concat((informative, informative_shift), axis=1) informative = pd.concat((informative, informative_shift), axis=1)
# The following code safely merges into the base timeframe.
# Do not remove.
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]] skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
df = df.drop(columns=skip_columns) df = df.drop(columns=skip_columns)
# Add generalized indicators (not associated to any individual coin or timeframe) here
# because in live, it will call this function to populate
# indicators during training. Notice how we ensure not to add them multiple times
if pair == metadata['pair'] and tf == self.timeframe:
df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7
df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25
return df return df
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# the configuration file parameters are stored here
self.freqai_info = self.config["freqai"] self.freqai_info = self.config["freqai"]
self.pair = metadata['pair'] self.pair = metadata['pair']
# the following loops are necessary for building the features # the following loops are necessary for building the features
# indicated by the user in the configuration file. # indicated by the user in the configuration file.
# All indicators must be populated by populate_any_indicators() for live functionality
# to work correctly.
for tf in self.freqai_info["timeframes"]: for tf in self.freqai_info["timeframes"]:
dataframe = self.populate_any_indicators(self.pair, dataframe.copy(), tf, dataframe = self.populate_any_indicators(metadata, self.pair, dataframe.copy(), tf,
coin=self.pair.split("/")[0] + "-") coin=self.pair.split("/")[0] + "-")
for pair in self.freqai_info["corr_pairlist"]: for pair in self.freqai_info["corr_pairlist"]:
if metadata['pair'] in pair: if metadata['pair'] in pair:
continue # do not include whitelisted pair twice if it is in corr_pairlist continue # do not include whitelisted pair twice if it is in corr_pairlist
dataframe = self.populate_any_indicators( dataframe = self.populate_any_indicators(
pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-" metadata, pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
) )
# the model will return 4 values, its prediction, an indication of whether or not the # the model will return 4 values, its prediction, an indication of whether or not the