Merge pull request #7701 from freqtrade/add-single-precision-freqai

add option to force single precision
Commit 95fd4072fa by Matthias, 2022-11-14 07:02:35 +01:00 (committed via GitHub)
8 changed files with 76 additions and 9 deletions

View File

@@ -204,6 +204,7 @@
    "strategy_path": "user_data/strategies/",
    "recursive_strategy_search": false,
    "add_config_files": [],
    "reduce_df_footprint": false,
    "dataformat_ohlcv": "json",
    "dataformat_trades": "jsongz"
}

View File

@@ -253,6 +253,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.<br> *Defaults to `[]`*. <br> **Datatype:** List of strings
| `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data. <br> *Defaults to `json`*. <br> **Datatype:** String
| `dataformat_trades` | Data format to use to store historical trades data. <br> *Defaults to `jsongz`*. <br> **Datatype:** String
| `reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing RAM/disk usage (and decreasing train/inference timing in FreqAI). Currently only affects FreqAI use-cases. <br> **Datatype:** Boolean. <br> Default: `False`.
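
As a rough, standalone illustration of the saving this option targets (plain pandas rather than freqtrade code; the column name and row count are made up):

```python
import numpy as np
import pandas as pd

# 100k rows of a single numeric column; pandas stores it as float64 by default.
df = pd.DataFrame({"feature": np.random.rand(100_000)})
before = df.memory_usage(deep=True).sum()
# Downcasting the column to float32 roughly halves its in-memory footprint.
after = df.astype({"feature": np.float32}).memory_usage(deep=True).sum()
print(f"{before / 1024**2:.2f} MB -> {after / 1024**2:.2f} MB")
```

float32 keeps roughly 7 significant decimal digits, which is generally plenty for engineered features, and the option leaves the OHLCV columns themselves untouched.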
### Parameters in the strategy

View File

@@ -50,3 +50,4 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
| | **Extraneous parameters**
| `keras` | If the selected model makes use of Keras (typical for Tensorflow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards. <br> **Datatype:** Boolean. <br> Default: `False`.
| `conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
| `reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing RAM/disk usage and decreasing train/inference timing. This parameter is set in the main level of the Freqtrade configuration file (not inside FreqAI). <br> **Datatype:** Boolean. <br> Default: `False`.
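
To make the placement concrete, a minimal sketch with an illustrative, heavily stripped-down config dict (real configurations contain many more keys):

```python
# Illustrative sketch only: where the flag sits relative to the "freqai" section.
config = {
    "reduce_df_footprint": True,   # main (root) level of the configuration
    "freqai": {
        "feature_parameters": {},  # the flag does NOT go in here
    },
}

# FreqAI reads the flag from the root of the config with a False fallback,
# mirroring the data_kitchen.py change further down in this pull request.
if config.get("reduce_df_footprint", False):
    print("Numeric feature columns will be recast to float32/int32")
```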

View File

@@ -159,6 +159,7 @@ CONF_SCHEMA = {
        'ignore_buying_expired_candle_after': {'type': 'number'},
        'trading_mode': {'type': 'string', 'enum': TRADING_MODES},
        'margin_mode': {'type': 'string', 'enum': MARGIN_MODES},
        'reduce_df_footprint': {'type': 'boolean', 'default': False},
        'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99},
        'backtest_breakdown': {
            'type': 'array',

View File

@@ -7,6 +7,7 @@ from datetime import datetime, timezone
from operator import itemgetter
from typing import Dict, List

import numpy as np
import pandas as pd
from pandas import DataFrame, to_datetime

@@ -313,3 +314,29 @@ def convert_ohlcv_format(
        if erase and convert_from != convert_to:
            logger.info(f"Deleting source data for {pair} / {timeframe}")
            src.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type)


def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
    """
    Ensure all values are float32 in the incoming dataframe.
    :param df: Dataframe to be converted to float/int 32s
    :return: Dataframe converted to float/int 32s
    """

    logger.debug(f"Memory usage of dataframe is "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    df_dtypes = df.dtypes
    for column, dtype in df_dtypes.items():
        if column in ['open', 'high', 'low', 'close', 'volume']:
            continue
        if dtype == np.float64:
            df_dtypes[column] = np.float32
        elif dtype == np.int64:
            df_dtypes[column] = np.int32
    df = df.astype(df_dtypes)

    logger.debug(f"Memory usage after optimization is: "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    return df
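
A condensed sketch of the intended behavior (the new test added to the converter tests further down exercises the same thing; `feature_a` is an illustrative column name):

```python
import numpy as np
import pandas as pd

from freqtrade.data.converter import reduce_dataframe_footprint

# One OHLCV row plus one extra numeric column; all columns start as float64.
df = pd.DataFrame({
    'open': [1.0], 'high': [1.0], 'low': [1.0],
    'close': [1.0], 'volume': [1.0], 'feature_a': [1.0],
})
slim = reduce_dataframe_footprint(df)
assert slim['feature_a'].dtype == np.float32  # non-OHLCV numeric columns are downcast
assert slim['close'].dtype == np.float64      # OHLCV columns keep full precision
```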

View File

@@ -19,6 +19,7 @@ from sklearn.neighbors import NearestNeighbors
from freqtrade.configuration import TimeRange
from freqtrade.constants import Config
from freqtrade.data.converter import reduce_dataframe_footprint
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.strategy.interface import IStrategy

@@ -1275,6 +1276,9 @@ class FreqaiDataKitchen:
        dataframe = self.remove_special_chars_from_feature_names(dataframe)

        if self.config.get('reduce_df_footprint', False):
            dataframe = reduce_dataframe_footprint(dataframe)

        return dataframe

    def fit_labels(self) -> None:

View File

@@ -3,18 +3,19 @@ import logging
from pathlib import Path
from shutil import copyfile

import numpy as np
import pytest

from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format,
                                      ohlcv_fill_up_missing_data, ohlcv_to_dataframe,
                                      reduce_dataframe_footprint, trades_dict_to_list,
                                      trades_remove_duplicates, trades_to_ohlcv, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data, load_pair_history,
                                    validate_backtest_data)
from freqtrade.data.history.idatahandler import IDataHandler
from freqtrade.enums import CandleType
from tests.conftest import generate_test_data, log_has, log_has_re
from tests.data.test_history import _clean_test_file

@@ -344,3 +345,33 @@ def test_convert_ohlcv_format(default_conf, testdatadir, tmpdir, file_base, cand
        assert file.exists()
    for file in (files_new):
        assert not file.exists()


def test_reduce_dataframe_footprint():
    data = generate_test_data('15m', 40)
    data['open_copy'] = data['open']
    data['close_copy'] = data['close']

    assert data['open'].dtype == np.float64
    assert data['open_copy'].dtype == np.float64
    assert data['close_copy'].dtype == np.float64

    df2 = reduce_dataframe_footprint(data)

    # Does not modify original dataframe
    assert data['open'].dtype == np.float64
    assert data['open_copy'].dtype == np.float64
    assert data['close_copy'].dtype == np.float64

    # Skips OHLCV columns
    assert df2['open'].dtype == np.float64
    assert df2['high'].dtype == np.float64
    assert df2['low'].dtype == np.float64
    assert df2['close'].dtype == np.float64
    assert df2['volume'].dtype == np.float64

    # Changes dtype of returned dataframe
    assert df2['open_copy'].dtype == np.float32
    assert df2['close_copy'].dtype == np.float32

View File

@@ -27,13 +27,13 @@ def is_mac() -> bool:
    return "Darwin" in machine


@pytest.mark.parametrize('model, pca, dbscan, float32', [
    ('LightGBMRegressor', True, False, True),
    ('XGBoostRegressor', False, True, False),
    ('XGBoostRFRegressor', False, False, False),
    ('CatboostRegressor', False, False, False),
    ])
def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan, float32):
    if is_arm() and model == 'CatboostRegressor':
        pytest.skip("CatBoost is not supported on ARM")

@@ -43,6 +43,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
    freqai_conf.update({"strategy": "freqai_test_strat"})
    freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca})
    freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan})
    freqai_conf.update({"reduce_df_footprint": float32})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)