Fix possible memory leak associated with CatBoost Pool objects

This commit is contained in:
robcaulk 2022-07-26 17:29:29 +02:00
parent 3f149c4067
commit 324e54c015
4 changed files with 11 additions and 8 deletions

View File

@ -120,7 +120,8 @@ class IFreqaiModel(ABC):
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = dk.remove_features_from_df(dk.return_dataframe)
return self.return_values(dataframe, dk)
del dk
return self.return_values(dataframe)
@threaded
def start_scanning(self, strategy: IStrategy) -> None:
@ -560,12 +561,11 @@ class IFreqaiModel(ABC):
"""
@abstractmethod
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
def return_values(self, dataframe: DataFrame) -> DataFrame:
"""
User defines the dataframe to be returned to strategy here.
:param dataframe: DataFrame = the full dataframe for the current prediction (live)
or --timerange (backtesting)
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:return: dataframe: DataFrame = dataframe filled with user defined data
"""

View File

@ -18,7 +18,7 @@ class BaseRegressionModel(IFreqaiModel):
such as prediction_models/CatboostPredictionModel.py for guidance.
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
def return_values(self, dataframe: DataFrame) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.

View File

@ -16,7 +16,7 @@ class BaseTensorFlowModel(IFreqaiModel):
User *must* inherit from this class and set fit() and predict().
"""
def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
def return_values(self, dataframe: DataFrame) -> DataFrame:
"""
User uses this function to add any additional return values to the dataframe.
e.g.

View File

@ -1,6 +1,6 @@
import logging
from typing import Any, Dict
import gc
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
@ -42,8 +42,11 @@ class CatboostPredictionModel(BaseRegressionModel):
**self.model_training_parameters,
)
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
test_data = None
model.fit(X=train_data, eval_set=test_data)
# some evidence that catboost pools have memory leaks:
# https://github.com/catboost/catboost/issues/1835
del train_data, test_data
gc.collect()
return model