Mirror of https://github.com/freqtrade/freqtrade.git (synced 2024-11-10 10:21:59 +00:00)
improve DBSCAN performance for subsequent trainings
This commit is contained in:
Parent: fe1b8515a8
Commit: 51a6b4289f
|
@ -76,6 +76,7 @@ class FreqaiDataDrawer:
|
||||||
self.load_historic_predictions_from_disk()
|
self.load_historic_predictions_from_disk()
|
||||||
self.training_queue: Dict[str, int] = {}
|
self.training_queue: Dict[str, int] = {}
|
||||||
self.history_lock = threading.Lock()
|
self.history_lock = threading.Lock()
|
||||||
|
self.old_DBSCAN_eps: Dict[str, float] = {}
|
||||||
|
|
||||||
def load_drawer_from_disk(self):
|
def load_drawer_from_disk(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -582,7 +582,7 @@ class FreqaiDataKitchen:
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def use_DBSCAN_to_remove_outliers(self, predict: bool) -> None:
|
def use_DBSCAN_to_remove_outliers(self, predict: bool, eps=None) -> None:
|
||||||
"""
|
"""
|
||||||
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
||||||
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
||||||
|
@ -615,10 +615,10 @@ class FreqaiDataKitchen:
|
||||||
|
|
||||||
else:
|
else:
|
||||||
outlier_target = self.freqai_config['feature_parameters'].get('DBSCAN_outlier_pct')
|
outlier_target = self.freqai_config['feature_parameters'].get('DBSCAN_outlier_pct')
|
||||||
if 'DBSCAN_eps' in self.data:
|
if eps:
|
||||||
eps = self.data['DBSCAN_eps']
|
epsilon = eps
|
||||||
else:
|
else:
|
||||||
eps = 10
|
epsilon = 10
|
||||||
logger.info('DBSCAN starting from high value. This should be faster next train.')
|
logger.info('DBSCAN starting from high value. This should be faster next train.')
|
||||||
|
|
||||||
error = 1.
|
error = 1.
|
||||||
|
@ -628,7 +628,7 @@ class FreqaiDataKitchen:
|
||||||
|
|
||||||
# find optimal value for epsilon using an iterative approach:
|
# find optimal value for epsilon using an iterative approach:
|
||||||
while abs(np.sqrt(error)) > 0.1:
|
while abs(np.sqrt(error)) > 0.1:
|
||||||
clustering = DBSCAN(eps=eps, min_samples=MinPts,
|
clustering = DBSCAN(eps=epsilon, min_samples=MinPts,
|
||||||
n_jobs=int(self.thread_count / 2)).fit(
|
n_jobs=int(self.thread_count / 2)).fit(
|
||||||
self.data_dictionary['train_features']
|
self.data_dictionary['train_features']
|
||||||
)
|
)
|
||||||
|
@ -637,13 +637,14 @@ class FreqaiDataKitchen:
|
||||||
multiplier = (outlier_pct - outlier_target) if outlier_pct > 0 else 1 * \
|
multiplier = (outlier_pct - outlier_target) if outlier_pct > 0 else 1 * \
|
||||||
np.sign(outlier_pct - outlier_target)
|
np.sign(outlier_pct - outlier_target)
|
||||||
multiplier = 1 + error * multiplier
|
multiplier = 1 + error * multiplier
|
||||||
eps = multiplier * eps
|
epsilon = multiplier * epsilon
|
||||||
logger.info(
|
logger.info(
|
||||||
f'DBSCAN error {error:.2f} for eps {eps:.2f} and outliet pct {outlier_pct:.2f}')
|
f'DBSCAN error {error:.2f} for eps {epsilon:.2f}'
|
||||||
|
f' and outlier pct {outlier_pct:.2f}')
|
||||||
|
|
||||||
logger.info(f'DBSCAN found eps of {eps}.')
|
logger.info(f'DBSCAN found eps of {epsilon}.')
|
||||||
|
|
||||||
self.data['DBSCAN_eps'] = eps
|
self.data['DBSCAN_eps'] = epsilon
|
||||||
self.data['DBSCAN_min_samples'] = MinPts
|
self.data['DBSCAN_min_samples'] = MinPts
|
||||||
dropped_points = np.where(clustering.labels_ == -1, 1, 0)
|
dropped_points = np.where(clustering.labels_ == -1, 1, 0)
|
||||||
|
|
||||||
|
|
|
@ -385,7 +385,12 @@ class IFreqaiModel(ABC):
|
||||||
dk.data["avg_mean_dist"] = dk.compute_distances()
|
dk.data["avg_mean_dist"] = dk.compute_distances()
|
||||||
|
|
||||||
if self.freqai_info["feature_parameters"].get("DBSCAN_outlier_pct", 0):
|
if self.freqai_info["feature_parameters"].get("DBSCAN_outlier_pct", 0):
|
||||||
dk.use_DBSCAN_to_remove_outliers(predict=False)
|
if dk.pair in self.dd.old_DBSCAN_eps:
|
||||||
|
eps = self.dd.old_DBSCAN_eps[dk.pair]
|
||||||
|
else:
|
||||||
|
eps = None
|
||||||
|
dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
|
||||||
|
self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']
|
||||||
|
|
||||||
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
|
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user