Fix typing issue, avoid using .get() when unnecessary, and convert string concatenation to f-strings

This commit is contained in:
robcaulk 2022-07-29 08:12:50 +02:00
parent efbd83c56d
commit c84d54b35e
5 changed files with 57 additions and 57 deletions

View File

@ -11,7 +11,7 @@ import numpy as np
import pandas as pd
from joblib import dump, load
from joblib.externals import cloudpickle
from numpy.typing import ArrayLike
from numpy.typing import ArrayLike, NDArray
from pandas import DataFrame
from freqtrade.configuration import TimeRange
@ -233,12 +233,13 @@ class FreqaiDataDrawer:
mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
mrv_df[f"{label}_std"] = dk.data["labels_std"][label]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
mrv_df["DI_values"] = dk.DI_values
mrv_df["do_predict"] = do_preds
def append_model_predictions(self, pair: str, predictions: DataFrame, do_preds: ArrayLike,
def append_model_predictions(self, pair: str, predictions: DataFrame,
do_preds: NDArray[np.int_],
dk: FreqaiDataKitchen, len_df: int) -> None:
# strat seems to feed us variable sized dataframes - and since we are trying to build our
@ -266,10 +267,10 @@ class FreqaiDataDrawer:
df[label].iloc[-1] = predictions[label].iloc[-1]
df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label]
df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label]
# df['prediction'].iloc[-1] = predictions[-1]
df["do_predict"].iloc[-1] = do_preds[-1]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
df["DI_values"].iloc[-1] = dk.DI_values[-1]
# append the new predictions to persistent storage
@ -309,7 +310,7 @@ class FreqaiDataDrawer:
# dataframe['prediction'] = 0
dataframe["do_predict"] = 0
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
dataframe["DI_value"] = 0
dk.return_dataframe = dataframe
@ -379,24 +380,24 @@ class FreqaiDataDrawer:
model.save(save_path / f"{dk.model_filename}_model.h5")
if dk.svm_model is not None:
dump(dk.svm_model, save_path / str(dk.model_filename + "_svm_model.joblib"))
dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib")
dk.data["data_path"] = str(dk.data_path)
dk.data["model_filename"] = str(dk.model_filename)
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
dk.data["label_list"] = dk.label_list
# store the metadata
with open(save_path / str(dk.model_filename + "_metadata.json"), "w") as fp:
with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
json.dump(dk.data, fp, default=dk.np_encoder)
# save the train data to file so we can check preds for area of applicability later
dk.data_dictionary["train_features"].to_pickle(
save_path / str(dk.model_filename + "_trained_df.pkl")
save_path / f"{dk.model_filename}_trained_df.pkl"
)
if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
if self.freqai_info["feature_parameters"].get("principal_component_analysis"):
cloudpickle.dump(
dk.pca, open(dk.data_path / str(dk.model_filename + "_pca_object.pkl"), "wb")
dk.pca, open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "wb")
)
# if self.live:
@ -429,27 +430,27 @@ class FreqaiDataDrawer:
/ dk.data_path.parts[-1]
)
with open(dk.data_path / str(dk.model_filename + "_metadata.json"), "r") as fp:
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
dk.data = json.load(fp)
dk.training_features_list = dk.data["training_features_list"]
dk.label_list = dk.data["label_list"]
dk.data_dictionary["train_features"] = pd.read_pickle(
dk.data_path / str(dk.model_filename + "_trained_df.pkl")
dk.data_path / f"{dk.model_filename}_trained_df.pkl"
)
# try to access model in memory instead of loading object from disk to save time
if dk.live and dk.model_filename in self.model_dictionary:
model = self.model_dictionary[dk.model_filename]
elif not dk.keras:
model = load(dk.data_path / str(dk.model_filename + "_model.joblib"))
model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
else:
from tensorflow import keras
model = keras.models.load_model(dk.data_path / str(dk.model_filename + "_model.h5"))
model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")
if Path(dk.data_path / str(dk.model_filename + "_svm_model.joblib")).resolve().exists():
dk.svm_model = load(dk.data_path / str(dk.model_filename + "_svm_model.joblib"))
if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
if not model:
raise OperationalException(
@ -458,7 +459,7 @@ class FreqaiDataDrawer:
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
dk.pca = cloudpickle.load(
open(dk.data_path / str(dk.model_filename + "_pca_object.pkl"), "rb")
open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb")
)
return model
@ -471,7 +472,7 @@ class FreqaiDataDrawer:
:params:
dataframe: DataFrame = strategy provided dataframe
"""
feat_params = self.freqai_info.get("feature_parameters", {})
feat_params = self.freqai_info["feature_parameters"]
with self.history_lock:
history_data = self.historic_data
@ -524,7 +525,7 @@ class FreqaiDataDrawer:
for pair in dk.all_pairs:
if pair not in history_data:
history_data[pair] = {}
for tf in self.freqai_info.get("feature_parameters", {}).get("include_timeframes"):
for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
history_data[pair][tf] = load_pair_history(
datadir=self.config["datadir"],
timeframe=tf,
@ -550,11 +551,11 @@ class FreqaiDataDrawer:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.historic_data
pairs = self.freqai_info.get("feature_parameters", {}).get(
pairs = self.freqai_info["feature_parameters"].get(
"include_corr_pairlist", []
)
for tf in self.freqai_info.get("feature_parameters", {}).get("include_timeframes"):
for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf])
if pairs:
for p in pairs:

View File

@ -116,7 +116,7 @@ class FreqaiDataKitchen:
:filtered_dataframe: cleaned dataframe ready to be split.
:labels: cleaned labels ready to be split.
"""
feat_dict = self.freqai_config.get("feature_parameters", {})
feat_dict = self.freqai_config["feature_parameters"]
weights: npt.ArrayLike
if feat_dict.get("weight_factor", 0) > 0:
@ -515,7 +515,9 @@ class FreqaiDataKitchen:
return
if predict:
assert self.svm_model, "No svm model available for outlier removal"
if not self.svm_model:
logger.warning("No svm model available for outlier removal")
return
y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"])
do_predict = np.where(y_pred == -1, 0, y_pred)
@ -528,7 +530,7 @@ class FreqaiDataKitchen:
else:
# use SGDOneClassSVM to increase speed?
nu = self.freqai_config.get("feature_parameters", {}).get("svm_nu", 0.2)
nu = self.freqai_config["feature_parameters"].get("svm_nu", 0.2)
self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit(
self.data_dictionary["train_features"]
)
@ -551,7 +553,7 @@ class FreqaiDataKitchen:
)
# same for test data
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0:
y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
dropped_points = np.where(y_pred == -1, 0, y_pred)
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
@ -605,7 +607,7 @@ class FreqaiDataKitchen:
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
do_predict = np.where(
self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"],
1,
0,
)
@ -640,7 +642,7 @@ class FreqaiDataKitchen:
self.append_df[f"{label}_std"] = self.data["labels_std"][label]
self.append_df["do_predict"] = do_predict
if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
self.append_df["DI_values"] = self.DI_values
if self.full_df.empty:
@ -701,7 +703,7 @@ class FreqaiDataKitchen:
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path(
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
self.config["user_data_dir"] / "models" / f"{self.freqai_config['identifier']}"
)
config_path = Path(self.config["config_files"][0])
@ -741,10 +743,10 @@ class FreqaiDataKitchen:
data_load_timerange = TimeRange()
# find the max indicator length required
max_timeframe_chars = self.freqai_config.get("feature_parameters", {}).get(
max_timeframe_chars = self.freqai_config["feature_parameters"].get(
"include_timeframes"
)[-1]
max_period = self.freqai_config.get("feature_parameters", {}).get(
max_period = self.freqai_config["feature_parameters"].get(
"indicator_max_period_candles", 50
)
additional_seconds = 0
@ -832,7 +834,7 @@ class FreqaiDataKitchen:
refresh_backtest_ohlcv_data(
exchange,
pairs=self.all_pairs,
timeframes=self.freqai_config.get("feature_parameters", {}).get("include_timeframes"),
timeframes=self.freqai_config["feature_parameters"].get("include_timeframes"),
datadir=self.config["datadir"],
timerange=timerange,
new_pairs_days=new_pairs_days,
@ -845,7 +847,7 @@ class FreqaiDataKitchen:
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(
self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", [])
self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
)
for pair in self.config.get("exchange", "").get("pair_whitelist"):
if pair not in self.all_pairs:
@ -876,8 +878,8 @@ class FreqaiDataKitchen:
# for prediction dataframe creation, we let dataprovider handle everything in the strategy
# so we create empty dictionaries, which allows us to pass None to
# `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe.
tfs = self.freqai_config.get("feature_parameters", {}).get("include_timeframes")
pairs = self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", [])
tfs = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy()
for tf in tfs:

View File

@ -12,7 +12,7 @@ from typing import Any, Dict, Tuple
import numpy as np
import pandas as pd
from numpy.typing import ArrayLike
from numpy.typing import NDArray
from pandas import DataFrame
from freqtrade.configuration import TimeRange
@ -204,14 +204,9 @@ class IFreqaiModel(ABC):
dk.data_path = Path(
dk.full_path
/ str(
"sub-train"
+ "-"
+ metadata["pair"].split("/")[0]
+ "_"
+ str(int(trained_timestamp.stopts))
/
f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
)
)
if not self.model_exists(
metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
):
@ -331,7 +326,8 @@ class IFreqaiModel(ABC):
return
elif self.dk.check_if_model_expired(trained_timestamp):
pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2)
do_preds = np.ones(2, dtype=np.int_) * 2
dk.DI_values = np.zeros(2)
logger.warning(
f"Model expired for {pair}, returning null values to strategy. Strategy "
"construction should take care to consider this event with "
@ -379,15 +375,15 @@ class IFreqaiModel(ABC):
example of how outlier data points are dropped from the dataframe used for training.
"""
if self.freqai_info.get("feature_parameters", {}).get(
if self.freqai_info["feature_parameters"].get(
"principal_component_analysis", False
):
dk.principal_component_analysis()
if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False):
if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
dk.use_SVM_to_remove_outliers(predict=False)
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
dk.data["avg_mean_dist"] = dk.compute_distances()
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
@ -401,15 +397,15 @@ class IFreqaiModel(ABC):
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
for buy signals.
"""
if self.freqai_info.get("feature_parameters", {}).get(
if self.freqai_info["feature_parameters"].get(
"principal_component_analysis", False
):
dk.pca_transform(dataframe)
if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False):
if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
dk.use_SVM_to_remove_outliers(predict=True)
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
dk.check_if_pred_in_training_spaces()
def model_exists(
@ -430,9 +426,9 @@ class IFreqaiModel(ABC):
coin, _ = pair.split("/")
if not self.live:
dk.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp)
dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
path_to_modelfile = Path(dk.data_path / str(model_filename + "_model.joblib"))
path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
file_exists = path_to_modelfile.is_file()
if file_exists and not scanning:
logger.info("Found model at %s", dk.data_path / dk.model_filename)
@ -442,7 +438,7 @@ class IFreqaiModel(ABC):
def set_full_path(self) -> None:
self.full_path = Path(
self.config["user_data_dir"] / "models" / str(self.freqai_info.get("identifier"))
self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}"
)
self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy(
@ -550,7 +546,7 @@ class IFreqaiModel(ABC):
@abstractmethod
def predict(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
) -> Tuple[DataFrame, ArrayLike]:
) -> Tuple[DataFrame, NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_dataframe: Full dataframe for the current backtest period.

View File

@ -3,7 +3,7 @@ from typing import Any, Tuple
import numpy.typing as npt
from pandas import DataFrame
import numpy as np
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel
@ -85,7 +85,7 @@ class BaseRegressionModel(IFreqaiModel):
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, npt.ArrayLike]:
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.

View File

@ -1,6 +1,7 @@
import gc
import logging
from typing import Any, Dict
import gc
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel