mirror of
https://github.com/freqtrade/freqtrade.git
synced 2024-11-10 10:21:59 +00:00
black formatting on freqai files
This commit is contained in:
parent
106131ff0f
commit
ffb39a5029
|
@ -1,4 +1,3 @@
|
|||
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
|
@ -27,10 +26,11 @@ class FreqaiDataDrawer:
|
|||
This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
|
||||
reinstantiated for each coin.
|
||||
"""
|
||||
|
||||
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
|
||||
|
||||
self.config = config
|
||||
self.freqai_info = config.get('freqai', {})
|
||||
self.freqai_info = config.get("freqai", {})
|
||||
# dictionary holding all pair metadata necessary to load in from disk
|
||||
self.pair_dict: Dict[str, Any] = {}
|
||||
# dictionary holding all actively inferenced models in memory given a model filename
|
||||
|
@ -38,7 +38,6 @@ class FreqaiDataDrawer:
|
|||
self.model_return_values: Dict[str, Any] = {}
|
||||
self.pair_data_dict: Dict[str, Any] = {}
|
||||
self.historic_data: Dict[str, Any] = {}
|
||||
# self.populated_historic_data: Dict[str, Any] = {} ?
|
||||
self.follower_dict: Dict[str, Any] = {}
|
||||
self.full_path = full_path
|
||||
self.follow_mode = follow_mode
|
||||
|
@ -47,7 +46,6 @@ class FreqaiDataDrawer:
|
|||
self.load_drawer_from_disk()
|
||||
self.training_queue: Dict[str, int] = {}
|
||||
self.history_lock = threading.Lock()
|
||||
# self.create_training_queue(pair_whitelist)
|
||||
|
||||
def load_drawer_from_disk(self):
|
||||
"""
|
||||
|
@ -56,15 +54,17 @@ class FreqaiDataDrawer:
|
|||
:returns:
|
||||
exists: bool = whether or not the drawer was located
|
||||
"""
|
||||
exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists()
|
||||
exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
|
||||
if exists:
|
||||
with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
|
||||
with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
|
||||
self.pair_dict = json.load(fp)
|
||||
elif not self.follow_mode:
|
||||
logger.info("Could not find existing datadrawer, starting from scratch")
|
||||
else:
|
||||
logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
|
||||
'sending null values back to strategy')
|
||||
logger.warning(
|
||||
f"Follower could not find pair_dictionary at {self.full_path} "
|
||||
"sending null values back to strategy"
|
||||
)
|
||||
|
||||
return exists
|
||||
|
||||
|
@ -72,36 +72,41 @@ class FreqaiDataDrawer:
|
|||
"""
|
||||
Save data drawer full of all pair model metadata in present model folder.
|
||||
"""
|
||||
with open(self.full_path / str('pair_dictionary.json'), "w") as fp:
|
||||
with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
|
||||
json.dump(self.pair_dict, fp, default=self.np_encoder)
|
||||
|
||||
def save_follower_dict_to_disk(self):
|
||||
"""
|
||||
Save follower dictionary to disk (used by strategy for persistent prediction targets)
|
||||
"""
|
||||
follower_name = self.config.get('bot_name', 'follower1')
|
||||
with open(self.full_path / str('follower_dictionary-' +
|
||||
follower_name + '.json'), "w") as fp:
|
||||
follower_name = self.config.get("bot_name", "follower1")
|
||||
with open(
|
||||
self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
|
||||
) as fp:
|
||||
json.dump(self.follower_dict, fp, default=self.np_encoder)
|
||||
|
||||
def create_follower_dict(self):
|
||||
"""
|
||||
Create or dictionary for each follower to maintain unique persistent prediction targets
|
||||
"""
|
||||
follower_name = self.config.get('bot_name', 'follower1')
|
||||
whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist')
|
||||
follower_name = self.config.get("bot_name", "follower1")
|
||||
whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")
|
||||
|
||||
exists = Path(self.full_path / str('follower_dictionary-' +
|
||||
follower_name + '.json')).resolve().exists()
|
||||
exists = (
|
||||
Path(self.full_path / str("follower_dictionary-" + follower_name + ".json"))
|
||||
.resolve()
|
||||
.exists()
|
||||
)
|
||||
|
||||
if exists:
|
||||
logger.info('Found an existing follower dictionary')
|
||||
logger.info("Found an existing follower dictionary")
|
||||
|
||||
for pair in whitelist_pairs:
|
||||
self.follower_dict[pair] = {}
|
||||
|
||||
with open(self.full_path / str('follower_dictionary-' +
|
||||
follower_name + '.json'), "w") as fp:
|
||||
with open(
|
||||
self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
|
||||
) as fp:
|
||||
json.dump(self.follower_dict, fp, default=self.np_encoder)
|
||||
|
||||
def np_encoder(self, object):
|
||||
|
@ -122,46 +127,48 @@ class FreqaiDataDrawer:
|
|||
return_null_array: bool = Follower could not find pair metadata
|
||||
"""
|
||||
pair_in_dict = self.pair_dict.get(pair)
|
||||
data_path_set = self.pair_dict.get(pair, {}).get('data_path', None)
|
||||
data_path_set = self.pair_dict.get(pair, {}).get("data_path", None)
|
||||
return_null_array = False
|
||||
|
||||
if pair_in_dict:
|
||||
model_filename = self.pair_dict[pair]['model_filename']
|
||||
trained_timestamp = self.pair_dict[pair]['trained_timestamp']
|
||||
coin_first = self.pair_dict[pair]['first']
|
||||
model_filename = self.pair_dict[pair]["model_filename"]
|
||||
trained_timestamp = self.pair_dict[pair]["trained_timestamp"]
|
||||
coin_first = self.pair_dict[pair]["first"]
|
||||
elif not self.follow_mode:
|
||||
self.pair_dict[pair] = {}
|
||||
model_filename = self.pair_dict[pair]['model_filename'] = ''
|
||||
coin_first = self.pair_dict[pair]['first'] = True
|
||||
trained_timestamp = self.pair_dict[pair]['trained_timestamp'] = 0
|
||||
self.pair_dict[pair]['priority'] = len(self.pair_dict)
|
||||
model_filename = self.pair_dict[pair]["model_filename"] = ""
|
||||
coin_first = self.pair_dict[pair]["first"] = True
|
||||
trained_timestamp = self.pair_dict[pair]["trained_timestamp"] = 0
|
||||
self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
||||
|
||||
if not data_path_set and self.follow_mode:
|
||||
logger.warning(f'Follower could not find current pair {pair} in '
|
||||
f'pair_dictionary at path {self.full_path}, sending null values '
|
||||
'back to strategy.')
|
||||
logger.warning(
|
||||
f"Follower could not find current pair {pair} in "
|
||||
f"pair_dictionary at path {self.full_path}, sending null values "
|
||||
"back to strategy."
|
||||
)
|
||||
return_null_array = True
|
||||
|
||||
return model_filename, trained_timestamp, coin_first, return_null_array
|
||||
|
||||
def set_pair_dict_info(self, metadata: dict) -> None:
|
||||
pair_in_dict = self.pair_dict.get(metadata['pair'])
|
||||
pair_in_dict = self.pair_dict.get(metadata["pair"])
|
||||
if pair_in_dict:
|
||||
return
|
||||
else:
|
||||
self.pair_dict[metadata['pair']] = {}
|
||||
self.pair_dict[metadata['pair']]['model_filename'] = ''
|
||||
self.pair_dict[metadata['pair']]['first'] = True
|
||||
self.pair_dict[metadata['pair']]['trained_timestamp'] = 0
|
||||
self.pair_dict[metadata['pair']]['priority'] = len(self.pair_dict)
|
||||
self.pair_dict[metadata["pair"]] = {}
|
||||
self.pair_dict[metadata["pair"]]["model_filename"] = ""
|
||||
self.pair_dict[metadata["pair"]]["first"] = True
|
||||
self.pair_dict[metadata["pair"]]["trained_timestamp"] = 0
|
||||
self.pair_dict[metadata["pair"]]["priority"] = len(self.pair_dict)
|
||||
return
|
||||
|
||||
def pair_to_end_of_training_queue(self, pair: str) -> None:
|
||||
# march all pairs up in the queue
|
||||
for p in self.pair_dict:
|
||||
self.pair_dict[p]['priority'] -= 1
|
||||
self.pair_dict[p]["priority"] -= 1
|
||||
# send pair to end of queue
|
||||
self.pair_dict[pair]['priority'] = len(self.pair_dict)
|
||||
self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
||||
|
||||
def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None:
|
||||
"""
|
||||
|
@ -172,16 +179,15 @@ class FreqaiDataDrawer:
|
|||
self.model_return_values[pair] = pd.DataFrame()
|
||||
for label in dk.label_list:
|
||||
self.model_return_values[pair][label] = pred_df[label]
|
||||
self.model_return_values[pair][f'{label}_mean'] = dk.data['labels_mean'][label]
|
||||
self.model_return_values[pair][f'{label}_std'] = dk.data['labels_std'][label]
|
||||
self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label]
|
||||
self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label]
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
|
||||
self.model_return_values[pair]['DI_values'] = dk.DI_values
|
||||
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
|
||||
self.model_return_values[pair]["DI_values"] = dk.DI_values
|
||||
|
||||
self.model_return_values[pair]['do_predict'] = do_preds
|
||||
self.model_return_values[pair]["do_predict"] = do_preds
|
||||
|
||||
def append_model_predictions(self, pair: str, predictions, do_preds,
|
||||
dk, len_df) -> None:
|
||||
def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:
|
||||
|
||||
# strat seems to feed us variable sized dataframes - and since we are trying to build our
|
||||
# own return array in the same shape, we need to figure out how the size has changed
|
||||
|
@ -198,17 +204,18 @@ class FreqaiDataDrawer:
|
|||
|
||||
for label in dk.label_list:
|
||||
df[label].iloc[-1] = predictions[label].iloc[-1]
|
||||
df[f"{label}_mean"].iloc[-1] = dk.data['labels_mean'][label]
|
||||
df[f"{label}_std"].iloc[-1] = dk.data['labels_std'][label]
|
||||
df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label]
|
||||
df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label]
|
||||
# df['prediction'].iloc[-1] = predictions[-1]
|
||||
df['do_predict'].iloc[-1] = do_preds[-1]
|
||||
df["do_predict"].iloc[-1] = do_preds[-1]
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
|
||||
df['DI_values'].iloc[-1] = dk.DI_values[-1]
|
||||
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
|
||||
df["DI_values"].iloc[-1] = dk.DI_values[-1]
|
||||
|
||||
if length_difference < 0:
|
||||
prepend_df = pd.DataFrame(np.zeros((abs(length_difference) - 1, len(df.columns))),
|
||||
columns=df.columns)
|
||||
prepend_df = pd.DataFrame(
|
||||
np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
|
||||
)
|
||||
df = pd.concat([prepend_df, df], axis=0)
|
||||
|
||||
def attach_return_values_to_return_dataframe(self, pair: str, dataframe) -> DataFrame:
|
||||
|
@ -220,7 +227,7 @@ class FreqaiDataDrawer:
|
|||
dataframe: DataFrame = strat dataframe with return values attached
|
||||
"""
|
||||
df = self.model_return_values[pair]
|
||||
to_keep = [col for col in dataframe.columns if not col.startswith('&')]
|
||||
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
|
||||
dataframe = pd.concat([dataframe[to_keep], df], axis=1)
|
||||
return dataframe
|
||||
|
||||
|
@ -237,10 +244,10 @@ class FreqaiDataDrawer:
|
|||
dataframe[f"{label}_std"] = 0
|
||||
|
||||
# dataframe['prediction'] = 0
|
||||
dataframe['do_predict'] = 0
|
||||
dataframe["do_predict"] = 0
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
|
||||
dataframe['DI_value'] = 0
|
||||
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
|
||||
dataframe["DI_value"] = 0
|
||||
|
||||
dk.return_dataframe = dataframe
|
||||
|
||||
|
@ -261,29 +268,30 @@ class FreqaiDataDrawer:
|
|||
|
||||
if coin not in delete_dict:
|
||||
delete_dict[coin] = {}
|
||||
delete_dict[coin]['num_folders'] = 1
|
||||
delete_dict[coin]['timestamps'] = {int(timestamp): dir}
|
||||
delete_dict[coin]["num_folders"] = 1
|
||||
delete_dict[coin]["timestamps"] = {int(timestamp): dir}
|
||||
else:
|
||||
delete_dict[coin]['num_folders'] += 1
|
||||
delete_dict[coin]['timestamps'][int(timestamp)] = dir
|
||||
delete_dict[coin]["num_folders"] += 1
|
||||
delete_dict[coin]["timestamps"][int(timestamp)] = dir
|
||||
|
||||
for coin in delete_dict:
|
||||
if delete_dict[coin]['num_folders'] > 2:
|
||||
if delete_dict[coin]["num_folders"] > 2:
|
||||
sorted_dict = collections.OrderedDict(
|
||||
sorted(delete_dict[coin]['timestamps'].items()))
|
||||
sorted(delete_dict[coin]["timestamps"].items())
|
||||
)
|
||||
num_delete = len(sorted_dict) - 2
|
||||
deleted = 0
|
||||
for k, v in sorted_dict.items():
|
||||
if deleted >= num_delete:
|
||||
break
|
||||
logger.info(f'Freqai purging old model file {v}')
|
||||
logger.info(f"Freqai purging old model file {v}")
|
||||
shutil.rmtree(v)
|
||||
deleted += 1
|
||||
|
||||
def update_follower_metadata(self):
|
||||
# follower needs to load from disk to get any changes made by leader to pair_dict
|
||||
self.load_drawer_from_disk()
|
||||
if self.config.get('freqai', {}).get('purge_old_models', False):
|
||||
if self.config.get("freqai", {}).get("purge_old_models", False):
|
||||
self.purge_old_models()
|
||||
|
||||
# to be used if we want to send predictions directly to the follower instead of forcing
|
||||
|
|
|
@ -37,8 +37,13 @@ class FreqaiDataKitchen:
|
|||
author: Robert Caulk, rob.caulk@gmail.com
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any], data_drawer: FreqaiDataDrawer, live: bool = False,
|
||||
pair: str = ''):
|
||||
def __init__(
|
||||
self,
|
||||
config: Dict[str, Any],
|
||||
data_drawer: FreqaiDataDrawer,
|
||||
live: bool = False,
|
||||
pair: str = "",
|
||||
):
|
||||
self.data: Dict[Any, Any] = {}
|
||||
self.data_dictionary: Dict[Any, Any] = {}
|
||||
self.config = config
|
||||
|
@ -60,9 +65,9 @@ class FreqaiDataKitchen:
|
|||
self.svm_model: linear_model.SGDOneClassSVM = None
|
||||
self.set_all_pairs()
|
||||
if not self.live:
|
||||
self.full_timerange = self.create_fulltimerange(self.config["timerange"],
|
||||
self.freqai_config.get("train_period")
|
||||
)
|
||||
self.full_timerange = self.create_fulltimerange(
|
||||
self.config["timerange"], self.freqai_config.get("train_period")
|
||||
)
|
||||
|
||||
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
|
||||
self.full_timerange,
|
||||
|
@ -72,24 +77,28 @@ class FreqaiDataKitchen:
|
|||
# self.strat_dataframe: DataFrame = strat_dataframe
|
||||
self.dd = data_drawer
|
||||
|
||||
def set_paths(self, pair: str, trained_timestamp: int = None,) -> None:
|
||||
def set_paths(
|
||||
self,
|
||||
pair: str,
|
||||
trained_timestamp: int = None,
|
||||
) -> None:
|
||||
"""
|
||||
Set the paths to the data for the present coin/botloop
|
||||
:params:
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = Path(self.config['user_data_dir'] /
|
||||
"models" /
|
||||
str(self.freqai_config.get('identifier')))
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
)
|
||||
|
||||
self.data_path = Path(self.full_path / str("sub-train" + "-" +
|
||||
pair.split("/")[0] +
|
||||
str(trained_timestamp)))
|
||||
self.data_path = Path(
|
||||
self.full_path / str("sub-train" + "-" + pair.split("/")[0] + str(trained_timestamp))
|
||||
)
|
||||
|
||||
return
|
||||
|
||||
def save_data(self, model: Any, coin: str = '', keras_model=False, label=None) -> None:
|
||||
def save_data(self, model: Any, coin: str = "", keras_model=False, label=None) -> None:
|
||||
"""
|
||||
Saves all data associated with a model for a single sub-train time range
|
||||
:params:
|
||||
|
@ -114,7 +123,7 @@ class FreqaiDataKitchen:
|
|||
self.data["data_path"] = str(self.data_path)
|
||||
self.data["model_filename"] = str(self.model_filename)
|
||||
self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns)
|
||||
self.data['label_list'] = self.label_list
|
||||
self.data["label_list"] = self.label_list
|
||||
# store the metadata
|
||||
with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp:
|
||||
json.dump(self.data, fp, default=self.np_encoder)
|
||||
|
@ -124,14 +133,15 @@ class FreqaiDataKitchen:
|
|||
save_path / str(self.model_filename + "_trained_df.pkl")
|
||||
)
|
||||
|
||||
if self.freqai_config.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||
pk.dump(self.pca, open(self.data_path /
|
||||
str(self.model_filename + "_pca_object.pkl"), "wb"))
|
||||
if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
|
||||
pk.dump(
|
||||
self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
|
||||
)
|
||||
|
||||
# if self.live:
|
||||
self.dd.model_dictionary[self.model_filename] = model
|
||||
self.dd.pair_dict[coin]['model_filename'] = self.model_filename
|
||||
self.dd.pair_dict[coin]['data_path'] = str(self.data_path)
|
||||
self.dd.pair_dict[coin]["model_filename"] = self.model_filename
|
||||
self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
|
||||
self.dd.save_drawer_to_disk()
|
||||
|
||||
# TODO add a helper function to let user save/load any data they are custom adding. We
|
||||
|
@ -149,29 +159,32 @@ class FreqaiDataKitchen:
|
|||
|
||||
return
|
||||
|
||||
def load_data(self, coin: str = '', keras_model=False) -> Any:
|
||||
def load_data(self, coin: str = "", keras_model=False) -> Any:
|
||||
"""
|
||||
loads all data required to make a prediction on a sub-train time range
|
||||
:returns:
|
||||
:model: User trained model which can be inferenced for new predictions
|
||||
"""
|
||||
|
||||
if not self.dd.pair_dict[coin]['model_filename']:
|
||||
if not self.dd.pair_dict[coin]["model_filename"]:
|
||||
return None
|
||||
|
||||
if self.live:
|
||||
self.model_filename = self.dd.pair_dict[coin]['model_filename']
|
||||
self.data_path = Path(self.dd.pair_dict[coin]['data_path'])
|
||||
if self.freqai_config.get('follow_mode', False):
|
||||
self.model_filename = self.dd.pair_dict[coin]["model_filename"]
|
||||
self.data_path = Path(self.dd.pair_dict[coin]["data_path"])
|
||||
if self.freqai_config.get("follow_mode", False):
|
||||
# follower can be on a different system which is rsynced to the leader:
|
||||
self.data_path = Path(self.config["user_data_dir"] /
|
||||
"models" / self.data_path.parts[-2] /
|
||||
self.data_path.parts[-1])
|
||||
self.data_path = Path(
|
||||
self.config["user_data_dir"]
|
||||
/ "models"
|
||||
/ self.data_path.parts[-2]
|
||||
/ self.data_path.parts[-1]
|
||||
)
|
||||
|
||||
with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp:
|
||||
self.data = json.load(fp)
|
||||
self.training_features_list = self.data["training_features_list"]
|
||||
self.label_list = self.data['label_list']
|
||||
self.label_list = self.data["label_list"]
|
||||
|
||||
self.data_dictionary["train_features"] = pd.read_pickle(
|
||||
self.data_path / str(self.model_filename + "_trained_df.pkl")
|
||||
|
@ -200,17 +213,16 @@ class FreqaiDataKitchen:
|
|||
model = load(self.data_path / str(self.model_filename + "_model.joblib"))
|
||||
else:
|
||||
from tensorflow import keras
|
||||
|
||||
model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))
|
||||
|
||||
if Path(self.data_path / str(self.model_filename +
|
||||
"_svm_model.joblib")).resolve().exists():
|
||||
if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
|
||||
self.svm_model = load(self.data_path / str(self.model_filename + "_svm_model.joblib"))
|
||||
|
||||
if not model:
|
||||
raise OperationalException(
|
||||
f"Unable to load model, ensure model exists at "
|
||||
f"{self.data_path} "
|
||||
)
|
||||
f"Unable to load model, ensure model exists at " f"{self.data_path} "
|
||||
)
|
||||
|
||||
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
|
||||
self.pca = pk.load(
|
||||
|
@ -257,7 +269,7 @@ class FreqaiDataKitchen:
|
|||
weights,
|
||||
stratify=stratification,
|
||||
# shuffle=False,
|
||||
**self.config["freqai"]["data_split_parameters"]
|
||||
**self.config["freqai"]["data_split_parameters"],
|
||||
)
|
||||
|
||||
return self.build_data_dictionary(
|
||||
|
@ -309,14 +321,14 @@ class FreqaiDataKitchen:
|
|||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
] # assuming the labels depend entirely on the dataframe here.
|
||||
logger.info(
|
||||
f'dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points'
|
||||
f' due to NaNs in populated dataset {len(unfiltered_dataframe)}.'
|
||||
f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points"
|
||||
f" due to NaNs in populated dataset {len(unfiltered_dataframe)}."
|
||||
)
|
||||
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
|
||||
logger.warning(
|
||||
f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent'
|
||||
' of training data dropped due to NaNs, model may perform inconsistent'
|
||||
'with expectations'
|
||||
f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent"
|
||||
" of training data dropped due to NaNs, model may perform inconsistent"
|
||||
"with expectations"
|
||||
)
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
|
@ -372,21 +384,27 @@ class FreqaiDataKitchen:
|
|||
# standardize the data by training stats
|
||||
train_max = data_dictionary["train_features"].max()
|
||||
train_min = data_dictionary["train_features"].min()
|
||||
data_dictionary["train_features"] = 2 * (
|
||||
data_dictionary["train_features"] - train_min
|
||||
) / (train_max - train_min) - 1
|
||||
data_dictionary["test_features"] = 2 * (
|
||||
data_dictionary["test_features"] - train_min
|
||||
) / (train_max - train_min) - 1
|
||||
data_dictionary["train_features"] = (
|
||||
2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
|
||||
)
|
||||
data_dictionary["test_features"] = (
|
||||
2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
|
||||
)
|
||||
|
||||
train_labels_max = data_dictionary["train_labels"].max()
|
||||
train_labels_min = data_dictionary["train_labels"].min()
|
||||
data_dictionary["train_labels"] = 2 * (
|
||||
data_dictionary["train_labels"] - train_labels_min
|
||||
) / (train_labels_max - train_labels_min) - 1
|
||||
data_dictionary["test_labels"] = 2 * (
|
||||
data_dictionary["test_labels"] - train_labels_min
|
||||
) / (train_labels_max - train_labels_min) - 1
|
||||
data_dictionary["train_labels"] = (
|
||||
2
|
||||
* (data_dictionary["train_labels"] - train_labels_min)
|
||||
/ (train_labels_max - train_labels_min)
|
||||
- 1
|
||||
)
|
||||
data_dictionary["test_labels"] = (
|
||||
2
|
||||
* (data_dictionary["test_labels"] - train_labels_min)
|
||||
/ (train_labels_max - train_labels_min)
|
||||
- 1
|
||||
)
|
||||
|
||||
for item in train_max.keys():
|
||||
self.data[item + "_max"] = train_max[item]
|
||||
|
@ -406,8 +424,12 @@ class FreqaiDataKitchen:
|
|||
"""
|
||||
|
||||
for item in df.keys():
|
||||
df[item] = 2 * (df[item] - self.data[item + "_min"]) / (self.data[item + "_max"] -
|
||||
self.data[item + '_min']) - 1
|
||||
df[item] = (
|
||||
2
|
||||
* (df[item] - self.data[item + "_min"])
|
||||
/ (self.data[item + "_max"] - self.data[item + "_min"])
|
||||
- 1
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
|
@ -429,8 +451,9 @@ class FreqaiDataKitchen:
|
|||
full_timerange = TimeRange.parse_timerange(tr)
|
||||
config_timerange = TimeRange.parse_timerange(self.config["timerange"])
|
||||
if config_timerange.stopts == 0:
|
||||
config_timerange.stopts = int(datetime.datetime.now(
|
||||
tz=datetime.timezone.utc).timestamp())
|
||||
config_timerange.stopts = int(
|
||||
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
)
|
||||
timerange_train = copy.deepcopy(full_timerange)
|
||||
timerange_backtest = copy.deepcopy(full_timerange)
|
||||
|
||||
|
@ -518,7 +541,7 @@ class FreqaiDataKitchen:
|
|||
|
||||
# keeping a copy of the non-transformed features so we can check for errors during
|
||||
# model load from disk
|
||||
self.data['training_features_list_raw'] = copy.deepcopy(self.training_features_list)
|
||||
self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list)
|
||||
self.training_features_list = self.data_dictionary["train_features"].columns
|
||||
|
||||
self.data_dictionary["test_features"] = pd.DataFrame(
|
||||
|
@ -530,7 +553,7 @@ class FreqaiDataKitchen:
|
|||
self.data["n_kept_components"] = n_keep_components
|
||||
self.pca = pca2
|
||||
|
||||
logger.info(f'PCA reduced total features from {n_components} to {n_keep_components}')
|
||||
logger.info(f"PCA reduced total features from {n_components} to {n_keep_components}")
|
||||
|
||||
if not self.data_path.is_dir():
|
||||
self.data_path.mkdir(parents=True, exist_ok=True)
|
||||
|
@ -557,10 +580,10 @@ class FreqaiDataKitchen:
|
|||
for prediction confidence in the Dissimilarity Index
|
||||
"""
|
||||
logger.info("computing average mean distance for all training points")
|
||||
tc = self.freqai_config.get('model_training_parameters', {}).get('thread_count', -1)
|
||||
tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1)
|
||||
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
|
||||
avg_mean_dist = pairwise.mean(axis=1).mean()
|
||||
logger.info(f'avg_mean_dist {avg_mean_dist:.2f}')
|
||||
logger.info(f"avg_mean_dist {avg_mean_dist:.2f}")
|
||||
|
||||
return avg_mean_dist
|
||||
|
||||
|
@ -579,45 +602,49 @@ class FreqaiDataKitchen:
|
|||
|
||||
if (len(do_predict) - do_predict.sum()) > 0:
|
||||
logger.info(
|
||||
f'svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions'
|
||||
f"svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions"
|
||||
)
|
||||
self.do_predict += do_predict
|
||||
self.do_predict -= 1
|
||||
|
||||
else:
|
||||
# use SGDOneClassSVM to increase speed?
|
||||
nu = self.freqai_config.get('feature_parameters', {}).get('svm_nu', 0.2)
|
||||
nu = self.freqai_config.get("feature_parameters", {}).get("svm_nu", 0.2)
|
||||
self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit(
|
||||
self.data_dictionary["train_features"]
|
||||
)
|
||||
self.data_dictionary["train_features"]
|
||||
)
|
||||
y_pred = self.svm_model.predict(self.data_dictionary["train_features"])
|
||||
dropped_points = np.where(y_pred == -1, 0, y_pred)
|
||||
# keep_index = np.where(y_pred == 1)
|
||||
self.data_dictionary["train_features"] = self.data_dictionary[
|
||||
"train_features"][(y_pred == 1)]
|
||||
self.data_dictionary["train_labels"] = self.data_dictionary[
|
||||
"train_labels"][(y_pred == 1)]
|
||||
self.data_dictionary["train_weights"] = self.data_dictionary[
|
||||
"train_weights"][(y_pred == 1)]
|
||||
self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
|
||||
(y_pred == 1)
|
||||
]
|
||||
self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
|
||||
(y_pred == 1)
|
||||
]
|
||||
self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
|
||||
(y_pred == 1)
|
||||
]
|
||||
|
||||
logger.info(
|
||||
f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}'
|
||||
f' train points from {len(y_pred)}'
|
||||
f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
|
||||
f" train points from {len(y_pred)}"
|
||||
)
|
||||
|
||||
# same for test data
|
||||
y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
|
||||
dropped_points = np.where(y_pred == -1, 0, y_pred)
|
||||
self.data_dictionary["test_features"] = self.data_dictionary[
|
||||
"test_features"][(y_pred == 1)]
|
||||
self.data_dictionary["test_labels"] = self.data_dictionary[
|
||||
"test_labels"][(y_pred == 1)]
|
||||
self.data_dictionary["test_weights"] = self.data_dictionary[
|
||||
"test_weights"][(y_pred == 1)]
|
||||
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
|
||||
(y_pred == 1)
|
||||
]
|
||||
self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)]
|
||||
self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
|
||||
(y_pred == 1)
|
||||
]
|
||||
|
||||
logger.info(
|
||||
f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}'
|
||||
f' test points from {len(y_pred)}'
|
||||
f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
|
||||
f" test points from {len(y_pred)}"
|
||||
)
|
||||
|
||||
return
|
||||
|
@ -631,8 +658,8 @@ class FreqaiDataKitchen:
|
|||
features: list = the features to be used for training/prediction
|
||||
"""
|
||||
column_names = dataframe.columns
|
||||
features = [c for c in column_names if '%' in c]
|
||||
labels = [c for c in column_names if '&' in c]
|
||||
features = [c for c in column_names if "%" in c]
|
||||
labels = [c for c in column_names if "&" in c]
|
||||
if not features:
|
||||
raise OperationalException("Could not find any features!")
|
||||
|
||||
|
@ -657,16 +684,15 @@ class FreqaiDataKitchen:
|
|||
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
|
||||
|
||||
do_predict = np.where(
|
||||
self.DI_values
|
||||
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
|
||||
self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
|
||||
1,
|
||||
0,
|
||||
)
|
||||
|
||||
if (len(do_predict) - do_predict.sum()) > 0:
|
||||
logger.info(
|
||||
f'DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for '
|
||||
'being too far from training data'
|
||||
f"DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for "
|
||||
"being too far from training data"
|
||||
)
|
||||
|
||||
self.do_predict += do_predict
|
||||
|
@ -695,7 +721,7 @@ class FreqaiDataKitchen:
|
|||
|
||||
self.full_predictions = np.append(self.full_predictions, predictions)
|
||||
self.full_do_predict = np.append(self.full_do_predict, do_predict)
|
||||
if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
|
||||
if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
|
||||
self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
|
||||
self.full_target_mean = np.append(self.full_target_mean, target_mean)
|
||||
self.full_target_std = np.append(self.full_target_std, target_std)
|
||||
|
@ -711,7 +737,7 @@ class FreqaiDataKitchen:
|
|||
filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count
|
||||
self.full_predictions = np.append(filler, self.full_predictions)
|
||||
self.full_do_predict = np.append(filler, self.full_do_predict)
|
||||
if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
|
||||
if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
|
||||
self.full_DI_values = np.append(filler, self.full_DI_values)
|
||||
self.full_target_mean = np.append(filler, self.full_target_mean)
|
||||
self.full_target_std = np.append(filler, self.full_target_std)
|
||||
|
@ -722,8 +748,9 @@ class FreqaiDataKitchen:
|
|||
backtest_timerange = TimeRange.parse_timerange(backtest_tr)
|
||||
|
||||
if backtest_timerange.stopts == 0:
|
||||
backtest_timerange.stopts = int(datetime.datetime.now(
|
||||
tz=datetime.timezone.utc).timestamp())
|
||||
backtest_timerange.stopts = int(
|
||||
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
)
|
||||
|
||||
backtest_timerange.startts = backtest_timerange.startts - backtest_period * SECONDS_IN_DAY
|
||||
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
|
||||
|
@ -731,9 +758,7 @@ class FreqaiDataKitchen:
|
|||
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
|
||||
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"]
|
||||
/ "models"
|
||||
/ str(self.freqai_config.get("identifier"))
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
)
|
||||
|
||||
config_path = Path(self.config["config_files"][0])
|
||||
|
@ -758,61 +783,71 @@ class FreqaiDataKitchen:
|
|||
"""
|
||||
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
elapsed_time = (time - trained_timestamp) / 3600 # hours
|
||||
max_time = self.freqai_config.get('expiration_hours', 0)
|
||||
max_time = self.freqai_config.get("expiration_hours", 0)
|
||||
if max_time > 0:
|
||||
return elapsed_time > max_time
|
||||
else:
|
||||
return False
|
||||
|
||||
def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool,
|
||||
TimeRange, TimeRange]:
|
||||
def check_if_new_training_required(
|
||||
self, trained_timestamp: int
|
||||
) -> Tuple[bool, TimeRange, TimeRange]:
|
||||
|
||||
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
trained_timerange = TimeRange()
|
||||
data_load_timerange = TimeRange()
|
||||
|
||||
# find the max indicator length required
|
||||
max_timeframe_chars = self.freqai_config.get('timeframes')[-1]
|
||||
max_period = self.freqai_config.get('feature_parameters', {}).get(
|
||||
'indicator_max_period', 50)
|
||||
max_timeframe_chars = self.freqai_config.get("timeframes")[-1]
|
||||
max_period = self.freqai_config.get("feature_parameters", {}).get(
|
||||
"indicator_max_period", 50
|
||||
)
|
||||
additional_seconds = 0
|
||||
if max_timeframe_chars[-1] == 'd':
|
||||
if max_timeframe_chars[-1] == "d":
|
||||
additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2])
|
||||
elif max_timeframe_chars[-1] == 'h':
|
||||
elif max_timeframe_chars[-1] == "h":
|
||||
additional_seconds = max_period * 3600 * int(max_timeframe_chars[-2])
|
||||
elif max_timeframe_chars[-1] == 'm':
|
||||
elif max_timeframe_chars[-1] == "m":
|
||||
if len(max_timeframe_chars) == 2:
|
||||
additional_seconds = max_period * 60 * int(max_timeframe_chars[-2])
|
||||
elif len(max_timeframe_chars) == 3:
|
||||
additional_seconds = max_period * 60 * int(float(max_timeframe_chars[0:2]))
|
||||
else:
|
||||
logger.warning('FreqAI could not detect max timeframe and therefore may not '
|
||||
'download the proper amount of data for training')
|
||||
logger.warning(
|
||||
"FreqAI could not detect max timeframe and therefore may not "
|
||||
"download the proper amount of data for training"
|
||||
)
|
||||
|
||||
# logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
|
||||
|
||||
if trained_timestamp != 0:
|
||||
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
|
||||
retrain = elapsed_time > self.freqai_config.get('backtest_period')
|
||||
retrain = elapsed_time > self.freqai_config.get("backtest_period")
|
||||
if retrain:
|
||||
trained_timerange.startts = int(time - self.freqai_config.get(
|
||||
'train_period', 0) * SECONDS_IN_DAY)
|
||||
trained_timerange.startts = int(
|
||||
time - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
|
||||
)
|
||||
trained_timerange.stopts = int(time)
|
||||
# we want to load/populate indicators on more data than we plan to train on so
|
||||
# because most of the indicators have a rolling timeperiod, and are thus NaNs
|
||||
# unless they have data further back in time before the start of the train period
|
||||
data_load_timerange.startts = int(time - self.freqai_config.get(
|
||||
'train_period', 0) * SECONDS_IN_DAY
|
||||
- additional_seconds)
|
||||
data_load_timerange.startts = int(
|
||||
time
|
||||
- self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
|
||||
- additional_seconds
|
||||
)
|
||||
data_load_timerange.stopts = int(time)
|
||||
else: # user passed no live_trained_timerange in config
|
||||
trained_timerange.startts = int(time - self.freqai_config.get('train_period') *
|
||||
SECONDS_IN_DAY)
|
||||
trained_timerange.startts = int(
|
||||
time - self.freqai_config.get("train_period") * SECONDS_IN_DAY
|
||||
)
|
||||
trained_timerange.stopts = int(time)
|
||||
|
||||
data_load_timerange.startts = int(time - self.freqai_config.get(
|
||||
'train_period', 0) * SECONDS_IN_DAY
|
||||
- additional_seconds)
|
||||
data_load_timerange.startts = int(
|
||||
time
|
||||
- self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
|
||||
- additional_seconds
|
||||
)
|
||||
data_load_timerange.stopts = int(time)
|
||||
retrain = True
|
||||
|
||||
|
@ -822,9 +857,10 @@ class FreqaiDataKitchen:
|
|||
|
||||
coin, _ = pair.split("/")
|
||||
# set the new data_path
|
||||
self.data_path = Path(self.full_path / str("sub-train" + "-" +
|
||||
pair.split("/")[0] +
|
||||
str(int(trained_timerange.stopts))))
|
||||
self.data_path = Path(
|
||||
self.full_path
|
||||
/ str("sub-train" + "-" + pair.split("/")[0] + str(int(trained_timerange.stopts)))
|
||||
)
|
||||
|
||||
self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
|
||||
|
||||
|
@ -860,20 +896,24 @@ class FreqaiDataKitchen:
|
|||
timerange: TimeRange = The full data timerange for populating the indicators
|
||||
and training the model.
|
||||
"""
|
||||
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
||||
self.config, validate=False, freqai=True)
|
||||
exchange = ExchangeResolver.load_exchange(
|
||||
self.config["exchange"]["name"], self.config, validate=False, freqai=True
|
||||
)
|
||||
|
||||
new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
|
||||
|
||||
refresh_backtest_ohlcv_data(
|
||||
exchange, pairs=self.all_pairs,
|
||||
timeframes=self.freqai_config.get('timeframes'),
|
||||
datadir=self.config['datadir'], timerange=timerange,
|
||||
new_pairs_days=new_pairs_days,
|
||||
erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
|
||||
trading_mode=self.config.get('trading_mode', 'spot'),
|
||||
prepend=self.config.get('prepend_data', False)
|
||||
)
|
||||
exchange,
|
||||
pairs=self.all_pairs,
|
||||
timeframes=self.freqai_config.get("timeframes"),
|
||||
datadir=self.config["datadir"],
|
||||
timerange=timerange,
|
||||
new_pairs_days=new_pairs_days,
|
||||
erase=False,
|
||||
data_format=self.config.get("dataformat_ohlcv", "json"),
|
||||
trading_mode=self.config.get("trading_mode", "spot"),
|
||||
prepend=self.config.get("prepend_data", False),
|
||||
)
|
||||
|
||||
def update_historic_data(self, strategy: IStrategy) -> None:
|
||||
"""
|
||||
|
@ -888,34 +928,36 @@ class FreqaiDataKitchen:
|
|||
history_data = self.dd.historic_data
|
||||
|
||||
for pair in self.all_pairs:
|
||||
for tf in self.freqai_config.get('timeframes'):
|
||||
for tf in self.freqai_config.get("timeframes"):
|
||||
|
||||
# check if newest candle is already appended
|
||||
df_dp = strategy.dp.get_pair_dataframe(pair, tf)
|
||||
if len(df_dp.index) == 0:
|
||||
continue
|
||||
if (
|
||||
str(history_data[pair][tf].iloc[-1]['date']) ==
|
||||
str(df_dp.iloc[-1:]['date'].iloc[-1])
|
||||
):
|
||||
if str(history_data[pair][tf].iloc[-1]["date"]) == str(
|
||||
df_dp.iloc[-1:]["date"].iloc[-1]
|
||||
):
|
||||
continue
|
||||
|
||||
index = df_dp.loc[
|
||||
df_dp['date'] ==
|
||||
history_data[pair][tf].iloc[-1]['date']
|
||||
].index[0] + 1
|
||||
index = (
|
||||
df_dp.loc[df_dp["date"] == history_data[pair][tf].iloc[-1]["date"]].index[0]
|
||||
+ 1
|
||||
)
|
||||
history_data[pair][tf] = pd.concat(
|
||||
[history_data[pair][tf],
|
||||
strategy.dp.get_pair_dataframe(pair, tf).iloc[index:]],
|
||||
ignore_index=True, axis=0
|
||||
)
|
||||
[
|
||||
history_data[pair][tf],
|
||||
strategy.dp.get_pair_dataframe(pair, tf).iloc[index:],
|
||||
],
|
||||
ignore_index=True,
|
||||
axis=0,
|
||||
)
|
||||
|
||||
# logger.info(f'Length of history data {len(history_data[pair][tf])}')
|
||||
|
||||
def set_all_pairs(self) -> None:
|
||||
|
||||
self.all_pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
|
||||
for pair in self.config.get('exchange', '').get('pair_whitelist'):
|
||||
self.all_pairs = copy.deepcopy(self.freqai_config.get("corr_pairlist", []))
|
||||
for pair in self.config.get("exchange", "").get("pair_whitelist"):
|
||||
if pair not in self.all_pairs:
|
||||
self.all_pairs.append(pair)
|
||||
|
||||
|
@ -932,17 +974,19 @@ class FreqaiDataKitchen:
|
|||
for pair in self.all_pairs:
|
||||
if pair not in history_data:
|
||||
history_data[pair] = {}
|
||||
for tf in self.freqai_config.get('timeframes'):
|
||||
history_data[pair][tf] = load_pair_history(datadir=self.config['datadir'],
|
||||
timeframe=tf,
|
||||
pair=pair, timerange=timerange,
|
||||
data_format=self.config.get(
|
||||
'dataformat_ohlcv', 'json'),
|
||||
candle_type=self.config.get(
|
||||
'trading_mode', 'spot'))
|
||||
for tf in self.freqai_config.get("timeframes"):
|
||||
history_data[pair][tf] = load_pair_history(
|
||||
datadir=self.config["datadir"],
|
||||
timeframe=tf,
|
||||
pair=pair,
|
||||
timerange=timerange,
|
||||
data_format=self.config.get("dataformat_ohlcv", "json"),
|
||||
candle_type=self.config.get("trading_mode", "spot"),
|
||||
)
|
||||
|
||||
def get_base_and_corr_dataframes(self, timerange: TimeRange,
|
||||
pair: str) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
|
||||
def get_base_and_corr_dataframes(
|
||||
self, timerange: TimeRange, pair: str
|
||||
) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
|
||||
"""
|
||||
Searches through our historic_data in memory and returns the dataframes relevant
|
||||
to the present pair.
|
||||
|
@ -956,21 +1000,19 @@ class FreqaiDataKitchen:
|
|||
corr_dataframes: Dict[Any, Any] = {}
|
||||
base_dataframes: Dict[Any, Any] = {}
|
||||
historic_data = self.dd.historic_data
|
||||
pairs = self.freqai_config.get('corr_pairlist', [])
|
||||
pairs = self.freqai_config.get("corr_pairlist", [])
|
||||
|
||||
for tf in self.freqai_config.get('timeframes'):
|
||||
base_dataframes[tf] = self.slice_dataframe(
|
||||
timerange,
|
||||
historic_data[pair][tf]
|
||||
)
|
||||
for tf in self.freqai_config.get("timeframes"):
|
||||
base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf])
|
||||
if pairs:
|
||||
for p in pairs:
|
||||
if pair in p:
|
||||
continue # dont repeat anything from whitelist
|
||||
if p not in corr_dataframes:
|
||||
corr_dataframes[p] = {}
|
||||
corr_dataframes[p][tf] = self.slice_dataframe(timerange,
|
||||
historic_data[p][tf])
|
||||
corr_dataframes[p][tf] = self.slice_dataframe(
|
||||
timerange, historic_data[p][tf]
|
||||
)
|
||||
|
||||
return corr_dataframes, base_dataframes
|
||||
|
||||
|
@ -1006,10 +1048,9 @@ class FreqaiDataKitchen:
|
|||
|
||||
# return corr_dataframes, base_dataframes
|
||||
|
||||
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
|
||||
corr_dataframes: dict,
|
||||
base_dataframes: dict,
|
||||
pair: str) -> DataFrame:
|
||||
def use_strategy_to_populate_indicators(
|
||||
self, strategy: IStrategy, corr_dataframes: dict, base_dataframes: dict, pair: str
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during
|
||||
retrain
|
||||
|
@ -1023,30 +1064,25 @@ class FreqaiDataKitchen:
|
|||
:returns:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
dataframe = base_dataframes[self.config['timeframe']].copy()
|
||||
dataframe = base_dataframes[self.config["timeframe"]].copy()
|
||||
pairs = self.freqai_config.get("corr_pairlist", [])
|
||||
|
||||
for tf in self.freqai_config.get("timeframes"):
|
||||
dataframe = strategy.populate_any_indicators(
|
||||
pair,
|
||||
pair,
|
||||
dataframe.copy(),
|
||||
tf,
|
||||
base_dataframes[tf],
|
||||
coin=pair.split("/")[0] + "-"
|
||||
)
|
||||
pair, pair, dataframe.copy(), tf, base_dataframes[tf], coin=pair.split("/")[0] + "-"
|
||||
)
|
||||
if pairs:
|
||||
for i in pairs:
|
||||
if pair in i:
|
||||
continue # dont repeat anything from whitelist
|
||||
dataframe = strategy.populate_any_indicators(
|
||||
pair,
|
||||
i,
|
||||
dataframe.copy(),
|
||||
tf,
|
||||
corr_dataframes[i][tf],
|
||||
coin=i.split("/")[0] + "-"
|
||||
)
|
||||
pair,
|
||||
i,
|
||||
dataframe.copy(),
|
||||
tf,
|
||||
corr_dataframes[i][tf],
|
||||
coin=i.split("/")[0] + "-",
|
||||
)
|
||||
|
||||
return dataframe
|
||||
|
||||
|
@ -1056,7 +1092,7 @@ class FreqaiDataKitchen:
|
|||
"""
|
||||
import scipy as spy
|
||||
|
||||
self.data['labels_mean'], self.data['labels_std'] = {}, {}
|
||||
self.data["labels_mean"], self.data["labels_std"] = {}, {}
|
||||
for label in self.label_list:
|
||||
f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])
|
||||
self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]
|
||||
|
|
|
@ -29,6 +29,7 @@ logger = logging.getLogger(__name__)
|
|||
def threaded(fn):
|
||||
def wrapper(*args, **kwargs):
|
||||
threading.Thread(target=fn, args=args, kwargs=kwargs).start()
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
|
@ -46,7 +47,7 @@ class IFreqaiModel(ABC):
|
|||
self.config = config
|
||||
self.assert_config(self.config)
|
||||
self.freqai_info = config["freqai"]
|
||||
self.data_split_parameters = config.get('freqai', {}).get("data_split_parameters")
|
||||
self.data_split_parameters = config.get("freqai", {}).get("data_split_parameters")
|
||||
self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters")
|
||||
self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
|
||||
self.time_last_trained = None
|
||||
|
@ -58,23 +59,21 @@ class IFreqaiModel(ABC):
|
|||
self.first = True
|
||||
self.update_historic_data = 0
|
||||
self.set_full_path()
|
||||
self.follow_mode = self.freqai_info.get('follow_mode', False)
|
||||
self.follow_mode = self.freqai_info.get("follow_mode", False)
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
self.lock = threading.Lock()
|
||||
self.follow_mode = self.freqai_info.get('follow_mode', False)
|
||||
self.identifier = self.freqai_info.get('identifier', 'no_id_provided')
|
||||
self.follow_mode = self.freqai_info.get("follow_mode", False)
|
||||
self.identifier = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.scanning = False
|
||||
self.ready_to_scan = False
|
||||
self.first = True
|
||||
self.keras = self.freqai_info.get('keras', False)
|
||||
self.CONV_WIDTH = self.freqai_info.get('conv_width', 2)
|
||||
self.keras = self.freqai_info.get("keras", False)
|
||||
self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
|
||||
|
||||
def assert_config(self, config: Dict[str, Any]) -> None:
|
||||
|
||||
if not config.get('freqai', {}):
|
||||
raise OperationalException(
|
||||
"No freqai parameters found in configuration file."
|
||||
)
|
||||
if not config.get("freqai", {}):
|
||||
raise OperationalException("No freqai parameters found in configuration file.")
|
||||
|
||||
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
|
||||
"""
|
||||
|
@ -92,8 +91,7 @@ class IFreqaiModel(ABC):
|
|||
self.dd.set_pair_dict_info(metadata)
|
||||
|
||||
if self.live:
|
||||
self.dk = FreqaiDataKitchen(self.config, self.dd,
|
||||
self.live, metadata["pair"])
|
||||
self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
|
||||
dk = self.start_live(dataframe, metadata, strategy, self.dk)
|
||||
|
||||
# For backtesting, each pair enters and then gets trained for each window along the
|
||||
|
@ -103,7 +101,7 @@ class IFreqaiModel(ABC):
|
|||
# the concatenated results for the full backtesting period back to the strategy.
|
||||
elif not self.follow_mode:
|
||||
self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
|
||||
logger.info(f'Training {len(self.dk.training_timeranges)} timeranges')
|
||||
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
|
||||
dk = self.start_backtesting(dataframe, metadata, self.dk)
|
||||
|
||||
dataframe = self.remove_features_from_df(dk.return_dataframe)
|
||||
|
@ -120,14 +118,13 @@ class IFreqaiModel(ABC):
|
|||
"""
|
||||
while 1:
|
||||
time.sleep(1)
|
||||
for pair in self.config.get('exchange', {}).get('pair_whitelist'):
|
||||
for pair in self.config.get("exchange", {}).get("pair_whitelist"):
|
||||
|
||||
(_, trained_timestamp, _, _) = self.dd.get_pair_dict_info(pair)
|
||||
|
||||
if self.dd.pair_dict[pair]['priority'] != 1:
|
||||
if self.dd.pair_dict[pair]["priority"] != 1:
|
||||
continue
|
||||
dk = FreqaiDataKitchen(self.config, self.dd,
|
||||
self.live, pair)
|
||||
dk = FreqaiDataKitchen(self.config, self.dd, self.live, pair)
|
||||
|
||||
# file_exists = False
|
||||
|
||||
|
@ -138,17 +135,21 @@ class IFreqaiModel(ABC):
|
|||
# model_filename=model_filename,
|
||||
# scanning=True)
|
||||
|
||||
(retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange) = dk.check_if_new_training_required(trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
|
||||
if retrain: # or not file_exists:
|
||||
self.train_model_in_series(new_trained_timerange, pair,
|
||||
strategy, dk, data_load_timerange)
|
||||
self.train_model_in_series(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
)
|
||||
|
||||
def start_backtesting(self, dataframe: DataFrame, metadata: dict,
|
||||
dk: FreqaiDataKitchen) -> FreqaiDataKitchen:
|
||||
def start_backtesting(
|
||||
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
|
||||
) -> FreqaiDataKitchen:
|
||||
"""
|
||||
The main broad execution for backtesting. For backtesting, each pair enters and then gets
|
||||
trained for each window along the sliding window defined by "train_period" (training window)
|
||||
|
@ -169,10 +170,8 @@ class IFreqaiModel(ABC):
|
|||
# tr_backtest is the backtesting time range e.g. the week directly
|
||||
# following tr_train. Both of these windows slide through the
|
||||
# entire backtest
|
||||
for tr_train, tr_backtest in zip(
|
||||
dk.training_timeranges, dk.backtesting_timeranges
|
||||
):
|
||||
(_, _, _, _) = self.dd.get_pair_dict_info(metadata['pair'])
|
||||
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
|
||||
(_, _, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
|
||||
gc.collect()
|
||||
dk.data = {} # clean the pair specific data between training window sliding
|
||||
self.training_timerange = tr_train
|
||||
|
@ -181,40 +180,48 @@ class IFreqaiModel(ABC):
|
|||
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
|
||||
|
||||
trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train)
|
||||
tr_train_startts_str = datetime.datetime.utcfromtimestamp(
|
||||
tr_train.startts).strftime('%Y-%m-%d %H:%M:%S')
|
||||
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(
|
||||
tr_train.stopts).strftime('%Y-%m-%d %H:%M:%S')
|
||||
tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
logger.info("Training %s", metadata["pair"])
|
||||
logger.info(f'Training {tr_train_startts_str} to {tr_train_stopts_str}')
|
||||
logger.info(f"Training {tr_train_startts_str} to {tr_train_stopts_str}")
|
||||
|
||||
dk.data_path = Path(dk.full_path /
|
||||
str("sub-train" + "-" + metadata['pair'].split("/")[0] +
|
||||
str(int(trained_timestamp.stopts))))
|
||||
if not self.model_exists(metadata["pair"], dk,
|
||||
trained_timestamp=trained_timestamp.stopts):
|
||||
self.model = self.train(dataframe_train, metadata['pair'], dk)
|
||||
self.dd.pair_dict[metadata['pair']][
|
||||
'trained_timestamp'] = trained_timestamp.stopts
|
||||
dk.set_new_model_names(metadata['pair'], trained_timestamp)
|
||||
dk.save_data(self.model, metadata['pair'], keras_model=self.keras)
|
||||
dk.data_path = Path(
|
||||
dk.full_path
|
||||
/ str(
|
||||
"sub-train"
|
||||
+ "-"
|
||||
+ metadata["pair"].split("/")[0]
|
||||
+ str(int(trained_timestamp.stopts))
|
||||
)
|
||||
)
|
||||
if not self.model_exists(
|
||||
metadata["pair"], dk, trained_timestamp=trained_timestamp.stopts
|
||||
):
|
||||
self.model = self.train(dataframe_train, metadata["pair"], dk)
|
||||
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = trained_timestamp.stopts
|
||||
dk.set_new_model_names(metadata["pair"], trained_timestamp)
|
||||
dk.save_data(self.model, metadata["pair"], keras_model=self.keras)
|
||||
else:
|
||||
self.model = dk.load_data(metadata['pair'], keras_model=self.keras)
|
||||
self.model = dk.load_data(metadata["pair"], keras_model=self.keras)
|
||||
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||
|
||||
preds, do_preds = self.predict(dataframe_backtest, dk)
|
||||
|
||||
dk.append_predictions(preds, do_preds, len(dataframe_backtest))
|
||||
print('predictions', len(dk.full_predictions),
|
||||
'do_predict', len(dk.full_do_predict))
|
||||
print("predictions", len(dk.full_predictions), "do_predict", len(dk.full_do_predict))
|
||||
|
||||
dk.fill_predictions(len(dataframe))
|
||||
|
||||
return dk
|
||||
|
||||
def start_live(self, dataframe: DataFrame, metadata: dict,
|
||||
strategy: IStrategy, dk: FreqaiDataKitchen) -> FreqaiDataKitchen:
|
||||
def start_live(
|
||||
self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen
|
||||
) -> FreqaiDataKitchen:
|
||||
"""
|
||||
The main broad execution for dry/live. This function will check if a retraining should be
|
||||
performed, and if so, retrain and reset the model.
|
||||
|
@ -232,14 +239,11 @@ class IFreqaiModel(ABC):
|
|||
self.dd.update_follower_metadata()
|
||||
|
||||
# get the model metadata associated with the current pair
|
||||
(_,
|
||||
trained_timestamp,
|
||||
_,
|
||||
return_null_array) = self.dd.get_pair_dict_info(metadata['pair'])
|
||||
(_, trained_timestamp, _, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])
|
||||
|
||||
# if the metadata doesnt exist, the follower returns null arrays to strategy
|
||||
if self.follow_mode and return_null_array:
|
||||
logger.info('Returning null array from follower to strategy')
|
||||
logger.info("Returning null array from follower to strategy")
|
||||
self.dd.return_null_values_to_strategy(dataframe, dk)
|
||||
return dk
|
||||
|
||||
|
@ -253,16 +257,18 @@ class IFreqaiModel(ABC):
|
|||
# if not trainable, load existing data
|
||||
if not self.follow_mode:
|
||||
|
||||
(_,
|
||||
new_trained_timerange,
|
||||
data_load_timerange) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(metadata['pair'], new_trained_timerange.stopts)
|
||||
(_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
|
||||
trained_timestamp
|
||||
)
|
||||
dk.set_paths(metadata["pair"], new_trained_timerange.stopts)
|
||||
|
||||
# download candle history if it is not already in memory
|
||||
if not self.dd.historic_data:
|
||||
logger.info('Downloading all training data for all pairs in whitelist and '
|
||||
'corr_pairlist, this may take a while if you do not have the '
|
||||
'data saved')
|
||||
logger.info(
|
||||
"Downloading all training data for all pairs in whitelist and "
|
||||
"corr_pairlist, this may take a while if you do not have the "
|
||||
"data saved"
|
||||
)
|
||||
dk.download_all_data_for_training(data_load_timerange)
|
||||
dk.load_all_pair_histories(data_load_timerange)
|
||||
|
||||
|
@ -271,53 +277,47 @@ class IFreqaiModel(ABC):
|
|||
self.start_scanning(strategy)
|
||||
|
||||
elif self.follow_mode:
|
||||
dk.set_paths(metadata['pair'], trained_timestamp)
|
||||
logger.info('FreqAI instance set to follow_mode, finding existing pair'
|
||||
f'using { self.identifier }')
|
||||
dk.set_paths(metadata["pair"], trained_timestamp)
|
||||
logger.info(
|
||||
"FreqAI instance set to follow_mode, finding existing pair"
|
||||
f"using { self.identifier }"
|
||||
)
|
||||
|
||||
# load the model and associated data into the data kitchen
|
||||
self.model = dk.load_data(coin=metadata['pair'], keras_model=self.keras)
|
||||
self.model = dk.load_data(coin=metadata["pair"], keras_model=self.keras)
|
||||
|
||||
if not self.model:
|
||||
logger.warning('No model ready, returning null values to strategy.')
|
||||
logger.warning("No model ready, returning null values to strategy.")
|
||||
self.dd.return_null_values_to_strategy(dataframe, dk)
|
||||
return dk
|
||||
|
||||
# ensure user is feeding the correct indicators to the model
|
||||
self.check_if_feature_list_matches_strategy(dataframe, dk)
|
||||
|
||||
self.build_strategy_return_arrays(dataframe, dk, metadata['pair'], trained_timestamp)
|
||||
self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
|
||||
|
||||
return dk
|
||||
|
||||
def build_strategy_return_arrays(self, dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen, pair: str,
|
||||
trained_timestamp: int) -> None:
|
||||
def build_strategy_return_arrays(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
|
||||
) -> None:
|
||||
|
||||
# hold the historical predictions in memory so we are sending back
|
||||
# correct array to strategy
|
||||
|
||||
if pair not in self.dd.model_return_values:
|
||||
pred_df, do_preds = self.predict(dataframe, dk)
|
||||
# mypy doesnt like the typing in else statement, so we need to explicitly add to
|
||||
# dataframe separately
|
||||
|
||||
# for label in dk.label_list:
|
||||
# dataframe[label] = pred_df[label]
|
||||
|
||||
# dataframe['do_predict'] = do_preds
|
||||
|
||||
# dk.append_predictions(preds, do_preds, len(dataframe))
|
||||
# dk.fill_predictions(len(dataframe))
|
||||
self.dd.set_initial_return_values(pair, dk, pred_df, do_preds)
|
||||
dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
|
||||
return
|
||||
elif self.dk.check_if_model_expired(trained_timestamp):
|
||||
pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
|
||||
do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2)
|
||||
logger.warning('Model expired, returning null values to strategy. Strategy '
|
||||
'construction should take care to consider this event with '
|
||||
'prediction == 0 and do_predict == 2')
|
||||
logger.warning(
|
||||
"Model expired, returning null values to strategy. Strategy "
|
||||
"construction should take care to consider this event with "
|
||||
"prediction == 0 and do_predict == 2"
|
||||
)
|
||||
else:
|
||||
# Only feed in the most recent candle for prediction in live scenario
|
||||
pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False)
|
||||
|
@ -327,8 +327,9 @@ class IFreqaiModel(ABC):
|
|||
|
||||
return
|
||||
|
||||
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen) -> None:
|
||||
def check_if_feature_list_matches_strategy(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen
|
||||
) -> None:
|
||||
"""
|
||||
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
|
||||
to a folder holding existing models.
|
||||
|
@ -337,16 +338,18 @@ class IFreqaiModel(ABC):
|
|||
dk: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop
|
||||
"""
|
||||
dk.find_features(dataframe)
|
||||
if 'training_features_list_raw' in dk.data:
|
||||
feature_list = dk.data['training_features_list_raw']
|
||||
if "training_features_list_raw" in dk.data:
|
||||
feature_list = dk.data["training_features_list_raw"]
|
||||
else:
|
||||
feature_list = dk.training_features_list
|
||||
if dk.training_features_list != feature_list:
|
||||
raise OperationalException("Trying to access pretrained model with `identifier` "
|
||||
"but found different features furnished by current strategy."
|
||||
"Change `identifer` to train from scratch, or ensure the"
|
||||
"strategy is furnishing the same features as the pretrained"
|
||||
"model")
|
||||
raise OperationalException(
|
||||
"Trying to access pretrained model with `identifier` "
|
||||
"but found different features furnished by current strategy."
|
||||
"Change `identifer` to train from scratch, or ensure the"
|
||||
"strategy is furnishing the same features as the pretrained"
|
||||
"model"
|
||||
)
|
||||
|
||||
def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
|
@ -356,13 +359,13 @@ class IFreqaiModel(ABC):
|
|||
of how outlier data points are dropped from the dataframe used for training.
|
||||
"""
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||
if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
|
||||
dk.principal_component_analysis()
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||
if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"):
|
||||
dk.use_SVM_to_remove_outliers(predict=False)
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"):
|
||||
dk.data["avg_mean_dist"] = dk.compute_distances()
|
||||
|
||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||
|
@ -381,13 +384,13 @@ class IFreqaiModel(ABC):
|
|||
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
|
||||
for buy signals.
|
||||
"""
|
||||
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
|
||||
if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
|
||||
dk.pca_transform(dataframe)
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'):
|
||||
if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"):
|
||||
dk.use_SVM_to_remove_outliers(predict=True)
|
||||
|
||||
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'):
|
||||
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"):
|
||||
dk.check_if_pred_in_training_spaces()
|
||||
|
||||
# if self.feature_parameters["determine_statistical_distributions"]:
|
||||
|
@ -395,8 +398,14 @@ class IFreqaiModel(ABC):
|
|||
# if self.feature_parameters["remove_outliers"]:
|
||||
# dk.remove_outliers(predict=True) # creates dropped index
|
||||
|
||||
def model_exists(self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int = None,
|
||||
model_filename: str = '', scanning: bool = False) -> bool:
|
||||
def model_exists(
|
||||
self,
|
||||
pair: str,
|
||||
dk: FreqaiDataKitchen,
|
||||
trained_timestamp: int = None,
|
||||
model_filename: str = "",
|
||||
scanning: bool = False,
|
||||
) -> bool:
|
||||
"""
|
||||
Given a pair and path, check if a model already exists
|
||||
:param pair: pair e.g. BTC/USD
|
||||
|
@ -416,25 +425,33 @@ class IFreqaiModel(ABC):
|
|||
return file_exists
|
||||
|
||||
def set_full_path(self) -> None:
|
||||
self.full_path = Path(self.config['user_data_dir'] /
|
||||
"models" /
|
||||
str(self.freqai_info.get('identifier')))
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_info.get("identifier"))
|
||||
)
|
||||
self.full_path.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(self.config['config_files'][0], Path(self.full_path,
|
||||
Path(self.config['config_files'][0]).name))
|
||||
shutil.copy(
|
||||
self.config["config_files"][0],
|
||||
Path(self.full_path, Path(self.config["config_files"][0]).name),
|
||||
)
|
||||
|
||||
def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Remove the features from the dataframe before returning it to strategy. This keeps it
|
||||
compact for FreqUI purposes.
|
||||
"""
|
||||
to_keep = [col for col in dataframe.columns
|
||||
if not col.startswith('%') or col.startswith('%%')]
|
||||
to_keep = [
|
||||
col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
|
||||
]
|
||||
return dataframe[to_keep]
|
||||
|
||||
def train_model_in_series(self, new_trained_timerange: TimeRange, pair: str,
|
||||
strategy: IStrategy, dk: FreqaiDataKitchen,
|
||||
data_load_timerange: TimeRange):
|
||||
def train_model_in_series(
|
||||
self,
|
||||
new_trained_timerange: TimeRange,
|
||||
pair: str,
|
||||
strategy: IStrategy,
|
||||
dk: FreqaiDataKitchen,
|
||||
data_load_timerange: TimeRange,
|
||||
):
|
||||
"""
|
||||
Retrieve data and train model in single threaded mode (only used if model directory is empty
|
||||
upon startup for dry/live )
|
||||
|
@ -447,13 +464,13 @@ class IFreqaiModel(ABC):
|
|||
(larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
|
||||
"""
|
||||
|
||||
corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes(data_load_timerange,
|
||||
pair)
|
||||
corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes(
|
||||
data_load_timerange, pair
|
||||
)
|
||||
|
||||
unfiltered_dataframe = dk.use_strategy_to_populate_indicators(strategy,
|
||||
corr_dataframes,
|
||||
base_dataframes,
|
||||
pair)
|
||||
unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
|
||||
strategy, corr_dataframes, base_dataframes, pair
|
||||
)
|
||||
|
||||
unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
|
||||
|
||||
|
@ -462,15 +479,15 @@ class IFreqaiModel(ABC):
|
|||
|
||||
model = self.train(unfiltered_dataframe, pair, dk)
|
||||
|
||||
self.dd.pair_dict[pair]['trained_timestamp'] = new_trained_timerange.stopts
|
||||
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
|
||||
dk.set_new_model_names(pair, new_trained_timerange)
|
||||
self.dd.pair_dict[pair]['first'] = False
|
||||
if self.dd.pair_dict[pair]['priority'] == 1 and self.scanning:
|
||||
self.dd.pair_dict[pair]["first"] = False
|
||||
if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning:
|
||||
with self.lock:
|
||||
self.dd.pair_to_end_of_training_queue(pair)
|
||||
dk.save_data(model, coin=pair, keras_model=self.keras)
|
||||
|
||||
if self.freqai_info.get('purge_old_models', False):
|
||||
if self.freqai_info.get("purge_old_models", False):
|
||||
self.dd.purge_old_models()
|
||||
# self.retrain = False
|
||||
|
||||
|
@ -503,8 +520,9 @@ class IFreqaiModel(ABC):
|
|||
return
|
||||
|
||||
@abstractmethod
|
||||
def predict(self, dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen, first: bool = True) -> Tuple[DataFrame, npt.ArrayLike]:
|
||||
def predict(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
|
||||
) -> Tuple[DataFrame, npt.ArrayLike]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param:
|
||||
|
|
|
@ -45,8 +45,9 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||
|
||||
return dataframe["s"]
|
||||
|
||||
def train(self, unfiltered_dataframe: DataFrame,
|
||||
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
|
@ -57,8 +58,7 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info('--------------------Starting training '
|
||||
f'{pair} --------------------')
|
||||
logger.info("--------------------Starting training " f"{pair} --------------------")
|
||||
|
||||
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
|
@ -78,13 +78,14 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
' features')
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
|
||||
logger.info(f'--------------------done training {pair}--------------------')
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
|
@ -110,14 +111,17 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||
|
||||
model = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
verbose=100, early_stopping_rounds=400, **self.model_training_parameters
|
||||
verbose=100,
|
||||
early_stopping_rounds=400,
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
model.fit(X=train_data, eval_set=test_data)
|
||||
|
||||
return model
|
||||
|
||||
def predict(self, unfiltered_dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen, first: bool = False) -> Tuple[DataFrame, DataFrame]:
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
|
@ -141,8 +145,10 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
||||
|
||||
for label in dk.label_list:
|
||||
pred_df[label] = ((pred_df[label] + 1) *
|
||||
(dk.data["labels_max"][label] -
|
||||
dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label]
|
||||
pred_df[label] = (
|
||||
(pred_df[label] + 1)
|
||||
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
|
||||
/ 2
|
||||
) + dk.data["labels_min"][label]
|
||||
|
||||
return (pred_df, dk.do_predict)
|
||||
|
|
|
@ -28,8 +28,9 @@ class CatboostPredictionMultiModel(IFreqaiModel):
|
|||
|
||||
return dataframe
|
||||
|
||||
def train(self, unfiltered_dataframe: DataFrame,
|
||||
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
|
@ -40,8 +41,7 @@ class CatboostPredictionMultiModel(IFreqaiModel):
|
|||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info('--------------------Starting training '
|
||||
f'{pair} --------------------')
|
||||
logger.info("--------------------Starting training " f"{pair} --------------------")
|
||||
|
||||
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
|
@ -61,13 +61,14 @@ class CatboostPredictionMultiModel(IFreqaiModel):
|
|||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
' features')
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
|
||||
logger.info(f'--------------------done training {pair}--------------------')
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
|
@ -80,22 +81,26 @@ class CatboostPredictionMultiModel(IFreqaiModel):
|
|||
"""
|
||||
|
||||
cbr = CatBoostRegressor(
|
||||
allow_writing_files=False, gpu_ram_part=0.5,
|
||||
verbose=100, early_stopping_rounds=400, **self.model_training_parameters
|
||||
allow_writing_files=False,
|
||||
gpu_ram_part=0.5,
|
||||
verbose=100,
|
||||
early_stopping_rounds=400,
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
# eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
|
||||
sample_weight = data_dictionary['train_weights']
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
model = MultiOutputRegressor(estimator=cbr)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
|
||||
|
||||
return model
|
||||
|
||||
def predict(self, unfiltered_dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen, first: bool = False) -> Tuple[DataFrame, DataFrame]:
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
|
@ -119,8 +124,10 @@ class CatboostPredictionMultiModel(IFreqaiModel):
|
|||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
||||
|
||||
for label in dk.label_list:
|
||||
pred_df[label] = ((pred_df[label] + 1) *
|
||||
(dk.data["labels_max"][label] -
|
||||
dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label]
|
||||
pred_df[label] = (
|
||||
(pred_df[label] + 1)
|
||||
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
|
||||
/ 2
|
||||
) + dk.data["labels_min"][label]
|
||||
|
||||
return (pred_df, dk.do_predict)
|
||||
|
|
|
@ -27,8 +27,9 @@ class LightGBMPredictionModel(IFreqaiModel):
|
|||
|
||||
return dataframe
|
||||
|
||||
def train(self, unfiltered_dataframe: DataFrame,
|
||||
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
|
@ -39,8 +40,7 @@ class LightGBMPredictionModel(IFreqaiModel):
|
|||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info('--------------------Starting training '
|
||||
f'{pair} --------------------')
|
||||
logger.info("--------------------Starting training " f"{pair} --------------------")
|
||||
|
||||
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
|
@ -60,13 +60,14 @@ class LightGBMPredictionModel(IFreqaiModel):
|
|||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
' features')
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
|
||||
logger.info(f'--------------------done training {pair}--------------------')
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
|
@ -89,8 +90,9 @@ class LightGBMPredictionModel(IFreqaiModel):
|
|||
|
||||
return model
|
||||
|
||||
def predict(self, unfiltered_dataframe: DataFrame,
|
||||
dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
|
@ -116,8 +118,10 @@ class LightGBMPredictionModel(IFreqaiModel):
|
|||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
||||
|
||||
for label in dk.label_list:
|
||||
pred_df[label] = ((pred_df[label] + 1) *
|
||||
(dk.data["labels_max"][label] -
|
||||
dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label]
|
||||
pred_df[label] = (
|
||||
(pred_df[label] + 1)
|
||||
* (dk.data["labels_max"][label] - dk.data["labels_min"][label])
|
||||
/ 2
|
||||
) + dk.data["labels_min"][label]
|
||||
|
||||
return (pred_df, dk.do_predict)
|
||||
|
|
|
@ -120,9 +120,7 @@ class FreqaiExampleStrategy(IStrategy):
|
|||
informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
|
||||
)
|
||||
|
||||
informative[f"%-{coin}roc-period_{t}"] = ta.ROC(
|
||||
informative, timeperiod=t
|
||||
)
|
||||
informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
|
||||
macd = ta.MACD(informative, timeperiod=t)
|
||||
informative[f"%-{coin}macd-period_{t}"] = macd["macd"]
|
||||
|
||||
|
@ -152,17 +150,17 @@ class FreqaiExampleStrategy(IStrategy):
|
|||
# Add generalized indicators here (because in live, it will call this
|
||||
# function to populate indicators during training). Notice how we ensure not to
|
||||
# add them multiple times
|
||||
if pair == self.freqai_info['corr_pairlist'][0] and tf == self.timeframe:
|
||||
if pair == self.freqai_info["corr_pairlist"][0] and tf == self.timeframe:
|
||||
df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
|
||||
df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
|
||||
|
||||
# user adds targets here by prepending them with &- (see convention below)
|
||||
# If user wishes to use multiple targets, a multioutput prediction model
|
||||
# needs to be used such as templates/CatboostPredictionMultiModel.py
|
||||
df['&-s_close'] = (
|
||||
df["&-s_close"] = (
|
||||
df["close"]
|
||||
.shift(-self.freqai_info['feature_parameters']["period"])
|
||||
.rolling(self.freqai_info['feature_parameters']["period"])
|
||||
.shift(-self.freqai_info["feature_parameters"]["period"])
|
||||
.rolling(self.freqai_info["feature_parameters"]["period"])
|
||||
.mean()
|
||||
/ df["close"]
|
||||
- 1
|
||||
|
@ -231,19 +229,20 @@ class FreqaiExampleStrategy(IStrategy):
|
|||
def get_ticker_indicator(self):
|
||||
return int(self.config["timeframe"][:-1])
|
||||
|
||||
def custom_exit(self, pair: str, trade: Trade, current_time, current_rate,
|
||||
current_profit, **kwargs):
|
||||
def custom_exit(
|
||||
self, pair: str, trade: Trade, current_time, current_rate, current_profit, **kwargs
|
||||
):
|
||||
|
||||
dataframe, _ = self.dp.get_analyzed_dataframe(pair=pair, timeframe=self.timeframe)
|
||||
|
||||
trade_date = timeframe_to_prev_date(self.config['timeframe'], trade.open_date_utc)
|
||||
trade_candle = dataframe.loc[(dataframe['date'] == trade_date)]
|
||||
trade_date = timeframe_to_prev_date(self.config["timeframe"], trade.open_date_utc)
|
||||
trade_candle = dataframe.loc[(dataframe["date"] == trade_date)]
|
||||
|
||||
if trade_candle.empty:
|
||||
return None
|
||||
trade_candle = trade_candle.squeeze()
|
||||
|
||||
follow_mode = self.config.get('freqai', {}).get('follow_mode', False)
|
||||
follow_mode = self.config.get("freqai", {}).get("follow_mode", False)
|
||||
|
||||
if not follow_mode:
|
||||
pair_dict = self.model.bridge.data_drawer.pair_dict
|
||||
|
@ -252,30 +251,33 @@ class FreqaiExampleStrategy(IStrategy):
|
|||
|
||||
entry_tag = trade.enter_tag
|
||||
|
||||
if ('prediction' + entry_tag not in pair_dict[pair] or
|
||||
pair_dict[pair]['prediction' + entry_tag] > 0):
|
||||
if (
|
||||
"prediction" + entry_tag not in pair_dict[pair]
|
||||
or pair_dict[pair]["prediction" + entry_tag] > 0
|
||||
):
|
||||
with self.model.bridge.lock:
|
||||
pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['&-s_close'])
|
||||
pair_dict[pair]["prediction" + entry_tag] = abs(trade_candle["&-s_close"])
|
||||
if not follow_mode:
|
||||
self.model.bridge.data_drawer.save_drawer_to_disk()
|
||||
else:
|
||||
self.model.bridge.data_drawer.save_follower_dict_to_disk()
|
||||
|
||||
roi_price = pair_dict[pair]['prediction' + entry_tag]
|
||||
roi_price = pair_dict[pair]["prediction" + entry_tag]
|
||||
roi_time = self.max_roi_time_long.value
|
||||
|
||||
roi_decay = roi_price * (1 - ((current_time - trade.open_date_utc).seconds) /
|
||||
(roi_time * 60))
|
||||
roi_decay = roi_price * (
|
||||
1 - ((current_time - trade.open_date_utc).seconds) / (roi_time * 60)
|
||||
)
|
||||
if roi_decay < 0:
|
||||
roi_decay = self.linear_roi_offset.value
|
||||
else:
|
||||
roi_decay += self.linear_roi_offset.value
|
||||
|
||||
if current_profit > roi_decay:
|
||||
return 'roi_custom_win'
|
||||
return "roi_custom_win"
|
||||
|
||||
if current_profit < -roi_decay:
|
||||
return 'roi_custom_loss'
|
||||
return "roi_custom_loss"
|
||||
|
||||
def confirm_trade_exit(
|
||||
self,
|
||||
|
@ -287,7 +289,7 @@ class FreqaiExampleStrategy(IStrategy):
|
|||
time_in_force: str,
|
||||
exit_reason: str,
|
||||
current_time,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
) -> bool:
|
||||
|
||||
entry_tag = trade.enter_tag
|
||||
|
@ -316,7 +318,7 @@ class FreqaiExampleStrategy(IStrategy):
|
|||
current_time,
|
||||
entry_tag,
|
||||
side: str,
|
||||
**kwargs
|
||||
**kwargs,
|
||||
) -> bool:
|
||||
|
||||
df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe)
|
||||
|
|
Loading…
Reference in New Issue
Block a user