diff --git a/docs/developer.md b/docs/developer.md index 7f3dc76f6..cf6b5d2cd 100644 --- a/docs/developer.md +++ b/docs/developer.md @@ -112,7 +112,7 @@ pair = "XLM/BTC" # Make sure to use a pair that exists on that exchange! raw = ct.fetch_ohlcv(pair, timeframe=timeframe) # convert to dataframe -df1 = parse_ticker_dataframe(raw, timeframe, drop_incomplete=False) +df1 = parse_ticker_dataframe(raw, timeframe, pair=pair, drop_incomplete=False) print(df1["date"].tail(1)) print(datetime.utcnow()) diff --git a/freqtrade/data/converter.py b/freqtrade/data/converter.py index dc566070d..b530b3bce 100644 --- a/freqtrade/data/converter.py +++ b/freqtrade/data/converter.py @@ -10,13 +10,14 @@ from pandas import DataFrame, to_datetime logger = logging.getLogger(__name__) -def parse_ticker_dataframe(ticker: list, ticker_interval: str, *, +def parse_ticker_dataframe(ticker: list, ticker_interval: str, pair: str, *, fill_missing: bool = True, drop_incomplete: bool = True) -> DataFrame: """ Converts a ticker-list (format ccxt.fetch_ohlcv) to a Dataframe :param ticker: ticker list, as returned by exchange.async_get_candle_history :param ticker_interval: ticker_interval (e.g. 5m). Used to fill up eventual missing data + :param pair: Pair this data is for (used to warn if fillup was necessary) :param fill_missing: fill up missing candles with 0 candles (see ohlcv_fill_up_missing_data for details) :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete @@ -51,12 +52,12 @@ def parse_ticker_dataframe(ticker: list, ticker_interval: str, *, logger.debug('Dropping last candle') if fill_missing: - return ohlcv_fill_up_missing_data(frame, ticker_interval) + return ohlcv_fill_up_missing_data(frame, ticker_interval, pair) else: return frame -def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> DataFrame: +def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str, pair: str) -> DataFrame: """ Fills up missing data with 0 volume rows, using the previous close as price for "open", "high" "low" and "close", volume is set to 0 @@ -84,7 +85,10 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> Da 'low': df['close'], }) df.reset_index(inplace=True) - logger.debug(f"Missing data fillup: before: {len(dataframe)} - after: {len(df)}") + len_before = len(dataframe) + len_after = len(df) + if len_before != len_after: + logger.info(f"Missing data fillup for {pair}: before: {len_before} - after: {len_after}") return df diff --git a/freqtrade/data/history.py b/freqtrade/data/history.py index e9694b90f..2a0d9b15e 100644 --- a/freqtrade/data/history.py +++ b/freqtrade/data/history.py @@ -116,7 +116,7 @@ def load_pair_history(pair: str, logger.warning('Missing data at end for pair %s, data ends at %s', pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S')) - return parse_ticker_dataframe(pairdata, ticker_interval, + return parse_ticker_dataframe(pairdata, ticker_interval, pair=pair, fill_missing=fill_up_missing, drop_incomplete=drop_incomplete) else: @@ -286,12 +286,13 @@ def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow] max(timeframe, key=operator.itemgetter(1))[1] -def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime, +def validate_backtest_data(data: DataFrame, pair: str, min_date: datetime, max_date: datetime, ticker_interval_mins: int) -> bool: """ Validates preprocessed backtesting data for missing values and shows warnings about it that. - :param data: dictionary with preprocessed backtesting data + :param data: preprocessed backtesting data (as DataFrame) + :param pair: pair used for log output. :param min_date: start-date of the data :param max_date: end-date of the data :param ticker_interval_mins: ticker interval in minutes @@ -299,10 +300,9 @@ def validate_backtest_data(data: Dict[str, DataFrame], min_date: datetime, # total difference in minutes / interval-minutes expected_frames = int((max_date - min_date).total_seconds() // 60 // ticker_interval_mins) found_missing = False - for pair, df in data.items(): - dflen = len(df) - if dflen < expected_frames: - found_missing = True - logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values", - pair, expected_frames, dflen, expected_frames - dflen) + dflen = len(data) + if dflen < expected_frames: + found_missing = True + logger.warning("%s has missing frames: expected %s, got %s, that's %s missing values", + pair, expected_frames, dflen, expected_frames - dflen) return found_missing diff --git a/freqtrade/exchange/exchange.py b/freqtrade/exchange/exchange.py index 194e1d883..a65294091 100644 --- a/freqtrade/exchange/exchange.py +++ b/freqtrade/exchange/exchange.py @@ -581,7 +581,7 @@ class Exchange(object): self._pairs_last_refresh_time[(pair, ticker_interval)] = ticks[-1][0] // 1000 # keeping parsed dataframe in cache self._klines[(pair, ticker_interval)] = parse_ticker_dataframe( - ticks, ticker_interval, fill_missing=True, + ticks, ticker_interval, pair=pair, fill_missing=True, drop_incomplete=self._ohlcv_partial_candle) return tickers diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 8bdf66f92..923119591 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -349,7 +349,7 @@ class Backtesting(object): row = ticker[pair][indexes[pair]] except IndexError: # missing Data for one pair at the end. - # Warnings for this are shown by `validate_backtest_data` + # Warnings for this are shown during data loading continue # Waits until the time-counter reaches the start of the data for this pair. @@ -420,20 +420,19 @@ class Backtesting(object): max_open_trades = 0 all_results = {} + min_date, max_date = history.get_timeframe(data) + + logger.info( + 'Backtesting with data from %s up to %s (%s days)..', + min_date.isoformat(), + max_date.isoformat(), + (max_date - min_date).days + ) + for strat in self.strategylist: logger.info("Running backtesting for Strategy %s", strat.get_strategy_name()) self._set_strategy(strat) - min_date, max_date = history.get_timeframe(data) - # Validate dataframe for missing values (mainly at start and end, as fillup is called) - history.validate_backtest_data(data, min_date, max_date, - timeframe_to_minutes(self.ticker_interval)) - logger.info( - 'Backtesting with data from %s up to %s (%s days)..', - min_date.isoformat(), - max_date.isoformat(), - (max_date - min_date).days - ) # need to reprocess data every time to populate signals preprocessed = self.strategy.tickerdata_to_dataframe(data) diff --git a/freqtrade/optimize/hyperopt.py b/freqtrade/optimize/hyperopt.py index 28b9ce789..7fd9bf5d9 100644 --- a/freqtrade/optimize/hyperopt.py +++ b/freqtrade/optimize/hyperopt.py @@ -19,8 +19,7 @@ from skopt import Optimizer from skopt.space import Dimension from freqtrade.arguments import Arguments -from freqtrade.data.history import load_data, get_timeframe, validate_backtest_data -from freqtrade.exchange import timeframe_to_minutes +from freqtrade.data.history import load_data, get_timeframe from freqtrade.optimize.backtesting import Backtesting from freqtrade.resolvers.hyperopt_resolver import HyperOptResolver @@ -281,9 +280,7 @@ class Hyperopt(Backtesting): return min_date, max_date = get_timeframe(data) - # Validate dataframe for missing values (mainly at start and end, as fillup is called) - validate_backtest_data(data, min_date, max_date, - timeframe_to_minutes(self.ticker_interval)) + logger.info( 'Hyperopting with data from %s up to %s (%s days)..', min_date.isoformat(), diff --git a/freqtrade/tests/conftest.py b/freqtrade/tests/conftest.py index 5c35e866e..eb2a8600f 100644 --- a/freqtrade/tests/conftest.py +++ b/freqtrade/tests/conftest.py @@ -674,7 +674,7 @@ def ticker_history_list(): @pytest.fixture def ticker_history(ticker_history_list): - return parse_ticker_dataframe(ticker_history_list, "5m", fill_missing=True) + return parse_ticker_dataframe(ticker_history_list, "5m", pair="UNITTEST/BTC", fill_missing=True) @pytest.fixture @@ -879,7 +879,8 @@ def tickers(): @pytest.fixture def result(): with Path('freqtrade/tests/testdata/UNITTEST_BTC-1m.json').open('r') as data_file: - return parse_ticker_dataframe(json.load(data_file), '1m', fill_missing=True) + return parse_ticker_dataframe(json.load(data_file), '1m', pair="UNITTEST/BTC", + fill_missing=True) # FIX: # Create an fixture/function diff --git a/freqtrade/tests/data/test_converter.py b/freqtrade/tests/data/test_converter.py index 8a0761f1c..f68224e0e 100644 --- a/freqtrade/tests/data/test_converter.py +++ b/freqtrade/tests/data/test_converter.py @@ -15,7 +15,8 @@ def test_parse_ticker_dataframe(ticker_history_list, caplog): caplog.set_level(logging.DEBUG) # Test file with BV data - dataframe = parse_ticker_dataframe(ticker_history_list, '5m', fill_missing=True) + dataframe = parse_ticker_dataframe(ticker_history_list, '5m', + pair="UNITTEST/BTC", fill_missing=True) assert dataframe.columns.tolist() == columns assert log_has('Parsing tickerlist to dataframe', caplog.record_tuples) @@ -27,18 +28,19 @@ def test_ohlcv_fill_up_missing_data(caplog): pair='UNITTEST/BTC', fill_up_missing=False) caplog.set_level(logging.DEBUG) - data2 = ohlcv_fill_up_missing_data(data, '1m') + data2 = ohlcv_fill_up_missing_data(data, '1m', 'UNITTEST/BTC') assert len(data2) > len(data) # Column names should not change assert (data.columns == data2.columns).all() - assert log_has(f"Missing data fillup: before: {len(data)} - after: {len(data2)}", + assert log_has(f"Missing data fillup for UNITTEST/BTC: before: " + f"{len(data)} - after: {len(data2)}", caplog.record_tuples) # Test fillup actually fixes invalid backtest data min_date, max_date = get_timeframe({'UNITTEST/BTC': data}) - assert validate_backtest_data({'UNITTEST/BTC': data}, min_date, max_date, 1) - assert not validate_backtest_data({'UNITTEST/BTC': data2}, min_date, max_date, 1) + assert validate_backtest_data(data, 'UNITTEST/BTC', min_date, max_date, 1) + assert not validate_backtest_data(data2, 'UNITTEST/BTC', min_date, max_date, 1) def test_ohlcv_fill_up_missing_data2(caplog): @@ -78,10 +80,10 @@ def test_ohlcv_fill_up_missing_data2(caplog): ] # Generate test-data without filling missing - data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False) + data = parse_ticker_dataframe(ticks, ticker_interval, pair="UNITTEST/BTC", fill_missing=False) assert len(data) == 3 caplog.set_level(logging.DEBUG) - data2 = ohlcv_fill_up_missing_data(data, ticker_interval) + data2 = ohlcv_fill_up_missing_data(data, ticker_interval, "UNITTEST/BTC") assert len(data2) == 4 # 3rd candle has been filled row = data2.loc[2, :] @@ -94,7 +96,8 @@ def test_ohlcv_fill_up_missing_data2(caplog): # Column names should not change assert (data.columns == data2.columns).all() - assert log_has(f"Missing data fillup: before: {len(data)} - after: {len(data2)}", + assert log_has(f"Missing data fillup for UNITTEST/BTC: before: " + f"{len(data)} - after: {len(data2)}", caplog.record_tuples) @@ -134,12 +137,14 @@ def test_ohlcv_drop_incomplete(caplog): ] ] caplog.set_level(logging.DEBUG) - data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False, drop_incomplete=False) + data = parse_ticker_dataframe(ticks, ticker_interval, pair="UNITTEST/BTC", + fill_missing=False, drop_incomplete=False) assert len(data) == 4 assert not log_has("Dropping last candle", caplog.record_tuples) # Drop last candle - data = parse_ticker_dataframe(ticks, ticker_interval, fill_missing=False, drop_incomplete=True) + data = parse_ticker_dataframe(ticks, ticker_interval, pair="UNITTEST/BTC", + fill_missing=False, drop_incomplete=True) assert len(data) == 3 assert log_has("Dropping last candle", caplog.record_tuples) diff --git a/freqtrade/tests/data/test_history.py b/freqtrade/tests/data/test_history.py index a13bc34af..46bcf06c4 100644 --- a/freqtrade/tests/data/test_history.py +++ b/freqtrade/tests/data/test_history.py @@ -555,8 +555,8 @@ def test_validate_backtest_data_warn(default_conf, mocker, caplog) -> None: ) min_date, max_date = history.get_timeframe(data) caplog.clear() - assert history.validate_backtest_data(data, min_date, max_date, - timeframe_to_minutes('1m')) + assert history.validate_backtest_data(data['UNITTEST/BTC'], 'UNITTEST/BTC', + min_date, max_date, timeframe_to_minutes('1m')) assert len(caplog.record_tuples) == 1 assert log_has( "UNITTEST/BTC has missing frames: expected 14396, got 13680, that's 716 missing values", @@ -579,6 +579,6 @@ def test_validate_backtest_data(default_conf, mocker, caplog) -> None: min_date, max_date = history.get_timeframe(data) caplog.clear() - assert not history.validate_backtest_data(data, min_date, max_date, - timeframe_to_minutes('5m')) + assert not history.validate_backtest_data(data['UNITTEST/BTC'], 'UNITTEST/BTC', + min_date, max_date, timeframe_to_minutes('5m')) assert len(caplog.record_tuples) == 0 diff --git a/freqtrade/tests/edge/test_edge.py b/freqtrade/tests/edge/test_edge.py index a14e3282e..45b8e609e 100644 --- a/freqtrade/tests/edge/test_edge.py +++ b/freqtrade/tests/edge/test_edge.py @@ -263,7 +263,7 @@ def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=Fals hz = 0.1 base = 0.001 - ETHBTC = [ + NEOBTC = [ [ ticker_start_time.shift(minutes=(x * ticker_interval_in_minute)).timestamp * 1000, math.sin(x * hz) / 1000 + base, @@ -285,8 +285,8 @@ def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=Fals 123.45 ] for x in range(0, 500)] - pairdata = {'NEO/BTC': parse_ticker_dataframe(ETHBTC, '1h', fill_missing=True), - 'LTC/BTC': parse_ticker_dataframe(LTCBTC, '1h', fill_missing=True)} + pairdata = {'NEO/BTC': parse_ticker_dataframe(NEOBTC, '1h', pair="NEO/BTC", fill_missing=True), + 'LTC/BTC': parse_ticker_dataframe(LTCBTC, '1h', pair="LTC/BTC", fill_missing=True)} return pairdata diff --git a/freqtrade/tests/optimize/test_backtesting.py b/freqtrade/tests/optimize/test_backtesting.py index cf32934c7..28568f20c 100644 --- a/freqtrade/tests/optimize/test_backtesting.py +++ b/freqtrade/tests/optimize/test_backtesting.py @@ -73,7 +73,8 @@ def load_data_test(what): pair[x][5] # Keep old volume ] for x in range(0, datalen) ] - return {'UNITTEST/BTC': parse_ticker_dataframe(data, '1m', fill_missing=True)} + return {'UNITTEST/BTC': parse_ticker_dataframe(data, '1m', pair="UNITTEST/BTC", + fill_missing=True)} def simple_backtest(config, contour, num_results, mocker) -> None: @@ -102,7 +103,8 @@ def simple_backtest(config, contour, num_results, mocker) -> None: def mocked_load_data(datadir, pairs=[], ticker_interval='0m', refresh_pairs=False, timerange=None, exchange=None, live=False): tickerdata = history.load_tickerdata_file(datadir, 'UNITTEST/BTC', '1m', timerange=timerange) - pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata, '1m', fill_missing=True)} + pairdata = {'UNITTEST/BTC': parse_ticker_dataframe(tickerdata, '1m', pair="UNITTEST/BTC", + fill_missing=True)} return pairdata @@ -350,7 +352,8 @@ def test_tickerdata_to_dataframe_bt(default_conf, mocker) -> None: patch_exchange(mocker) timerange = TimeRange(None, 'line', 0, -100) tick = history.load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange) - tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)} + tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC", + fill_missing=True)} backtesting = Backtesting(default_conf) data = backtesting.strategy.tickerdata_to_dataframe(tickerlist) diff --git a/freqtrade/tests/optimize/test_hyperopt.py b/freqtrade/tests/optimize/test_hyperopt.py index c40baccbc..c3d6d0076 100644 --- a/freqtrade/tests/optimize/test_hyperopt.py +++ b/freqtrade/tests/optimize/test_hyperopt.py @@ -427,7 +427,8 @@ def test_has_space(hyperopt): def test_populate_indicators(hyperopt) -> None: tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m') - tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)} + tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC", + fill_missing=True)} dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist) dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'], {'pair': 'UNITTEST/BTC'}) @@ -440,7 +441,8 @@ def test_populate_indicators(hyperopt) -> None: def test_buy_strategy_generator(hyperopt) -> None: tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m') - tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)} + tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC", + fill_missing=True)} dataframes = hyperopt.strategy.tickerdata_to_dataframe(tickerlist) dataframe = hyperopt.custom_hyperopt.populate_indicators(dataframes['UNITTEST/BTC'], {'pair': 'UNITTEST/BTC'}) diff --git a/freqtrade/tests/strategy/test_default_strategy.py b/freqtrade/tests/strategy/test_default_strategy.py index be514f2d1..74c81882a 100644 --- a/freqtrade/tests/strategy/test_default_strategy.py +++ b/freqtrade/tests/strategy/test_default_strategy.py @@ -10,7 +10,8 @@ from freqtrade.strategy.default_strategy import DefaultStrategy @pytest.fixture def result(): with open('freqtrade/tests/testdata/ETH_BTC-1m.json') as data_file: - return parse_ticker_dataframe(json.load(data_file), '1m', fill_missing=True) + return parse_ticker_dataframe(json.load(data_file), '1m', pair="UNITTEST/BTC", + fill_missing=True) def test_default_strategy_structure(): diff --git a/freqtrade/tests/strategy/test_interface.py b/freqtrade/tests/strategy/test_interface.py index e384003dc..fe7fd2193 100644 --- a/freqtrade/tests/strategy/test_interface.py +++ b/freqtrade/tests/strategy/test_interface.py @@ -111,7 +111,8 @@ def test_tickerdata_to_dataframe(default_conf) -> None: timerange = TimeRange(None, 'line', 0, -100) tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m', timerange=timerange) - tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', fill_missing=True)} + tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, '1m', pair="UNITTEST/BTC", + fill_missing=True)} data = strategy.tickerdata_to_dataframe(tickerlist) assert len(data['UNITTEST/BTC']) == 102 # partial candle was removed diff --git a/freqtrade/tests/test_misc.py b/freqtrade/tests/test_misc.py index c7bcf7edf..7a7b15cf2 100644 --- a/freqtrade/tests/test_misc.py +++ b/freqtrade/tests/test_misc.py @@ -17,7 +17,8 @@ def test_shorten_date() -> None: def test_datesarray_to_datetimearray(ticker_history_list): - dataframes = parse_ticker_dataframe(ticker_history_list, "5m", fill_missing=True) + dataframes = parse_ticker_dataframe(ticker_history_list, "5m", pair="UNITTEST/BTC", + fill_missing=True) dates = datesarray_to_datetimearray(dataframes['date']) assert isinstance(dates[0], datetime.datetime) @@ -34,7 +35,8 @@ def test_datesarray_to_datetimearray(ticker_history_list): def test_common_datearray(default_conf) -> None: strategy = DefaultStrategy(default_conf) tick = load_tickerdata_file(None, 'UNITTEST/BTC', '1m') - tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, "1m", fill_missing=True)} + tickerlist = {'UNITTEST/BTC': parse_ticker_dataframe(tick, "1m", pair="UNITTEST/BTC", + fill_missing=True)} dataframes = strategy.tickerdata_to_dataframe(tickerlist) dates = common_datearray(dataframes)