Add support for download-data "until"

This commit is contained in:
Matthias 2022-04-30 15:28:01 +02:00
parent 4580127fa8
commit 11d447cd5a
4 changed files with 58 additions and 18 deletions

View File

@ -139,8 +139,9 @@ def _load_cached_data_for_updating(
timeframe: str,
timerange: Optional[TimeRange],
data_handler: IDataHandler,
candle_type: CandleType
) -> Tuple[DataFrame, Optional[int]]:
candle_type: CandleType,
prepend: bool = False,
) -> Tuple[DataFrame, Optional[int], Optional[int]]:
"""
Load cached data to download more data.
If timerange is passed in, checks whether data from an before the stored data will be
@ -150,9 +151,12 @@ def _load_cached_data_for_updating(
Note: Only used by download_pair_history().
"""
start = None
end = None
if timerange:
if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
if timerange.stoptype == 'date':
end = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
# Intentionally don't pass timerange in - since we need to load the full dataset.
data = data_handler.ohlcv_load(pair, timeframe=timeframe,
@ -160,14 +164,18 @@ def _load_cached_data_for_updating(
drop_incomplete=True, warn_no_data=False,
candle_type=candle_type)
if not data.empty:
if start and start < data.iloc[0]['date']:
if not prepend and start and start < data.iloc[0]['date']:
# Earlier data than existing data requested, redownload all
data = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS)
else:
start = data.iloc[-1]['date']
if prepend:
end = data.iloc[0]['date']
else:
start = data.iloc[-1]['date']
start_ms = int(start.timestamp() * 1000) if start else None
return data, start_ms
end_ms = int(end.timestamp() * 1000) if end else None
return data, start_ms, end_ms
def _download_pair_history(pair: str, *,
@ -208,9 +216,12 @@ def _download_pair_history(pair: str, *,
f'candle type: {candle_type} and store in {datadir}.'
)
data, since_ms = _load_cached_data_for_updating(pair, timeframe, timerange,
data_handler=data_handler,
candle_type=candle_type)
data, since_ms, until_ms = _load_cached_data_for_updating(
pair, timeframe, timerange,
data_handler=data_handler,
candle_type=candle_type,
prepend=False)
# TODO: Prepend should come from a param
logger.debug("Current Start: %s",
f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
@ -225,6 +236,7 @@ def _download_pair_history(pair: str, *,
days=-new_pairs_days).int_timestamp * 1000,
is_new_pair=data.empty,
candle_type=candle_type,
until_ms=until_ms if until_ms else None
)
# TODO: Maybe move parsing to exchange class (?)
new_dataframe = ohlcv_to_dataframe(new_data, timeframe, pair,

View File

@ -95,6 +95,7 @@ class Binance(Exchange):
async def _async_get_historic_ohlcv(self, pair: str, timeframe: str,
since_ms: int, candle_type: CandleType,
is_new_pair: bool = False, raise_: bool = False,
until_ms: int = None
) -> Tuple[str, str, str, List]:
"""
Overwrite to introduce "fast new pair" functionality by detecting the pair's listing date
@ -115,7 +116,8 @@ class Binance(Exchange):
since_ms=since_ms,
is_new_pair=is_new_pair,
raise_=raise_,
candle_type=candle_type
candle_type=candle_type,
until_ms=until_ms,
)
def funding_fee_cutoff(self, open_date: datetime):

View File

@ -1645,7 +1645,8 @@ class Exchange:
def get_historic_ohlcv(self, pair: str, timeframe: str,
since_ms: int, candle_type: CandleType,
is_new_pair: bool = False) -> List:
is_new_pair: bool = False,
until_ms: int = None) -> List:
"""
Get candle history using asyncio and returns the list of candles.
Handles all async work for this.
@ -1653,13 +1654,14 @@ class Exchange:
:param pair: Pair to download
:param timeframe: Timeframe to get data for
:param since_ms: Timestamp in milliseconds to get history from
:param until_ms: Timestamp in milliseconds to get history up to
:param candle_type: '', mark, index, premiumIndex, or funding_rate
:return: List with candle (OHLCV) data
"""
pair, _, _, data = self.loop.run_until_complete(
self._async_get_historic_ohlcv(pair=pair, timeframe=timeframe,
since_ms=since_ms, is_new_pair=is_new_pair,
candle_type=candle_type))
since_ms=since_ms, until_ms=until_ms,
is_new_pair=is_new_pair, candle_type=candle_type))
logger.info(f"Downloaded data for {pair} with length {len(data)}.")
return data
@ -1680,6 +1682,7 @@ class Exchange:
async def _async_get_historic_ohlcv(self, pair: str, timeframe: str,
since_ms: int, candle_type: CandleType,
is_new_pair: bool = False, raise_: bool = False,
until_ms: int = None
) -> Tuple[str, str, str, List]:
"""
Download historic ohlcv
@ -1695,7 +1698,7 @@ class Exchange:
)
input_coroutines = [self._async_get_candle_history(
pair, timeframe, candle_type, since) for since in
range(since_ms, arrow.utcnow().int_timestamp * 1000, one_call)]
range(since_ms, until_ms or (arrow.utcnow().int_timestamp * 1000), one_call)]
data: List = []
# Chunk requests into batches of 100 to avoid overwelming ccxt Throttling

View File

@ -223,42 +223,65 @@ def test_load_cached_data_for_updating(mocker, testdatadir) -> None:
# timeframe starts earlier than the cached data
# should fully update data
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts = _load_cached_data_for_updating(
data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts == test_data[0][0] - 1000
assert end_ts is None
# timeframe starts earlier than the cached data - prepending
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT, True)
assert_frame_equal(data, test_data_df.iloc[:-1])
assert start_ts == test_data[0][0] - 1000
assert end_ts == test_data[0][0]
# timeframe starts in the center of the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
data, start_ts = _load_cached_data_for_updating(
data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert_frame_equal(data, test_data_df.iloc[:-1])
assert test_data[-2][0] <= start_ts < test_data[-1][0]
assert end_ts is None
# timeframe starts after the cached data
# should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 100, 0)
data, start_ts = _load_cached_data_for_updating(
data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert_frame_equal(data, test_data_df.iloc[:-1])
assert test_data[-2][0] <= start_ts < test_data[-1][0]
assert end_ts is None
# no datafile exist
# should return timestamp start time
timerange = TimeRange('date', None, now_ts - 10000, 0)
data, start_ts = _load_cached_data_for_updating(
data, start_ts, end_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts == (now_ts - 10000) * 1000
assert end_ts is None
# no datafile exist
# should return timestamp start and end time time
timerange = TimeRange('date', 'date', now_ts - 1000000, now_ts - 100000)
data, start_ts, end_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts == (now_ts - 1000000) * 1000
assert end_ts == (now_ts - 100000) * 1000
# no datafile exist, no timeframe is set
# should return an empty array and None
data, start_ts = _load_cached_data_for_updating(
data, start_ts, end_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', None, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts is None
assert end_ts is None
@pytest.mark.parametrize('candle_type,subdir,file_tail', [