Further enhance pair retrieval

This commit is contained in:
Matthias 2021-11-28 15:03:55 +01:00
parent 8d70672bee
commit 7faa7539b4
4 changed files with 30 additions and 3 deletions

View File

@ -33,7 +33,7 @@ class HDF5DataHandler(IDataHandler):
cls._OHLCV_REGEX, p.name
) for p in datadir.glob("*.h5")
]
return [(match[1].replace('_', '/'), match[2], match[3]) for match in _tmp
return [(cls.rebuild_pair_from_filename(match[1]), match[2], match[3]) for match in _tmp
if match and len(match.groups()) > 1]
@classmethod

View File

@ -4,6 +4,7 @@ It's subclasses handle and storing data from disk.
"""
import logging
import re
from abc import ABC, abstractclassmethod, abstractmethod
from copy import deepcopy
from datetime import datetime, timezone
@ -23,7 +24,7 @@ logger = logging.getLogger(__name__)
class IDataHandler(ABC):
_OHLCV_REGEX = r'^([a-zA-Z_]+)\-(\d+\S)\-?([a-zA-Z_]*)?(?=\.)'
_OHLCV_REGEX = r'^([a-zA-Z_-]+)\-(\d+\S)\-?([a-zA-Z_]*)?(?=\.)'
def __init__(self, datadir: Path) -> None:
self._datadir = datadir
@ -166,6 +167,16 @@ class IDataHandler(ABC):
"""
return trades_remove_duplicates(self._trades_load(pair, timerange=timerange))
@staticmethod
def rebuild_pair_from_filename(pair: str) -> str:
"""
Rebuild pair name from filename
Assumes a asset name of max. 7 length to also support BTC-PERP and BTC-PERP:USD names.
"""
res = re.sub(r'^(.{1,7})(_)', r'\g<1>/', pair, 1)
res = re.sub('_', ':', res, 1)
return res
def ohlcv_load(self, pair, timeframe: str,
timerange: Optional[TimeRange] = None,
fill_missing: bool = True,

View File

@ -33,7 +33,7 @@ class JsonDataHandler(IDataHandler):
re.search(
cls._OHLCV_REGEX, p.name
) for p in datadir.glob(f"*.{cls._get_file_extension()}")]
return [(match[1].replace('_', '/'), match[2], match[3]) for match in _tmp
return [(cls.rebuild_pair_from_filename(match[1]), match[2], match[3]) for match in _tmp
if match and len(match.groups()) > 1]
@classmethod

View File

@ -673,10 +673,12 @@ def test_datahandler_ohlcv_get_pairs(testdatadir):
@pytest.mark.parametrize('filename,pair,timeframe,candletype', [
('XMR_BTC-5m.json', 'XMR_BTC', '5m', ''),
('XMR_USDT-1h.h5', 'XMR_USDT', '1h', ''),
('BTC-PERP-1h.h5', 'BTC-PERP', '1h', ''),
('BTC_USDT-2h.jsongz', 'BTC_USDT', '2h', ''),
('BTC_USDT-2h-mark.jsongz', 'BTC_USDT', '2h', 'mark'),
('XMR_USDT-1h-mark.h5', 'XMR_USDT', '1h', 'mark'),
('XMR_USDT-1h-random.h5', 'XMR_USDT', '1h', 'random'),
('BTC-PERP-1h-index.h5', 'BTC-PERP', '1h', 'index'),
('XMR_USDT_USDT-1h-mark.h5', 'XMR_USDT_USDT', '1h', 'mark'),
])
def test_datahandler_ohlcv_regex(filename, pair, timeframe, candletype):
@ -689,6 +691,20 @@ def test_datahandler_ohlcv_regex(filename, pair, timeframe, candletype):
assert match[3] == candletype
@pytest.mark.parametrize('input,expected', [
('XMR_USDT', 'XMR/USDT'),
('BTC_USDT', 'BTC/USDT'),
('USDT_BUSD', 'USDT/BUSD'),
('BTC_USDT_USDT', 'BTC/USDT:USDT'), # Futures
('XRP_USDT_USDT', 'XRP/USDT:USDT'), # futures
('BTC-PERP', 'BTC-PERP'),
('BTC-PERP_USDT', 'BTC-PERP:USDT'), # potential FTX case
])
def test_rebuild_pair_from_filename(input, expected):
assert IDataHandler.rebuild_pair_from_filename(input) == expected
def test_datahandler_ohlcv_get_available_data(testdatadir):
paircombs = JsonDataHandler.ohlcv_get_available_data(testdatadir)
# Convert to set to avoid failures due to sorting