freqtrade_origin/freqtrade/data/history/jsondatahandler.py


import logging
import re
from pathlib import Path
from typing import List, Optional

import numpy as np
from pandas import DataFrame, read_json, to_datetime

from freqtrade import misc
from freqtrade.configuration import TimeRange
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, ListPairsWithTimeframes, TradeList
from freqtrade.data.converter import trades_dict_to_list

from .idatahandler import IDataHandler

logger = logging.getLogger(__name__)


class JsonDataHandler(IDataHandler):

    _use_zip = False
    _columns = DEFAULT_DATAFRAME_COLUMNS

    @classmethod
    def ohlcv_get_available_data(cls, datadir: Path) -> ListPairsWithTimeframes:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        :param datadir: Directory to search for ohlcv files
        :return: List of Tuples of (pair, timeframe)
        """
        _tmp = [re.search(r'^([a-zA-Z_]+)\-(\d+\S+)(?=.json)', p.name)
                for p in datadir.glob(f"*.{cls._get_file_extension()}")]
        return [(match[1].replace('_', '/'), match[2]) for match in _tmp
                if match and len(match.groups()) > 1]
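    # Illustrative sketch (added for clarity, not part of the original module): given a
    # datadir containing hypothetical files such as ETH_BTC-5m.json and XRP_BTC-1h.json,
    # the discovery above is expected to yield pair/timeframe tuples roughly like:
    #
    #   JsonDataHandler.ohlcv_get_available_data(Path("user_data/data/binance"))
    #   # -> [('ETH/BTC', '5m'), ('XRP/BTC', '1h')]
    #
    # The directory and filenames here are assumptions chosen to match the naming scheme
    # produced by _pair_data_filename() further down.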

    @classmethod
    def ohlcv_get_pairs(cls, datadir: Path, timeframe: str) -> List[str]:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        for the specified timeframe
        :param datadir: Directory to search for ohlcv files
        :param timeframe: Timeframe to search pairs for
        :return: List of Pairs
        """
        _tmp = [re.search(r'^(\S+)(?=\-' + timeframe + '.json)', p.name)
                for p in datadir.glob(f"*{timeframe}.{cls._get_file_extension()}")]
        # Check if regex found something and only return these results
        return [match[0].replace('_', '/') for match in _tmp if match]

    def ohlcv_store(self, pair: str, timeframe: str, data: DataFrame) -> None:
        """
        Store data in json format "values".
        format looks as follows:
        [[<date>,<open>,<high>,<low>,<close>,<volume>]]
        :param pair: Pair - used to generate filename
        :param timeframe: Timeframe - used to generate filename
        :param data: Dataframe containing OHLCV data
        :return: None
        """
        filename = self._pair_data_filename(self._datadir, pair, timeframe)
        _data = data.copy()
        # Convert date to int (milliseconds since epoch)
        _data['date'] = _data['date'].astype(np.int64) // 1000 // 1000
        # Reset index, select only appropriate columns and save as json
        _data.reset_index(drop=True).loc[:, self._columns].to_json(
            filename, orient="values",
            compression='gzip' if self._use_zip else None)
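    # Illustrative sketch (assumption, not from the original file): the on-disk "values"
    # payload for a single candle would look roughly like
    #   [[1609459200000, 28923.63, 28961.66, 28913.12, 28961.66, 27.45], ...]
    # i.e. a millisecond timestamp followed by open/high/low/close/volume. The concrete
    # numbers above are made up for illustration only.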

    def _ohlcv_load(self, pair: str, timeframe: str,
                    timerange: Optional[TimeRange] = None,
                    ) -> DataFrame:
        """
        Internal method used to load data for one pair from disk.
        Implements the loading and conversion to a Pandas dataframe.
        Timerange trimming and dataframe validation happens outside of this method.
        :param pair: Pair to load data
        :param timeframe: Timeframe (e.g. "5m")
        :param timerange: Limit data to be loaded to this timerange.
                          Optionally implemented by subclasses to avoid loading
                          all data where possible.
        :return: DataFrame with ohlcv data, or empty DataFrame
        """
        filename = self._pair_data_filename(self._datadir, pair, timeframe)
        if not filename.exists():
            return DataFrame(columns=self._columns)
        try:
            pairdata = read_json(filename, orient='values')
            pairdata.columns = self._columns
        except ValueError:
            logger.error(f"Could not load data for {pair}.")
            return DataFrame(columns=self._columns)
        pairdata = pairdata.astype(dtype={'open': 'float', 'high': 'float',
                                          'low': 'float', 'close': 'float', 'volume': 'float'})
        pairdata['date'] = to_datetime(pairdata['date'],
                                       unit='ms',
                                       utc=True,
                                       infer_datetime_format=True)
        return pairdata
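    # Note (added for clarity): to_datetime(..., unit='ms', utc=True) is the inverse of the
    # millisecond conversion performed in ohlcv_store() above, so a store/load round trip
    # preserves the original UTC timestamps at millisecond precision.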

    def ohlcv_purge(self, pair: str, timeframe: str) -> bool:
        """
        Remove data for this pair
        :param pair: Delete data for this pair.
        :param timeframe: Timeframe (e.g. "5m")
        :return: True when deleted, false if file did not exist.
        """
        filename = self._pair_data_filename(self._datadir, pair, timeframe)
        if filename.exists():
            filename.unlink()
            return True
        return False

    def ohlcv_append(self, pair: str, timeframe: str, data: DataFrame) -> None:
        """
        Append data to existing data structures
        :param pair: Pair
        :param timeframe: Timeframe this ohlcv data is for
        :param data: Data to append.
        """
        raise NotImplementedError()

    @classmethod
    def trades_get_pairs(cls, datadir: Path) -> List[str]:
        """
        Returns a list of all pairs for which trade data is available in this datadir
        :param datadir: Directory to search for trades files
        :return: List of Pairs
        """
        _tmp = [re.search(r'^(\S+)(?=\-trades.json)', p.name)
                for p in datadir.glob(f"*trades.{cls._get_file_extension()}")]
        # Check if regex found something and only return these results to avoid exceptions.
        return [match[0].replace('_', '/') for match in _tmp if match]

    def trades_store(self, pair: str, data: TradeList) -> None:
        """
        Store trades data (list of lists) to file
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        filename = self._pair_trades_filename(self._datadir, pair)
        misc.file_dump_json(filename, data, is_zip=self._use_zip)

    def trades_append(self, pair: str, data: TradeList):
        """
        Append data to existing files
        :param pair: Pair - used for filename
        :param data: List of Lists containing trade data,
                     column sequence as in DEFAULT_TRADES_COLUMNS
        """
        raise NotImplementedError()

    def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
        """
        Load a pair from file, either .json.gz or .json
        # TODO: respect timerange ...
        :param pair: Load trades for this pair
        :param timerange: Timerange to load trades for - currently not implemented
        :return: List of trades
        """
        filename = self._pair_trades_filename(self._datadir, pair)
        tradesdata = misc.file_load_json(filename)

        if not tradesdata:
            return []

        if isinstance(tradesdata[0], dict):
            # Convert trades dict to list
            logger.info("Old trades format detected - converting")
            tradesdata = trades_dict_to_list(tradesdata)

        return tradesdata
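    # Illustrative sketch (assumption, not from the original file): old files hold ccxt-style
    # trade dicts, e.g.
    #   {'timestamp': 1609459200000, 'id': '123', 'type': None, 'side': 'buy',
    #    'price': 0.021, 'amount': 1.5, 'cost': 0.0315}
    # which trades_dict_to_list() flattens into rows ordered as in DEFAULT_TRADES_COLUMNS, e.g.
    #   [1609459200000, '123', None, 'buy', 0.021, 1.5, 0.0315]
    # The concrete values above are made up for illustration only.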

    def trades_purge(self, pair: str) -> bool:
        """
        Remove data for this pair
        :param pair: Delete data for this pair.
        :return: True when deleted, false if file did not exist.
        """
        filename = self._pair_trades_filename(self._datadir, pair)
        if filename.exists():
            filename.unlink()
            return True
        return False

    @classmethod
    def _pair_data_filename(cls, datadir: Path, pair: str, timeframe: str) -> Path:
        pair_s = misc.pair_to_filename(pair)
        filename = datadir.joinpath(f'{pair_s}-{timeframe}.{cls._get_file_extension()}')
        return filename

    @classmethod
    def _get_file_extension(cls):
        return "json.gz" if cls._use_zip else "json"

    @classmethod
    def _pair_trades_filename(cls, datadir: Path, pair: str) -> Path:
        pair_s = misc.pair_to_filename(pair)
        filename = datadir.joinpath(f'{pair_s}-trades.{cls._get_file_extension()}')
        return filename


class JsonGzDataHandler(JsonDataHandler):

    _use_zip = True
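

# ---------------------------------------------------------------------------
# Illustrative usage sketch (added for clarity, not part of the original module).
# Assumptions: the data directory below is a placeholder, the pair/timeframe are
# examples only, and the IDataHandler base class accepts the data directory as
# its constructor argument. The public load entry point lives on IDataHandler;
# the internal _ohlcv_load() defined above is used here only because it is
# declared in this file.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    datadir = Path("user_data/data/binance")  # hypothetical path, adjust as needed

    # Discover which pair/timeframe combinations have JSON files on disk.
    print("Available data:", JsonDataHandler.ohlcv_get_available_data(datadir))

    handler = JsonDataHandler(datadir)
    # Load 5m candles for ETH/BTC; returns an empty DataFrame when no file exists.
    df = handler._ohlcv_load("ETH/BTC", "5m")
    print(df.tail())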