freqtrade_origin/freqtrade/data/converter/orderflow.py

"""
Functions to convert orderflow data from public_trades

"""
import logging
import time

import numpy as np
import pandas as pd
from pandas import DataFrame

from freqtrade.constants import DEFAULT_ORDERFLOW_COLUMNS, Config
from freqtrade.exchange.exchange_utils import timeframe_to_resample_freq


logger = logging.getLogger(__name__)


def _init_dataframe_with_trades_columns(dataframe: DataFrame):
    """
    Add the trades / orderflow columns to a dataframe, in place.

    'trades' is filled with an empty list per row and 'orderflow' with an empty
    dict per row; every other added column is initialised to NaN.
    :param dataframe: Dataframe to populate (modified in place, nothing is returned)
    """
    # container columns: a row-wise apply builds one new object for every row
    for column, factory in (('trades', list), ('orderflow', dict)):
        dataframe[column] = dataframe.apply(lambda _, make=factory: make(), axis=1)

    # scalar metric columns start out as NaN
    for column in (
        'bid',
        'ask',
        'delta',
        'min_delta',
        'max_delta',
        'total_trades',
        'stacked_imbalances_bid',
        'stacked_imbalances_ask',
    ):
        dataframe[column] = np.nan


def _convert_timeframe_to_pandas_frequency(timeframe: str):
    """
    Convert a timeframe into a minute-based pandas frequency string.

    :param timeframe: timeframe as understood by timeframe_to_minutes
    :return: tuple of (frequency string of the form '<minutes>min', number of minutes)
    """
    # imported inside the function, as in the rest of this module's usage
    from freqtrade.exchange import timeframe_to_minutes

    minutes = timeframe_to_minutes(timeframe)
    return (f'{minutes}min', minutes)


def _calculate_ohlcv_candle_start_and_end(df: DataFrame, timeframe: str):
    """
    Add 'candle_start' and 'candle_end' columns to ``df``, in place.

    The 'date' column is converted with ``pd.to_datetime(..., unit='ms')`` and
    floored to the resample frequency of the timeframe to get the candle start;
    the candle end is the start plus the timeframe length in minutes.
    Nothing is added when ``df`` is None or empty.
    :param df: dataframe with a 'date' column (modified in place)
    :param timeframe: candle timeframe
    """
    candle_minutes = _convert_timeframe_to_pandas_frequency(timeframe)[1]
    resample_freq = timeframe_to_resample_freq(timeframe)

    if df is None or df.empty:
        return

    # temporary helper column, removed again at the end
    df['datetime'] = pd.to_datetime(df['date'], unit='ms')
    df['candle_start'] = df['datetime'].dt.floor(resample_freq)
    # used in _now_is_time_to_refresh_trades
    df['candle_end'] = df['candle_start'] + pd.Timedelta(minutes=candle_minutes)
    del df['datetime']


def populate_dataframe_with_trades(config: Config,
                                   dataframe: DataFrame,
                                   trades: DataFrame) -> DataFrame:
    """
    Populates a dataframe with trades and the orderflow metrics derived from them.

    The trades columns are first added to ``dataframe`` in place. Trades are then
    grouped by the start of the candle they fall into, and for every candle of
    ``dataframe`` that has trades the following is written back: the raw trades,
    the orderflow, the stacked bid / ask imbalances, bid, ask, delta, the min / max
    of the running delta and the number of trades.

    Any exception raised while populating is logged and swallowed; ``dataframe`` is
    then returned with whatever has been filled in up to that point.

    :param config: Configuration. Reads config['timeframe'] and, from config['orderflow'],
        'scale', 'imbalance_ratio', 'imbalance_volume' and 'stacked_imbalance_range'
    :param dataframe: Dataframe to populate (modified in place), needs a 'date' column
    :param trades: Trades to populate with, needs 'date', 'side', 'amount' and 'price'
        columns. When not empty it gains 'candle_start' / 'candle_end' columns in place.
    :return: Dataframe with trades populated
    """
    config_orderflow = config['orderflow']
    timeframe = config['timeframe']

    # create columns for trades
    _init_dataframe_with_trades_columns(dataframe)
    df = dataframe.copy()

    try:
        start_time = time.time()
        # calculate ohlcv candle start and end
        # TODO: check if call is necessary for df.
        _calculate_ohlcv_candle_start_and_end(df, timeframe)
        _calculate_ohlcv_candle_start_and_end(trades, timeframe)

        # Without candles or trades the helper above adds no 'candle_start' column,
        # so there is nothing to match - return the initialised dataframe.
        if df.empty or trades is None or trades.empty:
            logger.debug("No candles or no trades available to populate orderflow")
            return dataframe

        # the candle length does not change while iterating, compute it once
        _, timeframe_minutes = _convert_timeframe_to_pandas_frequency(timeframe)
        candle_length = pd.Timedelta(minutes=timeframe_minutes)

        # slice of trades that are before current ohlcv candles to make groupby faster
        # .iloc[0] takes the first candle by position, whatever the index labels are
        # TODO: maybe use df.date instead of df.candle_start at comparison below
        first_candle_start = df['candle_start'].iloc[0]
        trades = trades.loc[
            trades['candle_start'] >= first_candle_start].reset_index(drop=True)

        # group trades by candle start
        trades_grouped_by_candle_start = trades.groupby(
            'candle_start', group_keys=False)
        candle_starts_with_trades = trades_grouped_by_candle_start.groups

        # iterating the groupby hands out each candle's trades directly
        for candle_start, trades_grouped_df in trades_grouped_by_candle_start:
            is_between = (candle_start == df['candle_start'])
            if not is_between.any():
                logger.debug(
                    f"Found NO candles for trades starting with {candle_start}")
                continue

            candle_next = candle_start + candle_length
            # only warn (the candle is still processed) if there are no trades
            # at the next candle, because this candle might not be finished yet
            if candle_next not in candle_starts_with_trades:
                logger.warning(
                    f"candle at {candle_start} with {len(trades_grouped_df)} trades "
                    f"might be unfinished, because no finished trades at {candle_next}")

            # add trades to each candle
            df.loc[is_between, 'trades'] = df.loc[is_between,
                                                  'trades'].apply(lambda _: trades_grouped_df)
            # calculate orderflow for each candle
            df.loc[is_between, 'orderflow'] = df.loc[is_between, 'orderflow'].apply(
                lambda _: trades_to_volumeprofile_with_total_delta_bid_ask(
                    pd.DataFrame(trades_grouped_df),
                    scale=config_orderflow['scale']))
            # calculate imbalances for each candle's orderflow
            df.loc[is_between, 'imbalances'] = df.loc[is_between, 'orderflow'].apply(
                lambda x: trades_orderflow_to_imbalances(
                    x,
                    imbalance_ratio=config_orderflow['imbalance_ratio'],
                    imbalance_volume=config_orderflow['imbalance_volume']))

            _stacked_imb = config_orderflow['stacked_imbalance_range']
            df.loc[is_between, 'stacked_imbalances_bid'] = df.loc[
                is_between, 'imbalances'].apply(lambda x: stacked_imbalance_bid(
                    x,
                    stacked_imbalance_range=_stacked_imb))
            df.loc[is_between, 'stacked_imbalances_ask'] = df.loc[
                is_between, 'imbalances'].apply(
                    lambda x: stacked_imbalance_ask(x, stacked_imbalance_range=_stacked_imb))

            # per-trade amounts: 'bid' is the amount of every trade whose side does not
            # contain 'buy', 'ask' the amount of every trade whose side does not
            # contain 'sell'
            bid_per_trade = np.where(
                trades_grouped_df['side'].str.contains('buy'), 0, trades_grouped_df['amount'])
            ask_per_trade = np.where(
                trades_grouped_df['side'].str.contains('sell'), 0, trades_grouped_df['amount'])

            # running delta over the trades of this candle, in row order; the
            # starting value 0 takes part in the min / max
            cumulative_delta = np.cumsum(ask_per_trade - bid_per_trade)
            df.loc[is_between, 'max_delta'] = max(0, cumulative_delta.max())
            df.loc[is_between, 'min_delta'] = min(0, cumulative_delta.min())

            df.loc[is_between, 'bid'] = bid_per_trade.sum()
            df.loc[is_between, 'ask'] = ask_per_trade.sum()
            df.loc[is_between, 'delta'] = df.loc[is_between,
                                                 'ask'] - df.loc[is_between, 'bid']

            df.loc[is_between, 'total_trades'] = len(trades_grouped_df)
            # copy to avoid memory leaks
            dataframe.loc[is_between] = df.loc[is_between].copy()

        logger.debug(
            f"trades.groups_keys in {time.time() - start_time} seconds")

    except Exception as e:
        # the message needs a placeholder for the extra argument, otherwise the
        # logging module fails to format the record
        logger.exception("Error populating dataframe with trades: %s", e)

    return dataframe


def trades_to_volumeprofile_with_total_delta_bid_ask(trades: DataFrame, scale: float):
    """
    Aggregate single trades into an orderflow table binned by price level.

    :param trades: dataframe of trades; the 'side', 'amount' and 'price' columns are read
    :param scale: scale aka bin size e.g. 0.5
    :return: trades binned to levels according to scale aka orderflow (indexed by the
        binned price). For empty input the unbinned empty frame is returned instead,
        with NaN 'total' and 'delta' columns.
    """
    orderflow = pd.DataFrame([], columns=DEFAULT_ORDERFLOW_COLUMNS)

    side = trades['side']
    amount = trades['amount']
    has_buy = side.str.contains('buy')
    has_sell = side.str.contains('sell')

    # bid takes every trade whose side does not contain 'buy',
    # ask every trade whose side does not contain 'sell'
    orderflow['bid_amount'] = np.where(has_buy, 0, amount)
    orderflow['ask_amount'] = np.where(has_sell, 0, amount)
    # same split again, counting trades instead of summing amounts
    orderflow['bid'] = np.where(has_buy, 0, 1)
    orderflow['ask'] = np.where(has_sell, 0, 1)

    # snap the prices to the nearest multiple of the scale
    binned_price = (trades['price'] / scale).round() * scale
    orderflow['price'] = binned_price.astype('float64').values

    if orderflow.empty:
        orderflow['total'] = np.nan
        orderflow['delta'] = np.nan
        return orderflow

    orderflow['delta'] = orderflow['ask_amount'] - orderflow['bid_amount']
    orderflow['total_volume'] = orderflow['ask_amount'] + orderflow['bid_amount']
    orderflow['total_trades'] = orderflow['ask'] + orderflow['bid']

    # group to bins aka apply scale
    return orderflow.groupby('price').sum(numeric_only=True)


def trades_orderflow_to_imbalances(df: DataFrame, imbalance_ratio: int, imbalance_volume: int):
    """
    Flag bid / ask imbalances per row of an orderflow dataframe.

    The bid of a row is compared with the ask of the following row. A row is
    flagged when the respective quotient exceeds ``imbalance_ratio / 100`` and
    its total_volume is not below ``imbalance_volume``.
    :param df: dataframe with 'bid', 'ask' and 'total_volume' columns
    :param imbalance_ratio: imbalance_ratio e.g. 300
    :param imbalance_volume: imbalance volume e.g. 3
    :return: dataframe with 'bid_imbalance' and 'ask_imbalance', sharing the index of df
    """
    threshold = imbalance_ratio / 100
    volume_too_small = df.total_volume < imbalance_volume

    bid_here = df.bid
    ask_next = df.ask.shift(-1)

    def _flag(numerator, denominator):
        # ratio test first, then force False where the volume is not big enough
        exceeds_ratio = (numerator / denominator) > threshold
        return np.where(volume_too_small, False, exceeds_ratio)

    flags = {
        "bid_imbalance": _flag(bid_here, ask_next),
        "ask_imbalance": _flag(ask_next, bid_here),
    }
    return DataFrame(flags, index=df.index)


def stacked_imbalance(df: DataFrame,
                      label: str,
                      stacked_imbalance_range: int,
                      should_reverse: bool):
    """
    Find the index label at which a run of consecutive imbalances is long enough.

    Consecutive truthy values are counted with
    y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1)
    https://stackoverflow.com/questions/27626542/counting-consecutive-positive-values-in-python-pandas-array

    :param df: dataframe holding a '<label>_imbalance' column
    :param label: column prefix, the column read is f'{label}_imbalance'
    :param stacked_imbalance_range: minimum run length that counts as stacked
    :param should_reverse: take the last qualifying position instead of the first
    :return: index label of df at the chosen position, NaN if no run is long enough
    """
    flags = df[f'{label}_imbalance']
    # positional 0/1 series, deliberately without the index of df
    ones = pd.Series(np.where(flags, 1, 0))

    # a new run starts wherever the value differs from its predecessor
    run_id = ones.ne(ones.shift()).cumsum()
    position_in_run = ones.groupby(run_id).cumcount() + 1
    # running length of the current run of 1s, 0 wherever the flag is not set
    streak = ones * position_in_run

    hits = streak.index[streak >= stacked_imbalance_range]
    if hits.empty:
        return np.nan

    position = hits[-1] if should_reverse else hits[0]
    return flags.index[position]


def stacked_imbalance_bid(df: DataFrame, stacked_imbalance_range: int):
    """
    Stacked imbalance lookup for the 'bid' label, taking the first qualifying position.

    :param df: dataframe passed through to stacked_imbalance
    :param stacked_imbalance_range: passed through to stacked_imbalance
    :return: result of stacked_imbalance
    """
    return stacked_imbalance(
        df=df,
        label='bid',
        stacked_imbalance_range=stacked_imbalance_range,
        should_reverse=False,
    )


def stacked_imbalance_ask(df: DataFrame, stacked_imbalance_range: int):
    """
    Stacked imbalance lookup for the 'ask' label, taking the last qualifying position.

    :param df: dataframe passed through to stacked_imbalance
    :param stacked_imbalance_range: passed through to stacked_imbalance
    :return: result of stacked_imbalance
    """
    return stacked_imbalance(
        df=df,
        label='ask',
        stacked_imbalance_range=stacked_imbalance_range,
        should_reverse=True,
    )


def orderflow_to_volume_profile(df: DataFrame):
    """
    Sum bid and ask per level and derive the delta between them.

    :param df: orderflow dataframe with 'level', 'bid' and 'ask' columns
    :return: volume profile dataframe indexed by level with the columns
        'bid', 'ask' and 'delta' (ask minus bid)
    """
    # group once and reuse the sums for the delta
    per_level = df.groupby('level')
    bid = per_level['bid'].sum()
    ask = per_level['ask'].sum()
    return pd.DataFrame({'bid': bid, 'ask': ask, 'delta': ask - bid})
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`Functions to convert orderflow data from public_trades`

			`"""`
			`import logging`
			`import time`

			`import numpy as np`
			`import pandas as pd`
			`from pandas import DataFrame`

			`from freqtrade.constants import DEFAULT_ORDERFLOW_COLUMNS, Config`
			`from freqtrade.exchange.exchange_utils import timeframe_to_resample_freq`


			`logger = logging.getLogger(__name__)`


			`def _init_dataframe_with_trades_columns(dataframe: DataFrame):`
			`"""`
			`Populates a dataframe with trades columns`
			`:param dataframe: Dataframe to populate`
			`"""`
			`dataframe['trades'] = dataframe.apply(lambda _: [], axis=1)`
			`dataframe['orderflow'] = dataframe.apply(lambda _: {}, axis=1)`
			`dataframe['bid'] = np.nan`
			`dataframe['ask'] = np.nan`
			`dataframe['delta'] = np.nan`
			`dataframe['min_delta'] = np.nan`
			`dataframe['max_delta'] = np.nan`
			`dataframe['total_trades'] = np.nan`
			`dataframe['stacked_imbalances_bid'] = np.nan`
			`dataframe['stacked_imbalances_ask'] = np.nan`


			`def _convert_timeframe_to_pandas_frequency(timeframe: str):`
			`# convert timeframe to format usable by pandas`
			`from freqtrade.exchange import timeframe_to_minutes`
			`timeframe_minutes = timeframe_to_minutes(timeframe)`
			`timeframe_frequency = f'{timeframe_minutes}min'`
			`return (timeframe_frequency, timeframe_minutes)`


			`def _calculate_ohlcv_candle_start_and_end(df: DataFrame, timeframe: str):`
			`_, timeframe_minutes = _convert_timeframe_to_pandas_frequency(`
			`timeframe)`

			`timeframe_frequency = timeframe_to_resample_freq(timeframe)`
			`# calculate ohlcv candle start and end`
			`if df is not None and not df.empty:`
			`df['datetime'] = pd.to_datetime(df['date'], unit='ms')`
			`df['candle_start'] = df['datetime'].dt.floor(timeframe_frequency)`
			`# used in _now_is_time_to_refresh_trades`
			`df['candle_end'] = df['candle_start'] + \`
			`pd.Timedelta(minutes=timeframe_minutes)`
			`df.drop(columns=['datetime'], inplace=True)`


			`def populate_dataframe_with_trades(config: Config,`
			`dataframe: DataFrame,`
chore: remove unused parameter `pairs` from `populate_dataframe_with_trades` 2024-02-26 10:55:59 +00:00			`trades: DataFrame) -> DataFrame:`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`Populates a dataframe with trades`
			`:param dataframe: Dataframe to populate`
			`:param trades: Trades to populate with`
			`:return: Dataframe with trades populated`
			`"""`
			`config_orderflow = config['orderflow']`
			`timeframe = config['timeframe']`

			`# create columns for trades`
			`_init_dataframe_with_trades_columns(dataframe)`
			`df = dataframe.copy()`

			`try:`
			`start_time = time.time()`
			`# calculate ohlcv candle start and end`
			`# TODO: check if call is necessary for df.`
			`_calculate_ohlcv_candle_start_and_end(df, timeframe)`
			`_calculate_ohlcv_candle_start_and_end(trades, timeframe)`

			`# slice of trades that are before current ohlcv candles to make groupby faster`
			`# TODO: maybe use df.date instead of df.candle_start at comparision below`
			`trades = trades.loc[trades.candle_start >= df.candle_start[0]]`
			`trades.reset_index(inplace=True, drop=True)`

			`# group trades by candle start`
			`trades_grouped_by_candle_start = trades.groupby(`
			`'candle_start', group_keys=False)`

			`for candle_start in trades_grouped_by_candle_start.groups:`
			`trades_grouped_df = trades[candle_start == trades['candle_start']]`
			`is_between = (candle_start == df['candle_start'])`
			`if np.any(is_between == True): # noqa: E712`
			`(_, timeframe_minutes) = _convert_timeframe_to_pandas_frequency(timeframe)`
			`candle_next = candle_start + \`
			`pd.Timedelta(minutes=timeframe_minutes)`
			`# skip if there are no trades at next candle`
			`# because that this candle isn't finished yet`
			`if candle_next not in trades_grouped_by_candle_start.groups:`
			`logger.warning(`
			`f"candle at {candle_start} with {len(trades_grouped_df)} trades "`
			`f"might be unfinished, because no finished trades at {candle_next}")`

			`# add trades to each candle`
			`df.loc[is_between, 'trades'] = df.loc[is_between,`
			`'trades'].apply(lambda _: trades_grouped_df)`
			`# calculate orderflow for each candle`
			`df.loc[is_between, 'orderflow'] = df.loc[is_between, 'orderflow'].apply(`
			`lambda _: trades_to_volumeprofile_with_total_delta_bid_ask(`
			`pd.DataFrame(trades_grouped_df),`
			`scale=config_orderflow['scale']))`
			`# calculate imbalances for each candle's orderflow`
			`df.loc[is_between, 'imbalances'] = df.loc[is_between, 'orderflow'].apply(`
			`lambda x: trades_orderflow_to_imbalances(`
			`x,`
			`imbalance_ratio=config_orderflow['imbalance_ratio'],`
			`imbalance_volume=config_orderflow['imbalance_volume']))`

			`_stacked_imb = config_orderflow['stacked_imbalance_range']`
			`df.loc[is_between, 'stacked_imbalances_bid'] = df.loc[`
			`is_between, 'imbalances'].apply(lambda x: stacked_imbalance_bid(`
			`x,`
			`stacked_imbalance_range=_stacked_imb))`
			`df.loc[is_between, 'stacked_imbalances_ask'] = df.loc[`
			`is_between, 'imbalances'].apply(`
			`lambda x: stacked_imbalance_ask(x, stacked_imbalance_range=_stacked_imb))`

			`# TODO: maybe use simple np.where instead`
			`buy = df.loc[is_between, 'bid'].apply(lambda _: np.where(`
			`trades_grouped_df['side'].str.contains('buy'), 0, trades_grouped_df['amount']))`
			`sell = df.loc[is_between, 'ask'].apply(lambda _: np.where(`
			`trades_grouped_df['side'].str.contains('sell'), 0, trades_grouped_df['amount']))`
			`deltas_per_trade = sell - buy`
			`min_delta = 0`
			`max_delta = 0`
			`delta = 0`
			`for deltas in deltas_per_trade:`
			`for d in deltas:`
			`delta += d`
			`if delta > max_delta:`
			`max_delta = delta`
			`if delta < min_delta:`
			`min_delta = delta`
			`df.loc[is_between, 'max_delta'] = max_delta`
			`df.loc[is_between, 'min_delta'] = min_delta`

			`df.loc[is_between, 'bid'] = np.where(trades_grouped_df['side'].str.contains(`
			`'buy'), 0, trades_grouped_df['amount']).sum()`
			`df.loc[is_between, 'ask'] = np.where(trades_grouped_df['side'].str.contains(`
			`'sell'), 0, trades_grouped_df['amount']).sum()`
			`df.loc[is_between, 'delta'] = df.loc[is_between,`
			`'ask'] - df.loc[is_between, 'bid']`
			`min_delta = np.min(deltas_per_trade)`
			`max_delta = np.max(deltas_per_trade)`

			`df.loc[is_between, 'total_trades'] = len(trades_grouped_df)`
			`# copy to avoid memory leaks`
			`dataframe.loc[is_between] = df.loc[is_between].copy()`
			`else:`
			`logger.debug(`
			`f"Found NO candles for trades starting with {candle_start}")`
			`logger.debug(`
			`f"trades.groups_keys in {time.time() - start_time} seconds")`

			`logger.debug(`
			`f"trades.singleton_iterate in {time.time() - start_time} seconds")`

			`except Exception as e:`
			`logger.exception("Error populating dataframe with trades:", e)`

			`return dataframe`


			`def trades_to_volumeprofile_with_total_delta_bid_ask(trades: DataFrame, scale: float):`
			`"""`
			`:param trades: dataframe`
			`:param scale: scale aka bin size e.g. 0.5`
			`:return: trades binned to levels according to scale aka orderflow`
			`"""`
			`df = pd.DataFrame([], columns=DEFAULT_ORDERFLOW_COLUMNS)`
			`# create bid, ask where side is sell or buy`
			`df['bid_amount'] = np.where(`
			`trades['side'].str.contains('buy'), 0, trades['amount'])`
			`df['ask_amount'] = np.where(`
			`trades['side'].str.contains('sell'), 0, trades['amount'])`
			`df['bid'] = np.where(trades['side'].str.contains('buy'), 0, 1)`
			`df['ask'] = np.where(trades['side'].str.contains('sell'), 0, 1)`

			`# round the prices to the nearest multiple of the scale`
			`df['price'] = ((trades['price'] / scale).round()`
			`* scale).astype('float64').values`
			`if df.empty:`
			`df['total'] = np.nan`
			`df['delta'] = np.nan`
			`return df`

			`df['delta'] = df['ask_amount'] - df['bid_amount']`
			`df['total_volume'] = df['ask_amount'] + df['bid_amount']`
			`df['total_trades'] = df['ask'] + df['bid']`

			`# group to bins aka apply scale`
			`df = df.groupby('price').sum(numeric_only=True)`
			`return df`


			`def trades_orderflow_to_imbalances(df: DataFrame, imbalance_ratio: int, imbalance_volume: int):`
			`"""`
			`:param df: dataframes with bid and ask`
			`:param imbalance_ratio: imbalance_ratio e.g. 300`
			`:param imbalance_volume: imbalance volume e.g. 3)`
			`:return: dataframe with bid and ask imbalance`
			`"""`
			`bid = df.bid`
			`ask = df.ask.shift(-1)`
			`bid_imbalance = (bid / ask) > (imbalance_ratio / 100)`
			`# overwrite bid_imbalance with False if volume is not big enough`
			`bid_imbalance_filtered = np.where(`
			`df.total_volume < imbalance_volume, False, bid_imbalance)`
			`ask_imbalance = (ask / bid) > (imbalance_ratio / 100)`
			`# overwrite ask_imbalance with False if volume is not big enough`
			`ask_imbalance_filtered = np.where(`
			`df.total_volume < imbalance_volume, False, ask_imbalance)`
			`dataframe = DataFrame({`
			`"bid_imbalance": bid_imbalance_filtered,`
			`"ask_imbalance": ask_imbalance_filtered`
			`}, index=df.index,`
			`)`

			`return dataframe`


			`def stacked_imbalance(df: DataFrame,`
			`label: str,`
			`stacked_imbalance_range: int,`
			`should_reverse: bool):`
			`"""`
			`y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1)`
			`https://stackoverflow.com/questions/27626542/counting-consecutive-positive-values-in-python-pandas-array`
			`"""`
			`imbalance = df[f'{label}_imbalance']`
			`int_series = pd.Series(np.where(imbalance, 1, 0))`
			`stacked = (`
			`int_series * (`
			`int_series.groupby(`
			`(int_series != int_series.shift()).cumsum()).cumcount() + 1`
			`)`
			`)`

			`max_stacked_imbalance_idx = stacked.index[stacked >=`
			`stacked_imbalance_range]`
			`stacked_imbalance_price = np.nan`
			`if not max_stacked_imbalance_idx.empty:`
			`idx = max_stacked_imbalance_idx[0] if not should_reverse else np.flipud(`
			`max_stacked_imbalance_idx)[0]`
			`stacked_imbalance_price = imbalance.index[idx]`
			`return stacked_imbalance_price`


			`def stacked_imbalance_bid(df: DataFrame, stacked_imbalance_range: int):`
			`return stacked_imbalance(df, 'bid', stacked_imbalance_range, should_reverse=False)`


			`def stacked_imbalance_ask(df: DataFrame, stacked_imbalance_range: int):`
			`return stacked_imbalance(df, 'ask', stacked_imbalance_range, should_reverse=True)`


			`def orderflow_to_volume_profile(df: DataFrame):`
			`"""`
			`:param orderflow: dataframe`
			`:return: volume profile dataframe`
			`"""`
			`bid = df.groupby('level').bid.sum()`
			`ask = df.groupby('level').ask.sum()`
			`df.groupby('level')['level'].sum()`
			`delta = df.groupby('level').ask.sum() - df.groupby('level').bid.sum()`
			`df = pd.DataFrame({'bid': bid, 'ask': ask, 'delta': delta})`
			`return df`