freqtrade_origin/freqtrade/data/converter/orderflow.py

"""
Functions to convert orderflow data from public_trades
"""

import logging
import time
import typing
from collections import OrderedDict
from datetime import datetime
from typing import Tuple

import numpy as np
import pandas as pd

from freqtrade.constants import DEFAULT_ORDERFLOW_COLUMNS
from freqtrade.enums import RunMode
from freqtrade.exceptions import DependencyException


logger = logging.getLogger(__name__)


def _init_dataframe_with_trades_columns(dataframe: pd.DataFrame):
    """
    Populates a dataframe with trades columns
    :param dataframe: Dataframe to populate
    """
    # Initialize columns with appropriate dtypes
    dataframe["trades"] = np.nan
    dataframe["orderflow"] = np.nan
    dataframe["imbalances"] = np.nan
    dataframe["stacked_imbalances_bid"] = np.nan
    dataframe["stacked_imbalances_ask"] = np.nan
    dataframe["max_delta"] = np.nan
    dataframe["min_delta"] = np.nan
    dataframe["bid"] = np.nan
    dataframe["ask"] = np.nan
    dataframe["delta"] = np.nan
    dataframe["total_trades"] = np.nan

    # Ensure the 'trades' column is of object type
    dataframe["trades"] = dataframe["trades"].astype(object)
    dataframe["orderflow"] = dataframe["orderflow"].astype(object)
    dataframe["imbalances"] = dataframe["imbalances"].astype(object)
    dataframe["stacked_imbalances_bid"] = dataframe["stacked_imbalances_bid"].astype(object)
    dataframe["stacked_imbalances_ask"] = dataframe["stacked_imbalances_ask"].astype(object)


def _calculate_ohlcv_candle_start_and_end(df: pd.DataFrame, timeframe: str):
    """
    Adds "candle_start" and "candle_end" columns to a trades dataframe (in place).
    :param df: Dataframe with a "date" column that is parsed as millisecond timestamps.
        None or an empty dataframe is left untouched.
    :param timeframe: Timeframe, handed to the freqtrade.exchange timeframe helpers
    """
    from freqtrade.exchange import timeframe_to_next_date, timeframe_to_resample_freq

    resample_freq = timeframe_to_resample_freq(timeframe)
    if df is None or df.empty:
        # nothing to calculate
        return

    # temporary helper column, removed again below
    df["datetime"] = pd.to_datetime(df["date"], unit="ms")
    # start of the candle each trade falls into
    df["candle_start"] = df["datetime"].dt.floor(resample_freq)

    def _next_candle_date(candle_start):
        return timeframe_to_next_date(timeframe, candle_start)

    # used in _now_is_time_to_refresh_trades
    df["candle_end"] = df["candle_start"].apply(_next_candle_date)
    del df["datetime"]


def populate_dataframe_with_trades(
    cached_grouped_trades: OrderedDict[Tuple[datetime, datetime], pd.DataFrame],
    config,
    dataframe: pd.DataFrame,
    trades: pd.DataFrame,
) -> Tuple[pd.DataFrame, OrderedDict[Tuple[datetime, datetime], pd.DataFrame]]:
    """
    Populates a dataframe with trades and the orderflow data derived from them.

    Trades are grouped by the candle they belong to. For every group that matches a candle
    in the dataframe, the trades themselves, the orderflow (volume profile), the imbalances,
    the stacked imbalances and the bid / ask / delta figures are written to that candle.
    Results are cached per (candle_start, candle_next) and reused on later calls.

    :param cached_grouped_trades: Cache of already calculated candles, keyed by
        (candle_start, candle_next). Updated in place. It is only pruned to
        config["orderflow"]["cache_size"] entries in dry-run / live runmode.
    :param config: Configuration. Reads "timeframe", "runmode" (optional) and, from
        "orderflow": "max_candles", "scale", "imbalance_ratio", "imbalance_volume",
        "stacked_imbalance_range" and "cache_size".
    :param dataframe: Dataframe to populate (modified in place, needs a "date" column)
    :param trades: Trades to populate with. "candle_start" / "candle_end" columns are
        added to this dataframe in place.
    :return: Tuple of the populated dataframe and the (updated) cache
    :raises DependencyException: if any error occurs while populating the dataframe
    """
    # Function-scope import as in the original code, now done once instead of per candle.
    # NOTE(review): presumably local to avoid a circular import - confirm.
    from freqtrade.exchange import timeframe_to_next_date

    timeframe = config["timeframe"]
    config_orderflow = config["orderflow"]

    # create columns for trades
    _init_dataframe_with_trades_columns(dataframe)

    try:
        start_time = time.time()
        # calculate ohlcv candle start and end
        _calculate_ohlcv_candle_start_and_end(trades, timeframe)

        # get date of earliest max_candles candle
        max_candles = config_orderflow["max_candles"]
        start_date = dataframe.tail(max_candles).date.iat[0]
        # drop trades that are before the considered ohlcv candles to make groupby faster.
        # reset_index is chained so the .loc selection is never modified in place.
        trades = trades.loc[trades.candle_start >= start_date].reset_index(drop=True)

        # Series to hold the complex (object) data per candle
        trades_series = pd.Series(index=dataframe.index, dtype=object)
        derived_series = {
            column: pd.Series(index=dataframe.index, dtype=object)
            for column in (
                "orderflow",
                "imbalances",
                "stacked_imbalances_bid",
                "stacked_imbalances_ask",
            )
        }
        # columns that are only merged into the dataframe after the loop
        complex_columns = ("trades", *derived_series)

        # group trades by candle start
        trades_grouped_by_candle_start = trades.groupby("candle_start", group_keys=False)
        for candle_start, trades_grouped_df in trades_grouped_by_candle_start:
            is_between = candle_start == dataframe["date"]
            if not is_between.any():
                logger.debug(f"Found NO candles for trades starting with {candle_start}")
                continue

            candle_next = timeframe_to_next_date(timeframe, typing.cast(datetime, candle_start))
            if candle_next not in trades_grouped_by_candle_start.groups:
                logger.warning(
                    f"candle at {candle_start} with {len(trades_grouped_df)} trades "
                    f"might be unfinished, because no finished trades at {candle_next}"
                )

            indices = dataframe.index[is_between].tolist()
            # Add trades to each candle
            trades_series.loc[indices] = [trades_grouped_df]

            cache_key = (typing.cast(datetime, candle_start), candle_next)
            # Use caching mechanism
            if cache_key in cached_grouped_trades:
                cache_entry = cached_grouped_trades[cache_key]
                # dataframe.loc[is_between] = cache_entry # doesn't take, so we need workaround:
                # Create a dictionary of the (non-complex) column values to be assigned
                update_dict = {
                    c: cache_entry[c].iat[0]
                    for c in cache_entry.columns
                    if c not in complex_columns
                }
                # Assign the values using the update_dict
                dataframe.loc[is_between, list(update_dict)] = pd.DataFrame(
                    [update_dict], index=dataframe.loc[is_between].index
                )
                # The complex columns are overwritten from the Series after the loop, so the
                # cached objects must be restored into the Series - otherwise cached candles
                # would end up with NaN orderflow / imbalances.
                for column, series in derived_series.items():
                    if column in cache_entry.columns:
                        series.loc[indices] = [cache_entry[column].iat[0]]
                continue

            # Calculate orderflow for each candle
            orderflow = trades_to_volumeprofile_with_total_delta_bid_ask(
                trades_grouped_df, scale=config_orderflow["scale"]
            )
            derived_series["orderflow"].loc[indices] = [orderflow]
            # Calculate imbalances for each candle's orderflow
            imbalances = trades_orderflow_to_imbalances(
                orderflow,
                imbalance_ratio=config_orderflow["imbalance_ratio"],
                imbalance_volume=config_orderflow["imbalance_volume"],
            )
            derived_series["imbalances"].loc[indices] = [imbalances]

            stacked_imbalance_range = config_orderflow["stacked_imbalance_range"]
            derived_series["stacked_imbalances_bid"].loc[indices] = [
                stacked_imbalance_bid(imbalances, stacked_imbalance_range=stacked_imbalance_range)
            ]
            derived_series["stacked_imbalances_ask"].loc[indices] = [
                stacked_imbalance_ask(imbalances, stacked_imbalance_range=stacked_imbalance_range)
            ]

            # "sell" trades count as bid volume, "buy" trades as ask volume
            bid = np.where(
                trades_grouped_df["side"].str.contains("sell"), trades_grouped_df["amount"], 0
            )
            ask = np.where(
                trades_grouped_df["side"].str.contains("buy"), trades_grouped_df["amount"], 0
            )
            # running delta over the trades of this candle (computed once)
            cumulative_delta = (ask - bid).cumsum()
            dataframe.loc[indices, "max_delta"] = cumulative_delta.max()
            dataframe.loc[indices, "min_delta"] = cumulative_delta.min()

            dataframe.loc[indices, "bid"] = bid.sum()
            dataframe.loc[indices, "ask"] = ask.sum()
            dataframe.loc[indices, "delta"] = (
                dataframe.loc[indices, "ask"] - dataframe.loc[indices, "bid"]
            )
            dataframe.loc[indices, "total_trades"] = len(trades_grouped_df)

            # Cache the result, including the complex values which are not part of the
            # dataframe yet at this point
            cache_entry = dataframe.loc[is_between].copy()
            for column, series in derived_series.items():
                cache_entry[column] = series.loc[indices]
            cached_grouped_trades[cache_key] = cache_entry

            # Maintain cache size
            if (
                config.get("runmode") in (RunMode.DRY_RUN, RunMode.LIVE)
                and len(cached_grouped_trades) > config_orderflow["cache_size"]
            ):
                cached_grouped_trades.popitem(last=False)
        logger.debug(f"trades.groups_keys in {time.time() - start_time} seconds")

        # Merge the complex data Series back into the DataFrame
        # TODO: maybe candle_start and candle_end should be dropped before assignment?
        dataframe["trades"] = trades_series
        for column, series in derived_series.items():
            dataframe[column] = series

    except Exception as e:
        logger.exception("Error populating dataframe with trades")
        # keep the original error attached as the explicit cause
        raise DependencyException(e) from e

    return dataframe, cached_grouped_trades


def trades_to_volumeprofile_with_total_delta_bid_ask(
    trades: pd.DataFrame, scale: float
) -> pd.DataFrame:
    """
    Bins trades to price levels (aka orderflow).

    :param trades: dataframe with "side", "amount" and "price" columns
    :param scale: scale aka bin size e.g. 0.5
    :return: trades binned to levels according to scale aka orderflow: grouped by the
        binned price, with the numeric columns summed per level.
        For empty input an empty, ungrouped dataframe is returned which carries the
        "delta", "total_volume" and "total_trades" columns (plus the legacy "total" column).
    """
    df = pd.DataFrame([], columns=DEFAULT_ORDERFLOW_COLUMNS)
    # "sell" trades count towards bid, "buy" trades towards ask
    is_sell = trades["side"].str.contains("sell")
    is_buy = trades["side"].str.contains("buy")
    # traded amount per side
    df["bid_amount"] = np.where(is_sell, trades["amount"], 0)
    df["ask_amount"] = np.where(is_buy, trades["amount"], 0)
    # number of trades per side
    df["bid"] = np.where(is_sell, 1, 0)
    df["ask"] = np.where(is_buy, 1, 0)
    # round the prices to the nearest multiple of the scale
    df["price"] = ((trades["price"] / scale).round() * scale).astype("float64").values
    if df.empty:
        # "total" is kept for backward compatibility
        df["total"] = np.nan
        df["delta"] = np.nan
        # same column names as the non-empty result, so consumers reading
        # total_volume / total_trades also work on an empty orderflow
        df["total_volume"] = np.nan
        df["total_trades"] = np.nan
        return df

    df["delta"] = df["ask_amount"] - df["bid_amount"]
    df["total_volume"] = df["ask_amount"] + df["bid_amount"]
    df["total_trades"] = df["ask"] + df["bid"]

    # group to bins aka apply scale
    df = df.groupby("price").sum(numeric_only=True)
    return df


def trades_orderflow_to_imbalances(df: pd.DataFrame, imbalance_ratio: int, imbalance_volume: int):
    """
    Flags bid and ask imbalances for every row of an orderflow dataframe.

    :param df: dataframe with bid, ask and total_volume
    :param imbalance_ratio: imbalance_ratio e.g. 3
    :param imbalance_volume: imbalance volume e.g. 10
    :return: dataframe with bid_imbalance and ask_imbalance flags, indexed like df
    """
    # diagonal comparison: bid of a row against the ask of the following row
    row_bid = df.bid
    next_ask = df.ask.shift(-1)
    # rows without enough volume are never flagged
    too_little_volume = df.total_volume < imbalance_volume

    ratios = (
        ("bid_imbalance", row_bid / next_ask),
        ("ask_imbalance", next_ask / row_bid),
    )
    flags = {
        name: np.where(too_little_volume, False, ratio > imbalance_ratio)
        for name, ratio in ratios
    }
    return pd.DataFrame(flags, index=df.index)


def stacked_imbalance(
    df: pd.DataFrame, label: str, stacked_imbalance_range: int, should_reverse: bool
):
    """
    Finds the index label (of df) at which consecutive "<label>_imbalance" flags reach
    a run length of at least stacked_imbalance_range.

    Consecutive flags are counted with
    y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1)
    https://stackoverflow.com/questions/27626542/counting-consecutive-positive-values-in-python-pandas-array

    :param df: dataframe with a "<label>_imbalance" column
    :param label: column prefix, e.g. "bid" or "ask"
    :param stacked_imbalance_range: minimum run length to look for
    :param should_reverse: False picks the first qualifying position, True the last one
    :return: index label of the picked position, np.nan if no run is long enough
    """
    flags = df[f"{label}_imbalance"]
    # 1 / 0 series on a positional index
    ones = pd.Series(np.where(flags, 1, 0))
    # every change of value starts a new run
    run_ids = ones.ne(ones.shift()).cumsum()
    # position inside the run (1-based), zeroed for unflagged rows
    run_position = ones.groupby(run_ids).cumcount() + 1
    streak = ones * run_position

    hits = streak.index[streak >= stacked_imbalance_range]
    if hits.empty:
        return np.nan

    position = hits[-1] if should_reverse else hits[0]
    return flags.index[position]


def stacked_imbalance_ask(df: pd.DataFrame, stacked_imbalance_range: int):
    """
    Stacked imbalance lookup for the "ask" side, picking the last qualifying position.
    :param df: dataframe with an "ask_imbalance" column
    :param stacked_imbalance_range: minimum number of consecutive imbalances
    :return: result of stacked_imbalance for label "ask" with should_reverse=True
    """
    return stacked_imbalance(
        df,
        label="ask",
        stacked_imbalance_range=stacked_imbalance_range,
        should_reverse=True,
    )


def stacked_imbalance_bid(df: pd.DataFrame, stacked_imbalance_range: int):
    """
    Stacked imbalance lookup for the "bid" side, picking the first qualifying position.
    :param df: dataframe with a "bid_imbalance" column
    :param stacked_imbalance_range: minimum number of consecutive imbalances
    :return: result of stacked_imbalance for label "bid" with should_reverse=False
    """
    return stacked_imbalance(
        df,
        label="bid",
        stacked_imbalance_range=stacked_imbalance_range,
        should_reverse=False,
    )
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`Functions to convert orderflow data from public_trades`
			`"""`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`import logging`
			`import time`
orderflow: fixing typing 2024-06-26 17:43:13 +00:00			`import typing`
Merge branch 'feature/fetch-public-trades-cached' into feature/fetch-public-trades 2024-06-24 15:47:45 +00:00			`from collections import OrderedDict`
orderflow: use cache per pair 2024-06-26 17:54:36 +00:00			`from datetime import datetime`
Extract global `cached_grouped_trades_per_pair` into `IStrategy` 2024-07-04 15:12:30 +00:00			`from typing import Tuple`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
			`import numpy as np`
			`import pandas as pd`

Merge branch 'feature/fetch-public-trades-cached' into feature/fetch-public-trades 2024-06-24 15:47:45 +00:00			`from freqtrade.constants import DEFAULT_ORDERFLOW_COLUMNS`
Extract global `cached_grouped_trades_per_pair` into `IStrategy` 2024-07-04 15:12:30 +00:00			`from freqtrade.enums import RunMode`
orderflow: raise DependencyException instead of raising generic exception 2024-05-28 18:18:41 +00:00			`from freqtrade.exceptions import DependencyException`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00

			`logger = logging.getLogger(__name__)`


Avoid duplicate pandas imports 2024-03-16 15:26:17 +00:00			`def _init_dataframe_with_trades_columns(dataframe: pd.DataFrame):`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`Populates a dataframe with trades columns`
			`:param dataframe: Dataframe to populate`
			`"""`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`# Initialize columns with appropriate dtypes`
			`dataframe["trades"] = np.nan`
			`dataframe["orderflow"] = np.nan`
			`dataframe["imbalances"] = np.nan`
			`dataframe["stacked_imbalances_bid"] = np.nan`
			`dataframe["stacked_imbalances_ask"] = np.nan`
			`dataframe["max_delta"] = np.nan`
			`dataframe["min_delta"] = np.nan`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`dataframe["bid"] = np.nan`
			`dataframe["ask"] = np.nan`
			`dataframe["delta"] = np.nan`
			`dataframe["total_trades"] = np.nan`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00
			`# Ensure the 'trades' column is of object type`
			`dataframe["trades"] = dataframe["trades"].astype(object)`
			`dataframe["orderflow"] = dataframe["orderflow"].astype(object)`
			`dataframe["imbalances"] = dataframe["imbalances"].astype(object)`
			`dataframe["stacked_imbalances_bid"] = dataframe["stacked_imbalances_bid"].astype(object)`
			`dataframe["stacked_imbalances_ask"] = dataframe["stacked_imbalances_ask"].astype(object)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00

Avoid duplicate pandas imports 2024-03-16 15:26:17 +00:00			`def _calculate_ohlcv_candle_start_and_end(df: pd.DataFrame, timeframe: str):`
cleanup ruff and isort errors 2024-05-14 15:28:07 +00:00			`from freqtrade.exchange import timeframe_to_next_date, timeframe_to_resample_freq`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
			`timeframe_frequency = timeframe_to_resample_freq(timeframe)`
			`# calculate ohlcv candle start and end`
			`if df is not None and not df.empty:`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`df["datetime"] = pd.to_datetime(df["date"], unit="ms")`
			`df["candle_start"] = df["datetime"].dt.floor(timeframe_frequency)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`# used in _now_is_time_to_refresh_trades`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`df["candle_end"] = df["candle_start"].apply(`
use `timeframe_to_next_date` and `date` to calculate `candle_start`/`candle_end` 2024-05-08 13:11:24 +00:00			`lambda candle_start: timeframe_to_next_date(timeframe, candle_start)`
			`)`
			`df.drop(columns=["datetime"], inplace=True)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00

orderflow: use cache per pair 2024-06-26 17:54:36 +00:00			`def populate_dataframe_with_trades(`
Extract global `cached_grouped_trades_per_pair` into `IStrategy` 2024-07-04 15:12:30 +00:00			`cached_grouped_trades: OrderedDict[Tuple[datetime, datetime], pd.DataFrame],`
			`config,`
			`dataframe: pd.DataFrame,`
			`trades: pd.DataFrame,`
			`) -> Tuple[pd.DataFrame, OrderedDict[Tuple[datetime, datetime], pd.DataFrame]]:`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`Populates a dataframe with trades`
			`:param dataframe: Dataframe to populate`
			`:param trades: Trades to populate with`
			`:return: Dataframe with trades populated`
			`"""`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`timeframe = config["timeframe"]`
orderflow: adds `cache_size` to config 2024-06-24 15:21:29 +00:00			`config_orderflow = config["orderflow"]`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
			`# create columns for trades`
			`_init_dataframe_with_trades_columns(dataframe)`

			`try:`
			`start_time = time.time()`
			`# calculate ohlcv candle start and end`
			`_calculate_ohlcv_candle_start_and_end(trades, timeframe)`

feat: adds `max_candles` to orderflow config 2024-06-19 17:22:49 +00:00			`# get date of earliest max_candles candle`
			`max_candles = config_orderflow["max_candles"]`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`start_date = dataframe.tail(max_candles).date.iat[0]`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`# slice of trades that are before current ohlcv candles to make groupby faster`
feat: adds `max_candles` to orderflow config 2024-06-19 17:22:49 +00:00			`trades = trades.loc[trades.candle_start >= start_date]`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`trades.reset_index(inplace=True, drop=True)`

			`# group trades by candle start`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`trades_grouped_by_candle_start = trades.groupby("candle_start", group_keys=False)`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`# Create Series to hold complex data`
			`trades_series = pd.Series(index=dataframe.index, dtype=object)`
			`orderflow_series = pd.Series(index=dataframe.index, dtype=object)`
			`imbalances_series = pd.Series(index=dataframe.index, dtype=object)`
			`stacked_imbalances_bid_series = pd.Series(index=dataframe.index, dtype=object)`
			`stacked_imbalances_ask_series = pd.Series(index=dataframe.index, dtype=object)`

Merge branch 'feature/fetch-public-trades-cached' into feature/fetch-public-trades 2024-06-24 15:47:45 +00:00			`trades_grouped_by_candle_start = trades.groupby("candle_start", group_keys=False)`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`for candle_start, trades_grouped_df in trades_grouped_by_candle_start:`
			`is_between = candle_start == dataframe["date"]`
			`if is_between.any():`
use `timeframe_to_next_date` and `date` to calculate `candle_start`/`candle_end` 2024-05-08 13:11:24 +00:00			`from freqtrade.exchange import timeframe_to_next_date`

orderflow: fixing typing 2024-06-26 17:43:13 +00:00			`candle_next = timeframe_to_next_date(timeframe, typing.cast(datetime, candle_start))`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`if candle_next not in trades_grouped_by_candle_start.groups:`
			`logger.warning(`
			`f"candle at {candle_start} with {len(trades_grouped_df)} trades "`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`f"might be unfinished, because no finished trades at {candle_next}"`
			`)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`indices = dataframe.index[is_between].tolist()`
			`# Add trades to each candle`
orderflow: clean code 2024-06-24 16:07:17 +00:00			`trades_series.loc[indices] = [trades_grouped_df]`
feat: add caching to populate_dataframe_with_trades 2024-06-21 13:28:11 +00:00			`# Use caching mechanism`
			`if (candle_start, candle_next) in cached_grouped_trades:`
orderflow: use cache per pair 2024-06-26 17:54:36 +00:00			`cache_entry = cached_grouped_trades[`
			`(typing.cast(datetime, candle_start), candle_next)`
			`]`
fix: copying orderflow dataframe from cache doesn't work 2024-06-24 15:14:22 +00:00			`# dataframe.loc[is_between] = cache_entry # doesn't take, so we need workaround:`
			`# Create a dictionary of the column values to be assigned`
			`update_dict = {c: cache_entry[c].iat[0] for c in cache_entry.columns}`
			`# Assign the values using the update_dict`
			`dataframe.loc[is_between, update_dict.keys()] = pd.DataFrame(`
			`[update_dict], index=dataframe.loc[is_between].index`
			`)`
feat: add caching to populate_dataframe_with_trades 2024-06-21 13:28:11 +00:00			`continue`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00
			`# Calculate orderflow for each candle`
			`orderflow = trades_to_volumeprofile_with_total_delta_bid_ask(`
			`trades_grouped_df, scale=config_orderflow["scale"]`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`)`
orderflow: clean code 2024-06-24 16:07:17 +00:00			`orderflow_series.loc[indices] = [orderflow]`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`# Calculate imbalances for each candle's orderflow`
			`imbalances = trades_orderflow_to_imbalances(`
			`orderflow,`
			`imbalance_ratio=config_orderflow["imbalance_ratio"],`
			`imbalance_volume=config_orderflow["imbalance_volume"],`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`)`
orderflow: clean code 2024-06-24 16:07:17 +00:00			`imbalances_series.loc[indices] = [imbalances]`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`stacked_imbalance_range = config_orderflow["stacked_imbalance_range"]`
			`stacked_imbalances_bid_series.loc[indices] = [`
			`stacked_imbalance_bid(`
			`imbalances, stacked_imbalance_range=stacked_imbalance_range`
			`)`
orderflow: clean code 2024-06-24 16:07:17 +00:00			`]`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`stacked_imbalances_ask_series.loc[indices] = [`
			`stacked_imbalance_ask(`
			`imbalances, stacked_imbalance_range=stacked_imbalance_range`
			`)`
orderflow: clean code 2024-06-24 16:07:17 +00:00			`]`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00
use np.where instead of apply 2024-06-02 15:09:23 +00:00			`bid = np.where(`
Merge branch 'feature/fetch-public-trades-cached' into feature/fetch-public-trades 2024-06-24 15:47:45 +00:00			`trades_grouped_df["side"].str.contains("sell"), trades_grouped_df["amount"], 0`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`)`
Merge branch 'feature/fetch-public-trades-cached' into feature/fetch-public-trades 2024-06-24 15:47:45 +00:00
use np.where instead of apply 2024-06-02 15:09:23 +00:00			`ask = np.where(`
Merge branch 'feature/fetch-public-trades-cached' into feature/fetch-public-trades 2024-06-24 15:47:45 +00:00			`trades_grouped_df["side"].str.contains("buy"), trades_grouped_df["amount"], 0`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`)`
orderflow: fix ask/bid & buy/sell mixup 2024-05-31 17:21:41 +00:00			`deltas_per_trade = ask - bid`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`min_delta = deltas_per_trade.cumsum().min()`
			`max_delta = deltas_per_trade.cumsum().max()`
			`dataframe.loc[indices, "max_delta"] = max_delta`
			`dataframe.loc[indices, "min_delta"] = min_delta`

			`dataframe.loc[indices, "bid"] = bid.sum()`
			`dataframe.loc[indices, "ask"] = ask.sum()`
			`dataframe.loc[indices, "delta"] = (`
			`dataframe.loc[indices, "ask"] - dataframe.loc[indices, "bid"]`
			`)`
			`dataframe.loc[indices, "total_trades"] = len(trades_grouped_df)`
feat: add caching to populate_dataframe_with_trades 2024-06-21 13:28:11 +00:00
			`# Cache the result`
orderflow: use cache per pair 2024-06-26 17:54:36 +00:00			`cached_grouped_trades[(typing.cast(datetime, candle_start), candle_next)] = (`
			`dataframe.loc[is_between].copy()`
			`)`
feat: add caching to populate_dataframe_with_trades 2024-06-21 13:28:11 +00:00
			`# Maintain cache size`
Extract global `cached_grouped_trades_per_pair` into `IStrategy` 2024-07-04 15:12:30 +00:00			`if (`
			`config.get("runmode") in (RunMode.DRY_RUN, RunMode.LIVE)`
			`and len(cached_grouped_trades) > config_orderflow["cache_size"]`
			`):`
feat: add caching to populate_dataframe_with_trades 2024-06-21 13:28:11 +00:00			`cached_grouped_trades.popitem(last=False)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`else:`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`logger.debug(f"Found NO candles for trades starting with {candle_start}")`
			`logger.debug(f"trades.groups_keys in {time.time() - start_time} seconds")`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`# Merge the complex data Series back into the DataFrame`
chore: Add small todo 2024-07-13 07:49:21 +00:00			`# TODO: maybe candle_start and candle_end should be dropped before assignment?`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`dataframe["trades"] = trades_series`
			`dataframe["orderflow"] = orderflow_series`
			`dataframe["imbalances"] = imbalances_series`
			`dataframe["stacked_imbalances_bid"] = stacked_imbalances_bid_series`
			`dataframe["stacked_imbalances_ask"] = stacked_imbalances_ask_series`

Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`except Exception as e:`
orderflow: clean up populate_dataframe_with_trades code 2024-06-21 14:01:16 +00:00			`logger.exception("Error populating dataframe with trades")`
orderflow: raise DependencyException instead of raising generic exception 2024-05-28 18:18:41 +00:00			`raise DependencyException(e)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
Extract global `cached_grouped_trades_per_pair` into `IStrategy` 2024-07-04 15:12:30 +00:00			`return dataframe, cached_grouped_trades`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00

orderflow: use cache per pair 2024-06-26 17:54:36 +00:00			`def trades_to_volumeprofile_with_total_delta_bid_ask(`
			`trades: pd.DataFrame, scale: float`
			`) -> pd.DataFrame:`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`:param trades: dataframe`
			`:param scale: scale aka bin size e.g. 0.5`
			`:return: trades binned to levels according to scale aka orderflow`
			`"""`
			`df = pd.DataFrame([], columns=DEFAULT_ORDERFLOW_COLUMNS)`
			`# create bid, ask where side is sell or buy`
orderflow: fix ask/bid & buy/sell mixup 2024-05-31 17:21:41 +00:00			`df["bid_amount"] = np.where(trades["side"].str.contains("sell"), trades["amount"], 0)`
			`df["ask_amount"] = np.where(trades["side"].str.contains("buy"), trades["amount"], 0)`
			`df["bid"] = np.where(trades["side"].str.contains("sell"), 1, 0)`
			`df["ask"] = np.where(trades["side"].str.contains("buy"), 1, 0)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`# round the prices to the nearest multiple of the scale`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`df["price"] = ((trades["price"] / scale).round() * scale).astype("float64").values`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`if df.empty:`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`df["total"] = np.nan`
			`df["delta"] = np.nan`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`return df`

ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`df["delta"] = df["ask_amount"] - df["bid_amount"]`
			`df["total_volume"] = df["ask_amount"] + df["bid_amount"]`
			`df["total_trades"] = df["ask"] + df["bid"]`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00
			`# group to bins aka apply scale`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`df = df.groupby("price").sum(numeric_only=True)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`return df`


Avoid duplicate pandas imports 2024-03-16 15:26:17 +00:00			`def trades_orderflow_to_imbalances(df: pd.DataFrame, imbalance_ratio: int, imbalance_volume: int):`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`:param df: dataframes with bid and ask`
`imbalance_ratio`: use ratio instead of percentage 2024-05-17 13:49:53 +00:00			`:param imbalance_ratio: imbalance_ratio e.g. 3`
			`:param imbalance_volume: imbalance volume e.g. 10`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`:return: dataframe with bid and ask imbalance`
			`"""`
			`bid = df.bid`
orderflow: add comment with insight about imbalances 2024-05-28 18:22:26 +00:00			`# compares bid and ask diagonally`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`ask = df.ask.shift(-1)`
`imbalance_ratio`: use ratio instead of percentage 2024-05-17 13:49:53 +00:00			`bid_imbalance = (bid / ask) > (imbalance_ratio)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`# overwrite bid_imbalance with False if volume is not big enough`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`bid_imbalance_filtered = np.where(df.total_volume < imbalance_volume, False, bid_imbalance)`
`imbalance_ratio`: use ratio instead of percentage 2024-05-17 13:49:53 +00:00			`ask_imbalance = (ask / bid) > (imbalance_ratio)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`# overwrite ask_imbalance with False if volume is not big enough`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`ask_imbalance_filtered = np.where(df.total_volume < imbalance_volume, False, ask_imbalance)`
			`dataframe = pd.DataFrame(`
			`{"bid_imbalance": bid_imbalance_filtered, "ask_imbalance": ask_imbalance_filtered},`
			`index=df.index,`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`)`

			`return dataframe`


ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`def stacked_imbalance(`
			`df: pd.DataFrame, label: str, stacked_imbalance_range: int, should_reverse: bool`
			`):`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`"""`
			`y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1)`
			`https://stackoverflow.com/questions/27626542/counting-consecutive-positive-values-in-python-pandas-array`
			`"""`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`imbalance = df[f"{label}_imbalance"]`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`int_series = pd.Series(np.where(imbalance, 1, 0))`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`stacked = int_series * (`
			`int_series.groupby((int_series != int_series.shift()).cumsum()).cumcount() + 1`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`)`

ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`max_stacked_imbalance_idx = stacked.index[stacked >= stacked_imbalance_range]`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`stacked_imbalance_price = np.nan`
			`if not max_stacked_imbalance_idx.empty:`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`idx = (`
			`max_stacked_imbalance_idx[0]`
			`if not should_reverse`
			`else np.flipud(max_stacked_imbalance_idx)[0]`
			`)`
Moves orderflow logic to it's own file 2024-02-12 12:20:52 +00:00			`stacked_imbalance_price = imbalance.index[idx]`
			`return stacked_imbalance_price`


Avoid duplicate pandas imports 2024-03-16 15:26:17 +00:00			`def stacked_imbalance_ask(df: pd.DataFrame, stacked_imbalance_range: int):`
ruff format: orderflow / public trades 2024-05-15 15:09:32 +00:00			`return stacked_imbalance(df, "ask", stacked_imbalance_range, should_reverse=True)`
orderflow: fix ask/bid & buy/sell mixup 2024-05-31 17:21:41 +00:00

			`def stacked_imbalance_bid(df: pd.DataFrame, stacked_imbalance_range: int):`
			`return stacked_imbalance(df, "bid", stacked_imbalance_range, should_reverse=False)`