Skip to content

Commit

Permalink
Fix and Refine TradeTickDataWrangler.process_bar_data (#1586)
Browse files Browse the repository at this point in the history
  • Loading branch information
rsmb7z authored Apr 11, 2024
1 parent a8592cd commit c267d30
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 14 deletions.
27 changes: 14 additions & 13 deletions nautilus_trader/persistence/wranglers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ def prepare_tick_data_from_bars(

# Merge tick data
df_ticks_final = pd.concat([df_ticks_o, df_ticks_h, df_ticks_l, df_ticks_c])
df_ticks_final.dropna(inplace=True)
df_ticks_final.sort_index(axis=0, kind="mergesort", inplace=True)
df_ticks_final = df_ticks_final.dropna()
df_ticks_final = df_ticks_final.sort_index(axis=0, kind="mergesort")

cdef int i
# Randomly shift high low prices
Expand Down Expand Up @@ -572,32 +572,24 @@ cdef class TradeTickDataWrangler:
# Ensure index is tz-aware UTC
data = as_utc_index(data)

# Determine the Aggressor Side based on Close vs Open
if "side" not in data and "buyer_maker" not in data:
data['side'] = ['BUY' if close > open_ else 'SELL' for open_, close in zip(data['open'], data['close'])]

cdef dict data_open = {
"price": data["open"],
"size": data["volume"] / 4,
"side": data["side"],
}

cdef dict data_high = {
"price": data["high"],
"size": data["volume"] / 4,
"side": data["side"],
}

cdef dict data_low = {
"price": data["low"],
"size": data["volume"] / 4,
"side": data["side"],
}

cdef dict data_close = {
"price": data["close"],
"size": data["volume"] / 4,
"side": data["side"],
}

df_ticks_final, ts_events, ts_inits = prepare_tick_data_from_bars(
Expand All @@ -612,12 +604,19 @@ cdef class TradeTickDataWrangler:
)
df_ticks_final["trade_id"] = df_ticks_final.index.view(np.uint64).astype(str)

# Adjust size precision
size_precision = self.instrument.size_precision
if is_raw:
df_ticks_final["size"] = df_ticks_final["size"].apply(lambda x: round(x, size_precision - 9))
else:
df_ticks_final["size"] = df_ticks_final["size"].round(size_precision)

if is_raw:
return list(map(
self._build_tick_from_raw,
df_ticks_final["price"],
df_ticks_final["size"],
self._create_side_if_not_exist(data),
self._create_side_if_not_exist(df_ticks_final),
df_ticks_final["trade_id"],
ts_events,
ts_inits,
Expand All @@ -627,7 +626,7 @@ cdef class TradeTickDataWrangler:
self._build_tick,
df_ticks_final["price"],
df_ticks_final["size"],
self._create_side_if_not_exist(data),
self._create_side_if_not_exist(df_ticks_final),
df_ticks_final["trade_id"],
ts_events,
ts_inits,
Expand All @@ -636,8 +635,10 @@ cdef class TradeTickDataWrangler:
def _create_side_if_not_exist(self, data):
if "side" in data.columns:
return data["side"].apply(lambda x: AggressorSide.BUYER if str(x).upper() == "BUY" else AggressorSide.SELLER)
else:
elif "buyer_maker" in data.columns:
return data["buyer_maker"].apply(lambda x: AggressorSide.SELLER if x is True else AggressorSide.BUYER)
else:
return [AggressorSide.NO_AGGRESSOR] * len(data)

# cpdef method for Python wrap() (called with map)
cpdef TradeTick _build_tick_from_raw(
Expand Down
42 changes: 41 additions & 1 deletion tests/unit_tests/persistence/test_wranglers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# -------------------------------------------------------------------------------------------------

import pandas as pd
import pytest

from nautilus_trader.model.enums import BookAction
Expand Down Expand Up @@ -111,6 +111,7 @@ def test_trade_bar_data_wrangler(
provider = TestDataProvider()
data = provider.read_csv_bars("fxcm/usdjpy-m1-bid-2013.csv")
data.loc[:, "volume"] = 100_0000
expected_ticks_count = len(data) * 4

# Act
ticks = wrangler.process_bar_data(
Expand All @@ -124,3 +125,42 @@ def test_trade_bar_data_wrangler(
assert ticks[1].ts_event == ts_event2
assert ticks[2].ts_event == ts_event3
assert ticks[3].ts_event == ts_event4
assert len(ticks) == expected_ticks_count


@pytest.mark.parametrize("is_raw", [False, True])
def test_trade_bar_data_wrangler_size_precision(is_raw: bool) -> None:
    """
    Check that ``process_bar_data`` rounds tick sizes to the instrument's size precision.

    A single bar is fed through the wrangler both as plain values and as
    values pre-multiplied by 1e9 (``is_raw=True``). In both cases every
    resulting tick is expected to carry a quarter of the bar volume, rounded
    to ``spy.size_precision``, when compared through ``tick.size.raw``.
    """
    # Arrange
    spy = TestInstrumentProvider.equity("SPY", "ARCA")
    wrangler = TradeTickDataWrangler(instrument=spy)
    factor = 1e9 if is_raw else 1
    ts = pd.Timestamp("2024-01-05 21:00:00+0000", tz="UTC")
    data = pd.DataFrame(
        {
            "open": {ts: 468.01 * factor},
            "high": {ts: 468.08 * factor},
            "low": {ts: 467.81 * factor},
            "close": {ts: 467.96 * factor},
            "volume": {ts: 18735.0 * factor},
        },
    )
    # One tick per bar for each of open, high, low and close
    expected_ticks_count = len(data) * 4

    # Calculate expected_size (bar volume is split evenly across the four ticks)
    if is_raw:
        # For raw data, adjust precision by -9
        expected_size = round(data["volume"].iloc[0] / 4, spy.size_precision - 9)
    else:
        # For non-raw data, apply standard precision and scale back up to compare with raw
        expected_size = round(data["volume"].iloc[0] / 4, spy.size_precision) * 1e9

    # Act
    ticks = wrangler.process_bar_data(
        data=data,
        offset_interval_ms=0,
        timestamp_is_close=True,
        is_raw=is_raw,
    )

    # Assert
    # Guard against a vacuous pass: the loop below checks nothing if no ticks are produced
    assert len(ticks) == expected_ticks_count
    for tick in ticks:
        assert tick.size.raw == expected_size

0 comments on commit c267d30

Please sign in to comment.