diff --git a/nautilus_trader/persistence/wranglers.pyx b/nautilus_trader/persistence/wranglers.pyx
index 5c56cbce58a3..20ad73fbfe06 100644
--- a/nautilus_trader/persistence/wranglers.pyx
+++ b/nautilus_trader/persistence/wranglers.pyx
@@ -200,8 +200,8 @@ def prepare_tick_data_from_bars(
 
     # Merge tick data
     df_ticks_final = pd.concat([df_ticks_o, df_ticks_h, df_ticks_l, df_ticks_c])
-    df_ticks_final.dropna(inplace=True)
-    df_ticks_final.sort_index(axis=0, kind="mergesort", inplace=True)
+    df_ticks_final = df_ticks_final.dropna()
+    df_ticks_final = df_ticks_final.sort_index(axis=0, kind="mergesort")
 
     cdef int i
     # Randomly shift high low prices
@@ -572,32 +572,24 @@ cdef class TradeTickDataWrangler:
         # Ensure index is tz-aware UTC
         data = as_utc_index(data)
 
-        # Determine the Aggressor Side based on Close vs Open
-        if "side" not in data and "buyer_maker" not in data:
-            data['side'] = ['BUY' if close > open_ else 'SELL' for open_, close in zip(data['open'], data['close'])]
-
         cdef dict data_open = {
             "price": data["open"],
             "size": data["volume"] / 4,
-            "side": data["side"],
         }
 
         cdef dict data_high = {
             "price": data["high"],
             "size": data["volume"] / 4,
-            "side": data["side"],
         }
 
         cdef dict data_low = {
             "price": data["low"],
             "size": data["volume"] / 4,
-            "side": data["side"],
         }
 
         cdef dict data_close = {
             "price": data["close"],
             "size": data["volume"] / 4,
-            "side": data["side"],
         }
 
         df_ticks_final, ts_events, ts_inits = prepare_tick_data_from_bars(
@@ -612,12 +604,19 @@ cdef class TradeTickDataWrangler:
         )
         df_ticks_final["trade_id"] = df_ticks_final.index.view(np.uint64).astype(str)
 
+        # Adjust size precision
+        size_precision = self.instrument.size_precision
+        if is_raw:
+            df_ticks_final["size"] = df_ticks_final["size"].apply(lambda x: round(x, size_precision - 9))
+        else:
+            df_ticks_final["size"] = df_ticks_final["size"].round(size_precision)
+
         if is_raw:
             return list(map(
                 self._build_tick_from_raw,
                 df_ticks_final["price"],
                 df_ticks_final["size"],
-                self._create_side_if_not_exist(data),
+                self._create_side_if_not_exist(df_ticks_final),
                 df_ticks_final["trade_id"],
                 ts_events,
                 ts_inits,
@@ -627,7 +626,7 @@ cdef class TradeTickDataWrangler:
                 self._build_tick,
                 df_ticks_final["price"],
                 df_ticks_final["size"],
-                self._create_side_if_not_exist(data),
+                self._create_side_if_not_exist(df_ticks_final),
                 df_ticks_final["trade_id"],
                 ts_events,
                 ts_inits,
@@ -636,8 +635,10 @@ cdef class TradeTickDataWrangler:
     def _create_side_if_not_exist(self, data):
         if "side" in data.columns:
             return data["side"].apply(lambda x: AggressorSide.BUYER if str(x).upper() == "BUY" else AggressorSide.SELLER)
-        else:
+        elif "buyer_maker" in data.columns:
             return data["buyer_maker"].apply(lambda x: AggressorSide.SELLER if x is True else AggressorSide.BUYER)
+        else:
+            return [AggressorSide.NO_AGGRESSOR] * len(data)
 
     # cpdef method for Python wrap() (called with map)
     cpdef TradeTick _build_tick_from_raw(
diff --git a/tests/unit_tests/persistence/test_wranglers.py b/tests/unit_tests/persistence/test_wranglers.py
index 67c2bf4b976d..d4a382ace201 100644
--- a/tests/unit_tests/persistence/test_wranglers.py
+++ b/tests/unit_tests/persistence/test_wranglers.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # -------------------------------------------------------------------------------------------------
-
+import pandas as pd
 import pytest
 
 from nautilus_trader.model.enums import BookAction
@@ -111,6 +111,7 @@ def test_trade_bar_data_wrangler(
     provider = TestDataProvider()
     data = provider.read_csv_bars("fxcm/usdjpy-m1-bid-2013.csv")
     data.loc[:, "volume"] = 100_0000
+    expected_ticks_count = len(data) * 4
 
     # Act
     ticks = wrangler.process_bar_data(
@@ -124,3 +125,42 @@ def test_trade_bar_data_wrangler(
     assert ticks[1].ts_event == ts_event2
     assert ticks[2].ts_event == ts_event3
     assert ticks[3].ts_event == ts_event4
+    assert len(ticks) == expected_ticks_count
+
+
+@pytest.mark.parametrize("is_raw", [False, True])
+def test_trade_bar_data_wrangler_size_precision(is_raw: bool) -> None:
+    # Arrange
+    spy = TestInstrumentProvider.equity("SPY", "ARCA")
+    wrangler = TradeTickDataWrangler(instrument=spy)
+    factor = 1e9 if is_raw else 1
+    ts = pd.Timestamp("2024-01-05 21:00:00+0000", tz="UTC")
+    data = pd.DataFrame(
+        {
+            "open": {ts: 468.01 * factor},
+            "high": {ts: 468.08 * factor},
+            "low": {ts: 467.81 * factor},
+            "close": {ts: 467.96 * factor},
+            "volume": {ts: 18735.0 * factor},
+        },
+    )
+
+    # Calculate expected_size
+    if is_raw:
+        # For raw data, adjust precision by -9
+        expected_size = round(data["volume"].iloc[0] / 4, spy.size_precision - 9)
+    else:
+        # For non-raw data, apply standard precision and scale back up to compare with raw
+        expected_size = round(data["volume"].iloc[0] / 4, spy.size_precision) * 1e9
+
+    # Act
+    ticks = wrangler.process_bar_data(
+        data=data,
+        offset_interval_ms=0,
+        timestamp_is_close=True,
+        is_raw=is_raw,
+    )
+
+    # Assert
+    for tick in ticks:
+        assert tick.size.raw == expected_size