Skip to content

Commit

Permalink
FIX-#0000: make sure pickling is zero-copy for Ray
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <[email protected]>
  • Loading branch information
anmyachev committed Oct 31, 2023
1 parent 521eb60 commit f220102
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,9 @@ def split_pandas_df_into_partitions(
parts = [
[
update_bar(
put_func(col_part.iloc[i : i + row_chunksize]),
put_func(
col_part.iloc[i : i + row_chunksize].copy()
), # `copy()` to fix zero-copy pickling
)
for col_part in col_parts
]
Expand Down
21 changes: 21 additions & 0 deletions modin/test/storage_formats/pandas/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,3 +1511,24 @@ def assert_materialized(obj):

assert call_queue == reconstructed_queue
assert_everything_materialized(reconstructed_queue)


@pytest.mark.skipif(Engine.get() != "Ray", reason="Ray specific")
def test_zero_copy_pickling():
    """Check that a partition fetched from Ray is backed by read-only memory.

    A frame of zeros is created, the data of its first partition is fetched
    with ``ray.get`` and an in-place write into the underlying array is
    attempted. The write is expected to fail with NumPy's
    "assignment destination is read-only" ``ValueError``; the test treats
    that as the evidence that the partition was pickled zero-copy.
    """
    import ray

    df = pd.DataFrame(np.zeros((100, 100)))
    part = ray.get(df._query_compiler._modin_frame._partitions[0][0]._data)

    # A successful write, or a ValueError with any other message, fails the
    # test: either one means the buffer is not the read-only one expected
    # from zero-copy pickling.
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        part.values[0, 0] = 10

0 comments on commit f220102

Please sign in to comment.