Skip to content

Commit

Permalink
Fix shuffle after set_index from 1 partition df (#1040)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Apr 25, 2024
1 parent cc002e8 commit 301c1a6
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
2 changes: 1 addition & 1 deletion dask_expr/_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -864,7 +864,7 @@ def _lower(self):
self.frame,
self._other,
self.drop,
- self._npartitions_input,
+ self._npartitions_input if self.user_divisions is None else None,
self.ascending,
self.upsample,
self.user_divisions,
Expand Down
8 changes: 8 additions & 0 deletions dask_expr/tests/test_shuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,3 +794,11 @@ def test_set_index_before_assign(df, pdf):
expected = pdf.set_index("x")
expected["z"] = expected.y + 1
assert_eq(result["z"], expected["z"])


def test_set_index_shuffle_afterwards(pdf):
    """Regression test: shuffle after ``set_index`` on a 1-partition frame.

    Builds a single-partition collection from ``pdf``, sets the index to
    ``"y"`` with explicit divisions and a task-based shuffle, then checks
    that the unique ``y`` values obtained after resetting the index match
    the unique values pandas computes on the source frame (index ignored).
    """
    divisions = [0, 10, 20, 100]
    indexed = from_pandas(pdf, npartitions=1).set_index(
        "y", sort=True, divisions=divisions, shuffle="tasks"
    )

    expected = pd.Series(pdf.y.unique(), name="y")
    result = indexed.reset_index().y.unique()

    # Only the values matter here; the resulting index is not compared.
    assert_eq(result, expected, check_index=False)

0 comments on commit 301c1a6

Please sign in to comment.