Skip to content

Commit

Permalink
Pandas 2.x support (#5758)
Browse files (browse the repository at this point in the history)
Adds changes to account for cuDF support of Pandas 2.x

Fixes #5759.

Authors:
  - Dante Gama Dessavre (https://github.com/dantegd)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Divye Gala (https://github.com/divyegala)
  - Bradley Dice (https://github.com/bdice)

URL: #5758
  • Loading branch information
dantegd authored Feb 8, 2024
1 parent a979aec commit d2f509e
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 5 deletions.
2 changes: 1 addition & 1 deletion python/cuml/benchmark/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _gen_data_regression(
)

X_df = cudf.DataFrame(X_arr)
- y_df = cudf.Series(y_arr)
+ y_df = cudf.Series(np.squeeze(y_arr))

return X_df, y_df

Expand Down
3 changes: 2 additions & 1 deletion python/cuml/common/sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def create_csr_matrix_from_count_df(

doc_token_counts = count_df["doc_id"].value_counts().reset_index()
del count_df

doc_token_counts = doc_token_counts.rename(
- {"doc_id": "token_counts", "index": "doc_id"}, axis=1
+ {"count": "token_counts"}, axis=1
).sort_values(by="doc_id")

token_counts = _insert_zeros(
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/preprocessing/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ def inverse_transform(self, X):
dropped_class_idx = Series(self.drop_idx_[feature])
dropped_class_mask = Series(cats).isin(cats[dropped_class_idx])
if len(cats) == 1:
- inv = Series(Index(cats[0]).repeat(X.shape[0]))
+ inv = Series(Index([cats[0]]).repeat(X.shape[0]))
result[feature] = inv
continue
cats = cats[~dropped_class_mask]
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/tests/test_train_test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_split_dataframe(train_size, shuffle):
assert all(X_test.index.to_pandas() == y_test.index.to_pandas())

X_reconstructed = cudf.concat([X_train, X_test]).sort_values(by=["x"])
- y_reconstructed = y_train.append(y_test).sort_values()
+ y_reconstructed = cudf.concat([y_train, y_test]).sort_values()

assert all(X_reconstructed.reset_index(drop=True) == X)
out = y_reconstructed.reset_index(drop=True).values_host == y.values_host
Expand Down Expand Up @@ -96,7 +96,7 @@ def test_split_column():
)

X_reconstructed = cudf.concat([X_train, X_test]).sort_values(by=["x"])
- y_reconstructed = y_train.append(y_test).sort_values()
+ y_reconstructed = cudf.concat([y_train, y_test]).sort_values()

assert all(
data
Expand Down

0 comments on commit d2f509e

Please sign in to comment.