Update cudf matrix calls for to_numpy and to_cupy #4293

Merged
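Note for reviewers: the change is mechanical — every deprecated cudf conversion call is replaced by its current equivalent. A minimal sketch of the mapping, assuming a cudf release where `to_numpy()` and `to_cupy()` are available (roughly 21.08 and later); the toy data below is illustrative and not taken from this PR:

```python
import cudf
import cupy as cp
import numpy as np

df = cudf.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
s = df["a"]

# Deprecated calls removed by this PR:
#   s.to_array()        # Series    -> host ndarray
#   df.as_matrix()      # DataFrame -> host ndarray
#   df.as_gpu_matrix()  # DataFrame -> device matrix

# Replacements used throughout the diff:
host_vec = s.to_numpy()   # numpy.ndarray on the host
host_mat = df.to_numpy()  # 2-D numpy.ndarray on the host
dev_mat = df.to_cupy()    # 2-D cupy.ndarray that stays on the device

assert isinstance(host_mat, np.ndarray)
assert isinstance(dev_mat, cp.ndarray)
```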
8 changes: 4 additions & 4 deletions notebooks/arima_demo.ipynb
@@ -120,15 +120,15 @@
" title = y.columns[i]\n",
" if pred is not None:\n",
" ax[i].plot(np.r_[pred_start:pred_end],\n",
" pred[pred.columns[i]].to_array(),\n",
" pred[pred.columns[i]].to_numpy(),\n",
" linestyle=\"--\", color=col[1])\n",
" # Prediction intervals\n",
" if lower is not None and upper is not None:\n",
" ax[i].fill_between(np.r_[pred_start:pred_end],\n",
" lower[lower.columns[i]].to_array(),\n",
" upper[upper.columns[i]].to_array(),\n",
" lower[lower.columns[i]].to_numpy(),\n",
" upper[upper.columns[i]].to_numpy(),\n",
" alpha=0.2, color=col[1])\n",
" ax[i].plot(np.r_[:n_obs], y[title].to_array(), color=col[0])\n",
" ax[i].plot(np.r_[:n_obs], y[title].to_numpy(), color=col[0])\n",
" ax[i].title.set_text(title)\n",
" ax[i].set_xlim((0, pred_end))\n",
" for i in range(batch_size, r*c):\n",
2 changes: 1 addition & 1 deletion notebooks/kmeans_demo.ipynb
@@ -193,7 +193,7 @@
"outputs": [],
"source": [
"%%time\n",
"cuml_score = adjusted_rand_score(host_labels, kmeans_cuml.labels_.to_array())\n",
"cuml_score = adjusted_rand_score(host_labels, kmeans_cuml.labels_.to_numpy())\n",
"sk_score = adjusted_rand_score(host_labels, kmeans_sk.labels_)"
]
},
4 changes: 2 additions & 2 deletions notebooks/nearest_neighbors_demo.ipynb
@@ -167,7 +167,7 @@
"metadata": {},
"outputs": [],
"source": [
"passed = np.allclose(D_sk, D_cuml.as_gpu_matrix(), atol=1e-3)\n",
"passed = np.allclose(D_sk, D_cuml.to_numpy(), atol=1e-3)\n",
"print('compare knn: cuml vs sklearn distances %s'%('equal'if passed else 'NOT equal'))"
]
},
@@ -185,7 +185,7 @@
"outputs": [],
"source": [
"sk_sorted = np.sort(I_sk, axis=1)\n",
"cuml_sorted = np.sort(I_cuml.as_gpu_matrix(), axis=1)\n",
"cuml_sorted = np.sort(I_cuml.to_cupy(), axis=1)\n",
"\n",
"diff = sk_sorted - cuml_sorted\n",
"\n",
2 changes: 1 addition & 1 deletion notebooks/random_forest_mnmg_demo.ipynb
@@ -208,7 +208,7 @@
"outputs": [],
"source": [
"skl_y_pred = skl_model.predict(X_test.get())\n",
"cuml_y_pred = cuml_model.predict(X_test_dask).compute().to_array()\n",
"cuml_y_pred = cuml_model.predict(X_test_dask).compute().to_numpy()\n",
"\n",
"# Due to randomness in the algorithm, you may see slight variation in accuracies\n",
"print(\"SKLearn accuracy: \", accuracy_score(y_test, skl_y_pred))\n",
6 changes: 3 additions & 3 deletions python/cuml/benchmark/bench_helper_funcs.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -48,8 +48,8 @@ def _training_data_to_numpy(X, y):
X_np = X
y_np = y
elif isinstance(X, cudf.DataFrame):
-X_np = X.as_gpu_matrix().copy_to_host()
-y_np = y.to_gpu_array().copy_to_host()
+X_np = X.to_numpy()
+y_np = y.to_numpy()
elif cuda.devicearray.is_cuda_ndarray(X):
X_np = X.copy_to_host()
y_np = y.copy_to_host()
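A side effect worth noting in this hunk: the explicit device-to-host hop disappears, because `to_numpy()` already returns a host array, while the old calls produced a device object that still needed `copy_to_host()`. A sketch under that assumption (variable names are illustrative, not the benchmark code):

```python
import cudf
import numpy as np

X = cudf.DataFrame({"f0": [0.0, 1.0], "f1": [2.0, 3.0]})
y = cudf.Series([0, 1])

# Before: device matrix/array first, then an explicit host copy
#   X_np = X.as_gpu_matrix().copy_to_host()
#   y_np = y.to_gpu_array().copy_to_host()

# After: a single call that returns a host numpy.ndarray
X_np = X.to_numpy()
y_np = y.to_numpy()
assert isinstance(X_np, np.ndarray) and X_np.shape == (2, 2)
```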
8 changes: 4 additions & 4 deletions python/cuml/benchmark/datagen.py
@@ -174,9 +174,9 @@ def _convert_to_numpy(data):
elif isinstance(data, np.ndarray):
return data
elif isinstance(data, cudf.DataFrame):
-return data.as_matrix()
+return data.to_numpy()
elif isinstance(data, cudf.Series):
-return data.to_array()
+return data.to_numpy()
elif isinstance(data, (pd.DataFrame, pd.Series)):
return data.to_numpy()
else:
@@ -259,9 +259,9 @@ def _convert_to_scipy_sparse(data, input_type):
elif isinstance(data, np.ndarray):
return _sparsify_and_convert(data, input_type)
elif isinstance(data, cudf.DataFrame):
-return _sparsify_and_convert(data.as_matrix(), input_type)
+return _sparsify_and_convert(data.to_numpy(), input_type)
elif isinstance(data, cudf.Series):
-return _sparsify_and_convert(data.to_array(), input_type)
+return _sparsify_and_convert(data.to_numpy(), input_type)
elif isinstance(data, (pd.DataFrame, pd.Series)):
return _sparsify_and_convert(data.to_numpy(), input_type)
else:
2 changes: 1 addition & 1 deletion python/cuml/benchmark/runners.py
@@ -248,7 +248,7 @@ def _run_one_size(
else:
y_pred_cuml = cuml_model.transform(X_test)
if isinstance(y_pred_cuml, Series):
-y_pred_cuml = y_pred_cuml.to_array()
+y_pred_cuml = y_pred_cuml.to_numpy()
cuml_accuracy = algo_pair.accuracy_function(
y_test, y_pred_cuml
)
5 changes: 2 additions & 3 deletions python/cuml/common/input_utils.py
@@ -324,10 +324,9 @@ def check_order(arr_order):

if isinstance(X, cudf.DataFrame):
if order == 'K':
-X_m = CumlArray(data=X.as_gpu_matrix(order='F'),
-index=index)
+X_m = CumlArray(data=X.to_cupy(), index=index)
else:
-X_m = CumlArray(data=X.as_gpu_matrix(order=order),
+X_m = CumlArray(data=cp.array(X.to_cupy(), order=order),
index=index)

elif isinstance(X, CumlArray):
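One nuance in this hunk: `as_gpu_matrix(order=...)` let callers request a memory layout directly, whereas `to_cupy()` makes no such promise, which is presumably why the non-`'K'` branch wraps the result in `cp.array(..., order=order)`. A standalone sketch of that assumption (not the cuML code itself):

```python
import cudf
import cupy as cp

X = cudf.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})

mat = X.to_cupy()                  # layout is whatever cudf hands back
f_mat = cp.array(mat, order="F")   # force column-major when a specific order is needed
c_mat = cp.array(mat, order="C")   # or row-major

assert f_mat.flags.f_contiguous and c_mat.flags.c_contiguous
```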
2 changes: 1 addition & 1 deletion python/cuml/common/memory_utils.py
@@ -526,7 +526,7 @@ def numba_row_matrix(df):

"""

-col_major = df.as_gpu_matrix(order='F')
+col_major = df.to_cupy()

row_major = cp.array(col_major, order='C')

2 changes: 1 addition & 1 deletion python/cuml/dask/common/dask_arr_utils.py
@@ -40,7 +40,7 @@ def validate_dask_array(darray, client=None):

def _conv_df_to_sparse(x):
cupy_ary = rmm_cupy_ary(cp.asarray,
-x.as_gpu_matrix(),
+x.to_cupy(),
dtype=x.dtypes[0])

return cupyx.scipy.sparse.csr_matrix(cupy_ary)
6 changes: 3 additions & 3 deletions python/cuml/experimental/hyperparams/HPO_demo.ipynb
@@ -335,11 +335,11 @@
"outputs": [],
"source": [
"X_cpu = X_train.to_pandas()\n",
"y_cpu = y_train.label.to_array()\n",
"y_cpu = y_train.label.to_numpy()\n",
"\n",
"\n",
"X_test_cpu = X_test.to_pandas()\n",
"y_test_cpu = y_test.label.to_array()"
"y_test_cpu = y_test.label.to_numpy()"
]
},
{
@@ -1028,4 +1028,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
2 changes: 1 addition & 1 deletion python/cuml/linear_model/mbsgd_classifier.pyx
@@ -64,7 +64,7 @@ class MBSGDClassifier(Base,
loss='squared_loss',
alpha=0.5)
cu_mbsgd_classifier.fit(X, y)
-cu_pred = cu_mbsgd_classifier.predict(pred_data).to_array()
+cu_pred = cu_mbsgd_classifier.predict(pred_data).to_numpy()
print(" cuML intercept : ", cu_mbsgd_classifier.intercept_)
print(" cuML coef : ", cu_mbsgd_classifier.coef_)
print("cuML predictions : ", cu_pred)
2 changes: 1 addition & 1 deletion python/cuml/linear_model/mbsgd_regressor.pyx
@@ -64,7 +64,7 @@ class MBSGDRegressor(Base,
loss='squared_loss',
alpha=0.5)
cu_mbsgd_regressor.fit(X, y)
-cu_pred = cu_mbsgd_regressor.predict(pred_data).to_array()
+cu_pred = cu_mbsgd_regressor.predict(pred_data).to_numpy()
print(" cuML intercept : ", cu_mbsgd_regressor.intercept_)
print(" cuML coef : ", cu_mbsgd_regressor.coef_)
print("cuML predictions : ", cu_pred)
2 changes: 1 addition & 1 deletion python/cuml/preprocessing/encoders.py
@@ -451,7 +451,7 @@ def inverse_transform(self, X):
j += enc_size
if self.input_type == 'array':
try:
-result = cp.asarray(result.as_gpu_matrix())
+result = result.to_cupy()
except ValueError:
warnings.warn("The input one hot encoding contains rows with "
"unknown categories. Since device arrays do not "
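For context on why the surrounding `try`/`except ValueError` is kept: like the old `as_gpu_matrix()` path, `to_cupy()` raises `ValueError` when a column still contains nulls, which is how rows with unknown categories surface in this inverse transform. A standalone sketch of that assumed behaviour, including the `na_value` escape hatch (not taken from cuML):

```python
import cudf

df = cudf.DataFrame({"a": [1.0, None, 3.0]})

try:
    arr = df.to_cupy()               # raises ValueError because of the null
except ValueError:
    arr = df.to_cupy(na_value=0.0)   # or handle the nulls some other way
```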
2 changes: 1 addition & 1 deletion python/cuml/solvers/sgd.pyx
@@ -149,7 +149,7 @@ class SGD(Base,
fit_intercept=True, batch_size=2,
tol=0.0, penalty='none', loss='squared_loss')
cu_sgd.fit(X, y)
-cu_pred = cu_sgd.predict(pred_data).to_array()
+cu_pred = cu_sgd.predict(pred_data).to_numpy()
print(" cuML intercept : ", cu_sgd.intercept_)
print(" cuML coef : ", cu_sgd.coef_)
print("cuML predictions : ", cu_pred)
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_kmeans.py
@@ -196,7 +196,7 @@ def test_transform(nrows, ncols, nclusters, n_parts, input_type, client):
if input_type == "dataframe":
xformed = cp.array(xformed
if len(xformed.shape) == 1
-else xformed.as_gpu_matrix())
+else xformed.to_cupy())

if nclusters == 1:
# series shape is (nrows,) not (nrows, 1) but both are valid
6 changes: 3 additions & 3 deletions python/cuml/test/dask/test_kneighbors_classifier.py
@@ -126,7 +126,7 @@ def test_predict_and_score(dataset, datatype, parameters, client):
d_outputs = d_model.predict(X_test, convert_dtype=True)
d_outputs = d_outputs.compute()

-d_outputs = d_outputs.as_matrix() \
+d_outputs = d_outputs.to_numpy() \
if isinstance(d_outputs, DataFrame) \
else d_outputs

@@ -164,9 +164,9 @@ def test_predict_proba(dataset, datatype, parameters, client):
d_probas = da.compute(d_probas)[0]

if datatype == 'dask_cudf':
-d_probas = list(map(lambda o: o.as_matrix()
+d_probas = list(map(lambda o: o.to_numpy()
if isinstance(o, DataFrame)
-else o.to_array()[..., np.newaxis],
+else o.to_numpy()[..., np.newaxis],
d_probas))

check_probabilities(l_probas, d_probas)
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_kneighbors_regressor.py
@@ -120,7 +120,7 @@ def test_predict_and_score(dataset, datatype, parameters, client):
d_outputs = d_model.predict(X_test, convert_dtype=True)
d_outputs = d_outputs.compute()

-d_outputs = d_outputs.as_matrix() \
+d_outputs = d_outputs.to_numpy() \
if isinstance(d_outputs, DataFrame) \
else d_outputs

8 changes: 4 additions & 4 deletions python/cuml/test/dask/test_label_encoder.py
@@ -35,9 +35,9 @@ def test_labelencoder_fit_transform(length, cardinality, client):
df = dask_cudf.from_cudf(tmp, npartitions=len(client.has_what()))
encoded = cuml.dask.preprocessing.LabelEncoder().fit_transform(df)

-df_arr = df.compute().to_array()
+df_arr = df.compute().to_numpy()
df_arr = _arr_to_similarity_mat(df_arr)
-encoder_arr = cp.asnumpy(encoded.compute().to_array())
+encoder_arr = cp.asnumpy(encoded.compute().to_numpy())
encoded_arr = _arr_to_similarity_mat(encoder_arr)
assert ((encoded_arr == encoded_arr.T) == (df_arr == df_arr.T)).all()
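The Dask tests follow the same two-step pattern: `.compute()` first gathers the distributed result into a single cudf object, and only then does `.to_numpy()` copy it to the host. A toy sketch of that assumed flow (data and names are illustrative):

```python
import cudf
import dask_cudf

preds = dask_cudf.from_cudf(cudf.Series([1, 0, 1, 1]), npartitions=2)

local = preds.compute()   # dask_cudf.Series -> cudf.Series on the GPU
host = local.to_numpy()   # cudf.Series -> host numpy.ndarray
```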

@@ -54,9 +54,9 @@ def test_labelencoder_transform(length, cardinality, client):

encoded = le.transform(df)

-df_arr = df.compute().to_array()
+df_arr = df.compute().to_numpy()
df_arr = _arr_to_similarity_mat(df_arr)
-encoder_arr = cp.asnumpy(encoded.compute().to_array())
+encoder_arr = cp.asnumpy(encoded.compute().to_numpy())
encoded_arr = _arr_to_similarity_mat(encoder_arr)
assert (
(encoded_arr == encoded_arr.T) == (df_arr == df_arr.T)
4 changes: 2 additions & 2 deletions python/cuml/test/dask/test_nearest_neighbors.py
@@ -113,7 +113,7 @@ def test_compare_skl(nrows, ncols, nclusters, n_parts, n_neighbors,

out_d, out_i = cumlModel.kneighbors(X_cudf)

-local_i = np.array(out_i.compute().as_gpu_matrix(), dtype="int64")
+local_i = np.array(out_i.compute().to_numpy(), dtype="int64")

sklModel = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
skl_y_hat = sklModel.predict(X)
@@ -167,7 +167,7 @@ def test_batch_size(nrows, ncols, n_parts,

out_d, out_i = cumlModel.kneighbors(X_cudf)

-local_i = np.array(out_i.compute().as_gpu_matrix())
+local_i = out_i.compute().to_numpy()

y_hat, _ = predict(local_i, y, n_neighbors)

6 changes: 3 additions & 3 deletions python/cuml/test/dask/test_one_hot_encoder.py
@@ -24,13 +24,13 @@
from pandas.testing import assert_frame_equal
from cuml.test.test_one_hot_encoder import generate_inputs_from_categories
from cuml.test.test_one_hot_encoder import assert_inverse_equal
-from cuml.test.test_one_hot_encoder import from_df_to_array
+from cuml.test.test_one_hot_encoder import from_df_to_numpy


@pytest.mark.mg
def test_onehot_vs_skonehot(client):
X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]})
-skX = from_df_to_array(X)
+skX = from_df_to_numpy(X)
X = dask_cudf.from_cudf(X, npartitions=2)

enc = OneHotEncoder(sparse=False)
@@ -207,4 +207,4 @@ def test_onehot_get_categories(client):
cats = enc.categories_

for i in range(len(ref)):
-np.testing.assert_array_equal(ref[i], cats[i].to_array())
+np.testing.assert_array_equal(ref[i], cats[i].to_numpy())
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_pca.py
@@ -65,7 +65,7 @@ def test_pca_fit(nrows, ncols, n_parts, input_type, client):
with_sign = False if attr in ['components_'] else True
cuml_res = (getattr(cupca, attr))
if type(cuml_res) == np.ndarray:
-cuml_res = cuml_res.as_matrix()
+cuml_res = cuml_res.to_numpy()
skl_res = getattr(skpca, attr)
assert array_equal(cuml_res, skl_res, 1e-1, with_sign=with_sign)

8 changes: 4 additions & 4 deletions python/cuml/test/dask/test_random_forest.py
@@ -275,9 +275,9 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker,
cu_rf_mg.fit(X_train_df, y_train_df)

fil_preds = cu_rf_mg.predict(X_test_df).compute()
-fil_preds = fil_preds.to_array()
+fil_preds = fil_preds.to_numpy()
fil_preds_proba = cu_rf_mg.predict_proba(X_test_df).compute()
-fil_preds_proba = cp.asnumpy(fil_preds_proba.as_gpu_matrix())
+fil_preds_proba = fil_preds_proba.to_numpy()
np.testing.assert_equal(fil_preds, np.argmax(fil_preds_proba, axis=1))

y_proba = np.zeros(np.shape(fil_preds_proba))
@@ -426,13 +426,13 @@ def predict_with_json_rf_regressor(rf, x):

if estimator_type == 'classification':
expected_pred = cu_rf_mg.predict(X_dask).astype(np.int32)
-expected_pred = expected_pred.compute().to_array()
+expected_pred = expected_pred.compute().to_numpy()
for idx, row in enumerate(X):
majority_vote = predict_with_json_rf_classifier(json_obj, row)
assert expected_pred[idx] == majority_vote
elif estimator_type == 'regression':
expected_pred = cu_rf_mg.predict(X_dask).astype(np.float32)
-expected_pred = expected_pred.compute().to_array()
+expected_pred = expected_pred.compute().to_numpy()
pred = []
for idx, row in enumerate(X):
pred.append(predict_with_json_rf_regressor(json_obj, row))
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_tsvd.py
@@ -70,7 +70,7 @@ def test_pca_fit(data_info, input_type, client):
with_sign = False if attr in ['components_'] else True
cuml_res = (getattr(cutsvd, attr))
if type(cuml_res) == np.ndarray:
-cuml_res = cuml_res.as_matrix()
+cuml_res = cuml_res.to_numpy()
skl_res = getattr(sktsvd, attr)
if attr == 'singular_values_':
assert array_equal(cuml_res, skl_res, 1, with_sign=with_sign)
4 changes: 2 additions & 2 deletions python/cuml/test/test_array.py
@@ -359,14 +359,14 @@ def test_output(output_type, dtype, out_dtype, order, shape):

elif output_type == 'series':
comp = cudf.Series(np.ravel(inp)) == res
-assert np.all(comp.to_array())
+assert np.all(comp.to_numpy())

elif output_type == 'dataframe':
if len(inp.shape) == 1:
inp = inp.reshape(inp.shape[0], 1)
comp = cudf.DataFrame(inp)
comp = comp == res
-assert np.all(comp.as_gpu_matrix().copy_to_host())
+assert np.all(comp.to_numpy())

# check for e2e cartesian product:
if output_type not in ['dataframe', 'cudf']: