Update cudf matrix calls for to_numpy and to_cupy #4293

Merged
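Note for reviewers: the change is mechanical — every deprecated cudf conversion call is replaced by its current equivalent. A minimal sketch of the mapping, assuming a cudf release where `to_numpy()` and `to_cupy()` are available (roughly 21.08 and later); the toy data below is illustrative and not taken from this PR:

```python
import cudf
import cupy as cp
import numpy as np

df = cudf.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]})
s = df["a"]

# Deprecated calls removed by this PR:
#   s.to_array()        # Series    -> host ndarray
#   df.as_matrix()      # DataFrame -> host ndarray
#   df.as_gpu_matrix()  # DataFrame -> device matrix

# Replacements used throughout the diff:
host_vec = s.to_numpy()   # numpy.ndarray on the host
host_mat = df.to_numpy()  # 2-D numpy.ndarray on the host
dev_mat = df.to_cupy()    # 2-D cupy.ndarray that stays on the device

assert isinstance(host_mat, np.ndarray)
assert isinstance(dev_mat, cp.ndarray)
```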
8 changes: 4 additions & 4 deletions notebooks/arima_demo.ipynb
@@ -120,15 +120,15 @@
" title = y.columns[i]\n",
" if pred is not None:\n",
" ax[i].plot(np.r_[pred_start:pred_end],\n",
" pred[pred.columns[i]].to_array(),\n",
" pred[pred.columns[i]].to_numpy(),\n",
" linestyle=\"--\", color=col[1])\n",
" # Prediction intervals\n",
" if lower is not None and upper is not None:\n",
" ax[i].fill_between(np.r_[pred_start:pred_end],\n",
" lower[lower.columns[i]].to_array(),\n",
" upper[upper.columns[i]].to_array(),\n",
" lower[lower.columns[i]].to_numpy(),\n",
" upper[upper.columns[i]].to_numpy(),\n",
" alpha=0.2, color=col[1])\n",
" ax[i].plot(np.r_[:n_obs], y[title].to_array(), color=col[0])\n",
" ax[i].plot(np.r_[:n_obs], y[title].to_numpy(), color=col[0])\n",
" ax[i].title.set_text(title)\n",
" ax[i].set_xlim((0, pred_end))\n",
" for i in range(batch_size, r*c):\n",
2 changes: 1 addition & 1 deletion notebooks/kmeans_demo.ipynb
@@ -193,7 +193,7 @@
"outputs": [],
"source": [
"%%time\n",
"cuml_score = adjusted_rand_score(host_labels, kmeans_cuml.labels_.to_array())\n",
"cuml_score = adjusted_rand_score(host_labels, kmeans_cuml.labels_.to_numpy())\n",
"sk_score = adjusted_rand_score(host_labels, kmeans_sk.labels_)"
]
},
4 changes: 2 additions & 2 deletions notebooks/nearest_neighbors_demo.ipynb
@@ -167,7 +167,7 @@
"metadata": {},
"outputs": [],
"source": [
"passed = np.allclose(D_sk, D_cuml.as_gpu_matrix(), atol=1e-3)\n",
"passed = np.allclose(D_sk, D_cuml.to_numpy(), atol=1e-3)\n",
"print('compare knn: cuml vs sklearn distances %s'%('equal'if passed else 'NOT equal'))"
]
},
@@ -185,7 +185,7 @@
"outputs": [],
"source": [
"sk_sorted = np.sort(I_sk, axis=1)\n",
"cuml_sorted = np.sort(I_cuml.as_gpu_matrix(), axis=1)\n",
"cuml_sorted = np.sort(I_cuml.to_cupy(), axis=1)\n",
"\n",
"diff = sk_sorted - cuml_sorted\n",
"\n",
2 changes: 1 addition & 1 deletion notebooks/random_forest_mnmg_demo.ipynb
@@ -208,7 +208,7 @@
"outputs": [],
"source": [
"skl_y_pred = skl_model.predict(X_test.get())\n",
"cuml_y_pred = cuml_model.predict(X_test_dask).compute().to_array()\n",
"cuml_y_pred = cuml_model.predict(X_test_dask).compute().to_numpy()\n",
"\n",
"# Due to randomness in the algorithm, you may see slight variation in accuracies\n",
"print(\"SKLearn accuracy: \", accuracy_score(y_test, skl_y_pred))\n",
6 changes: 3 additions & 3 deletions python/cuml/benchmark/bench_helper_funcs.py
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -48,8 +48,8 @@ def _training_data_to_numpy(X, y):
X_np = X
y_np = y
elif isinstance(X, cudf.DataFrame):
-X_np = X.as_gpu_matrix().copy_to_host()
-y_np = y.to_gpu_array().copy_to_host()
+X_np = X.to_numpy()
+y_np = y.to_numpy()
elif cuda.devicearray.is_cuda_ndarray(X):
X_np = X.copy_to_host()
y_np = y.copy_to_host()
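A side effect worth noting in this hunk: the explicit device-to-host hop disappears, because `to_numpy()` already returns a host array, while the old calls produced a device object that still needed `copy_to_host()`. A sketch under that assumption (variable names are illustrative, not the benchmark code):

```python
import cudf
import numpy as np

X = cudf.DataFrame({"f0": [0.0, 1.0], "f1": [2.0, 3.0]})
y = cudf.Series([0, 1])

# Before: device matrix/array first, then an explicit host copy
#   X_np = X.as_gpu_matrix().copy_to_host()
#   y_np = y.to_gpu_array().copy_to_host()

# After: a single call that returns a host numpy.ndarray
X_np = X.to_numpy()
y_np = y.to_numpy()
assert isinstance(X_np, np.ndarray) and X_np.shape == (2, 2)
```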
8 changes: 4 additions & 4 deletions python/cuml/benchmark/datagen.py
@@ -174,9 +174,9 @@ def _convert_to_numpy(data):
elif isinstance(data, np.ndarray):
return data
elif isinstance(data, cudf.DataFrame):
-return data.as_matrix()
+return data.to_numpy()
elif isinstance(data, cudf.Series):
-return data.to_array()
+return data.to_numpy()
elif isinstance(data, (pd.DataFrame, pd.Series)):
return data.to_numpy()
else:
@@ -259,9 +259,9 @@ def _convert_to_scipy_sparse(data, input_type):
elif isinstance(data, np.ndarray):
return _sparsify_and_convert(data, input_type)
elif isinstance(data, cudf.DataFrame):
-return _sparsify_and_convert(data.as_matrix(), input_type)
+return _sparsify_and_convert(data.to_numpy(), input_type)
elif isinstance(data, cudf.Series):
-return _sparsify_and_convert(data.to_array(), input_type)
+return _sparsify_and_convert(data.to_numpy(), input_type)
elif isinstance(data, (pd.DataFrame, pd.Series)):
return _sparsify_and_convert(data.to_numpy(), input_type)
else:
2 changes: 1 addition & 1 deletion python/cuml/benchmark/runners.py
@@ -248,7 +248,7 @@ def _run_one_size(
else:
y_pred_cuml = cuml_model.transform(X_test)
if isinstance(y_pred_cuml, Series):
-y_pred_cuml = y_pred_cuml.to_array()
+y_pred_cuml = y_pred_cuml.to_numpy()
cuml_accuracy = algo_pair.accuracy_function(
y_test, y_pred_cuml
)
5 changes: 2 additions & 3 deletions python/cuml/common/input_utils.py
@@ -324,10 +324,9 @@ def check_order(arr_order):

if isinstance(X, cudf.DataFrame):
if order == 'K':
-X_m = CumlArray(data=X.as_gpu_matrix(order='F'),
-index=index)
+X_m = CumlArray(data=X.to_cupy(), index=index)
else:
-X_m = CumlArray(data=X.as_gpu_matrix(order=order),
+X_m = CumlArray(data=cp.array(X.to_cupy(), order=order),
index=index)

elif isinstance(X, CumlArray):
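One nuance in this hunk: `as_gpu_matrix(order=...)` let callers request a memory layout directly, whereas `to_cupy()` makes no such promise, which is presumably why the non-`'K'` branch wraps the result in `cp.array(..., order=order)`. A standalone sketch of that assumption (not the cuML code itself):

```python
import cudf
import cupy as cp

X = cudf.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})

mat = X.to_cupy()                  # layout is whatever cudf hands back
f_mat = cp.array(mat, order="F")   # force column-major when a specific order is needed
c_mat = cp.array(mat, order="C")   # or row-major

assert f_mat.flags.f_contiguous and c_mat.flags.c_contiguous
```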
2 changes: 1 addition & 1 deletion python/cuml/common/memory_utils.py
@@ -526,7 +526,7 @@ def numba_row_matrix(df):

"""

-col_major = df.as_gpu_matrix(order='F')
+col_major = df.to_cupy()

row_major = cp.array(col_major, order='C')

2 changes: 1 addition & 1 deletion python/cuml/dask/common/dask_arr_utils.py
@@ -40,7 +40,7 @@ def validate_dask_array(darray, client=None):

def _conv_df_to_sparse(x):
cupy_ary = rmm_cupy_ary(cp.asarray,
-x.as_gpu_matrix(),
+x.to_cupy(),
dtype=x.dtypes[0])

return cupyx.scipy.sparse.csr_matrix(cupy_ary)
6 changes: 3 additions & 3 deletions python/cuml/experimental/hyperparams/HPO_demo.ipynb
@@ -335,11 +335,11 @@
"outputs": [],
"source": [
"X_cpu = X_train.to_pandas()\n",
"y_cpu = y_train.label.to_array()\n",
"y_cpu = y_train.label.to_numpy()\n",
"\n",
"\n",
"X_test_cpu = X_test.to_pandas()\n",
"y_test_cpu = y_test.label.to_array()"
"y_test_cpu = y_test.label.to_numpy()"
]
},
{
@@ -1028,4 +1028,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
2 changes: 1 addition & 1 deletion python/cuml/linear_model/mbsgd_classifier.pyx
@@ -64,7 +64,7 @@ class MBSGDClassifier(Base,
loss='squared_loss',
alpha=0.5)
cu_mbsgd_classifier.fit(X, y)
-cu_pred = cu_mbsgd_classifier.predict(pred_data).to_array()
+cu_pred = cu_mbsgd_classifier.predict(pred_data).to_numpy()
print(" cuML intercept : ", cu_mbsgd_classifier.intercept_)
print(" cuML coef : ", cu_mbsgd_classifier.coef_)
print("cuML predictions : ", cu_pred)
2 changes: 1 addition & 1 deletion python/cuml/linear_model/mbsgd_regressor.pyx
@@ -64,7 +64,7 @@ class MBSGDRegressor(Base,
loss='squared_loss',
alpha=0.5)
cu_mbsgd_regressor.fit(X, y)
-cu_pred = cu_mbsgd_regressor.predict(pred_data).to_array()
+cu_pred = cu_mbsgd_regressor.predict(pred_data).to_numpy()
print(" cuML intercept : ", cu_mbsgd_regressor.intercept_)
print(" cuML coef : ", cu_mbsgd_regressor.coef_)
print("cuML predictions : ", cu_pred)
2 changes: 1 addition & 1 deletion python/cuml/preprocessing/encoders.py
@@ -451,7 +451,7 @@ def inverse_transform(self, X):
j += enc_size
if self.input_type == 'array':
try:
-result = cp.asarray(result.as_gpu_matrix())
+result = result.to_cupy()
except ValueError:
warnings.warn("The input one hot encoding contains rows with "
"unknown categories. Since device arrays do not "
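For context on why the surrounding `try`/`except ValueError` is kept: like the old `as_gpu_matrix()` path, `to_cupy()` raises `ValueError` when a column still contains nulls, which is how rows with unknown categories surface in this inverse transform. A standalone sketch of that assumed behaviour, including the `na_value` escape hatch (not taken from cuML):

```python
import cudf

df = cudf.DataFrame({"a": [1.0, None, 3.0]})

try:
    arr = df.to_cupy()               # raises ValueError because of the null
except ValueError:
    arr = df.to_cupy(na_value=0.0)   # or handle the nulls some other way
```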
2 changes: 1 addition & 1 deletion python/cuml/solvers/sgd.pyx
@@ -149,7 +149,7 @@ class SGD(Base,
fit_intercept=True, batch_size=2,
tol=0.0, penalty='none', loss='squared_loss')
cu_sgd.fit(X, y)
-cu_pred = cu_sgd.predict(pred_data).to_array()
+cu_pred = cu_sgd.predict(pred_data).to_numpy()
print(" cuML intercept : ", cu_sgd.intercept_)
print(" cuML coef : ", cu_sgd.coef_)
print("cuML predictions : ", cu_pred)
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_kmeans.py
@@ -196,7 +196,7 @@ def test_transform(nrows, ncols, nclusters, n_parts, input_type, client):
if input_type == "dataframe":
xformed = cp.array(xformed
if len(xformed.shape) == 1
-else xformed.as_gpu_matrix())
+else xformed.to_cupy())

if nclusters == 1:
# series shape is (nrows,) not (nrows, 1) but both are valid
6 changes: 3 additions & 3 deletions python/cuml/test/dask/test_kneighbors_classifier.py
@@ -126,7 +126,7 @@ def test_predict_and_score(dataset, datatype, parameters, client):
d_outputs = d_model.predict(X_test, convert_dtype=True)
d_outputs = d_outputs.compute()

-d_outputs = d_outputs.as_matrix() \
+d_outputs = d_outputs.to_numpy() \
if isinstance(d_outputs, DataFrame) \
else d_outputs

@@ -164,9 +164,9 @@ def test_predict_proba(dataset, datatype, parameters, client):
d_probas = da.compute(d_probas)[0]

if datatype == 'dask_cudf':
-d_probas = list(map(lambda o: o.as_matrix()
+d_probas = list(map(lambda o: o.to_numpy()
if isinstance(o, DataFrame)
-else o.to_array()[..., np.newaxis],
+else o.to_numpy()[..., np.newaxis],
d_probas))

check_probabilities(l_probas, d_probas)
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_kneighbors_regressor.py
@@ -120,7 +120,7 @@ def test_predict_and_score(dataset, datatype, parameters, client):
d_outputs = d_model.predict(X_test, convert_dtype=True)
d_outputs = d_outputs.compute()

-d_outputs = d_outputs.as_matrix() \
+d_outputs = d_outputs.to_numpy() \
if isinstance(d_outputs, DataFrame) \
else d_outputs

8 changes: 4 additions & 4 deletions python/cuml/test/dask/test_label_encoder.py
@@ -35,9 +35,9 @@ def test_labelencoder_fit_transform(length, cardinality, client):
df = dask_cudf.from_cudf(tmp, npartitions=len(client.has_what()))
encoded = cuml.dask.preprocessing.LabelEncoder().fit_transform(df)

-df_arr = df.compute().to_array()
+df_arr = df.compute().to_numpy()
df_arr = _arr_to_similarity_mat(df_arr)
-encoder_arr = cp.asnumpy(encoded.compute().to_array())
+encoder_arr = cp.asnumpy(encoded.compute().to_numpy())
encoded_arr = _arr_to_similarity_mat(encoder_arr)
assert ((encoded_arr == encoded_arr.T) == (df_arr == df_arr.T)).all()
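The Dask tests follow the same two-step pattern: `.compute()` first gathers the distributed result into a single cudf object, and only then does `.to_numpy()` copy it to the host. A toy sketch of that assumed flow (data and names are illustrative):

```python
import cudf
import dask_cudf

preds = dask_cudf.from_cudf(cudf.Series([1, 0, 1, 1]), npartitions=2)

local = preds.compute()   # dask_cudf.Series -> cudf.Series on the GPU
host = local.to_numpy()   # cudf.Series -> host numpy.ndarray
```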

@@ -54,9 +54,9 @@ def test_labelencoder_transform(length, cardinality, client):

encoded = le.transform(df)

-df_arr = df.compute().to_array()
+df_arr = df.compute().to_numpy()
df_arr = _arr_to_similarity_mat(df_arr)
-encoder_arr = cp.asnumpy(encoded.compute().to_array())
+encoder_arr = cp.asnumpy(encoded.compute().to_numpy())
encoded_arr = _arr_to_similarity_mat(encoder_arr)
assert (
(encoded_arr == encoded_arr.T) == (df_arr == df_arr.T)
4 changes: 2 additions & 2 deletions python/cuml/test/dask/test_nearest_neighbors.py
@@ -113,7 +113,7 @@ def test_compare_skl(nrows, ncols, nclusters, n_parts, n_neighbors,

out_d, out_i = cumlModel.kneighbors(X_cudf)

-local_i = np.array(out_i.compute().as_gpu_matrix(), dtype="int64")
+local_i = np.array(out_i.compute().to_numpy(), dtype="int64")

sklModel = KNeighborsClassifier(n_neighbors=n_neighbors).fit(X, y)
skl_y_hat = sklModel.predict(X)
@@ -167,7 +167,7 @@ def test_batch_size(nrows, ncols, n_parts,

out_d, out_i = cumlModel.kneighbors(X_cudf)

-local_i = np.array(out_i.compute().as_gpu_matrix())
+local_i = out_i.compute().to_numpy()

y_hat, _ = predict(local_i, y, n_neighbors)

6 changes: 3 additions & 3 deletions python/cuml/test/dask/test_one_hot_encoder.py
@@ -24,13 +24,13 @@
from pandas.testing import assert_frame_equal
from cuml.test.test_one_hot_encoder import generate_inputs_from_categories
from cuml.test.test_one_hot_encoder import assert_inverse_equal
-from cuml.test.test_one_hot_encoder import from_df_to_array
+from cuml.test.test_one_hot_encoder import from_df_to_numpy


@pytest.mark.mg
def test_onehot_vs_skonehot(client):
X = DataFrame({'gender': ['Male', 'Female', 'Female'], 'int': [1, 3, 2]})
-skX = from_df_to_array(X)
+skX = from_df_to_numpy(X)
X = dask_cudf.from_cudf(X, npartitions=2)

enc = OneHotEncoder(sparse=False)
@@ -207,4 +207,4 @@ def test_onehot_get_categories(client):
cats = enc.categories_

for i in range(len(ref)):
-np.testing.assert_array_equal(ref[i], cats[i].to_array())
+np.testing.assert_array_equal(ref[i], cats[i].to_numpy())
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_pca.py
@@ -65,7 +65,7 @@ def test_pca_fit(nrows, ncols, n_parts, input_type, client):
with_sign = False if attr in ['components_'] else True
cuml_res = (getattr(cupca, attr))
if type(cuml_res) == np.ndarray:
-cuml_res = cuml_res.as_matrix()
+cuml_res = cuml_res.to_numpy()
skl_res = getattr(skpca, attr)
assert array_equal(cuml_res, skl_res, 1e-1, with_sign=with_sign)

8 changes: 4 additions & 4 deletions python/cuml/test/dask/test_random_forest.py
@@ -275,9 +275,9 @@ def test_rf_classification_dask_fil_predict_proba(partitions_per_worker,
cu_rf_mg.fit(X_train_df, y_train_df)

fil_preds = cu_rf_mg.predict(X_test_df).compute()
-fil_preds = fil_preds.to_array()
+fil_preds = fil_preds.to_numpy()
fil_preds_proba = cu_rf_mg.predict_proba(X_test_df).compute()
-fil_preds_proba = cp.asnumpy(fil_preds_proba.as_gpu_matrix())
+fil_preds_proba = fil_preds_proba.to_numpy()
np.testing.assert_equal(fil_preds, np.argmax(fil_preds_proba, axis=1))

y_proba = np.zeros(np.shape(fil_preds_proba))
@@ -426,13 +426,13 @@ def predict_with_json_rf_regressor(rf, x):

if estimator_type == 'classification':
expected_pred = cu_rf_mg.predict(X_dask).astype(np.int32)
-expected_pred = expected_pred.compute().to_array()
+expected_pred = expected_pred.compute().to_numpy()
for idx, row in enumerate(X):
majority_vote = predict_with_json_rf_classifier(json_obj, row)
assert expected_pred[idx] == majority_vote
elif estimator_type == 'regression':
expected_pred = cu_rf_mg.predict(X_dask).astype(np.float32)
-expected_pred = expected_pred.compute().to_array()
+expected_pred = expected_pred.compute().to_numpy()
pred = []
for idx, row in enumerate(X):
pred.append(predict_with_json_rf_regressor(json_obj, row))
2 changes: 1 addition & 1 deletion python/cuml/test/dask/test_tsvd.py
@@ -70,7 +70,7 @@ def test_pca_fit(data_info, input_type, client):
with_sign = False if attr in ['components_'] else True
cuml_res = (getattr(cutsvd, attr))
if type(cuml_res) == np.ndarray:
-cuml_res = cuml_res.as_matrix()
+cuml_res = cuml_res.to_numpy()
skl_res = getattr(sktsvd, attr)
if attr == 'singular_values_':
assert array_equal(cuml_res, skl_res, 1, with_sign=with_sign)
4 changes: 2 additions & 2 deletions python/cuml/test/test_array.py
@@ -359,14 +359,14 @@ def test_output(output_type, dtype, out_dtype, order, shape):

elif output_type == 'series':
comp = cudf.Series(np.ravel(inp)) == res
-assert np.all(comp.to_array())
+assert np.all(comp.to_numpy())

elif output_type == 'dataframe':
if len(inp.shape) == 1:
inp = inp.reshape(inp.shape[0], 1)
comp = cudf.DataFrame(inp)
comp = comp == res
-assert np.all(comp.as_gpu_matrix().copy_to_host())
+assert np.all(comp.to_numpy())

# check for e2e cartesian product:
if output_type not in ['dataframe', 'cudf']: