From 238a8decfe6f5196ab2e1b24b131c12a97b90d09 Mon Sep 17 00:00:00 2001
From: Tyler Thomas <36181311+tylerjthomas9@users.noreply.github.com>
Date: Tue, 17 Nov 2020 14:56:29 -0600
Subject: [PATCH] [REVIEW] Fix Stochastic Gradient Descent Example (#3136)

* Fix Stochastic Gradient Descent Example

The example that is currently in the docs does not run. dtype, penalty, lrate, loss are not defined. This new version sets the default values for the parameters of cumlSGD, and copies Mini Batch SGD Regression's dtype for pred_data['col1'], pred_data['col2']. When running this example, I also got slightly different values for the output, so these were also updated.

* Added PR #3136 to 0.17 Bug Fixes
---
 CHANGELOG.md                | 1 +
 python/cuml/solvers/sgd.pyx | 22 +++++++++++-----------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c65f2758a6..1f8bbc6f76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,6 +52,7 @@
 - PR #3117: Fix two crashes in experimental RF backend
 - PR #3119: Fix memset args for benchmark
 - PR #3130: Return Python string from `dump_as_json()` of RF
+- PR #3136: Fix stochastic gradient descent example
 
 # cuML 0.16.0 (Date TBD)
 
diff --git a/python/cuml/solvers/sgd.pyx b/python/cuml/solvers/sgd.pyx
index 091fada5ed..8c968b2d1b 100644
--- a/python/cuml/solvers/sgd.pyx
+++ b/python/cuml/solvers/sgd.pyx
@@ -137,15 +137,15 @@ class SGD(Base):
         import cudf
         from cuml.solvers import SGD as cumlSGD
         X = cudf.DataFrame()
-        X['col1'] = np.array([1,1,2,2], dtype = np.float32)
-        X['col2'] = np.array([1,2,2,3], dtype = np.float32)
+        X['col1'] = np.array([1,1,2,2], dtype=np.float32)
+        X['col2'] = np.array([1,2,2,3], dtype=np.float32)
         y = cudf.Series(np.array([1, 1, 2, 2], dtype=np.float32))
         pred_data = cudf.DataFrame()
-        pred_data['col1'] = np.asarray([3, 2], dtype=dtype)
-        pred_data['col2'] = np.asarray([5, 5], dtype=dtype)
-        cu_sgd = cumlSGD(learning_rate=lrate, eta0=0.005, epochs=2000,
+        pred_data['col1'] = np.asarray([3, 2], dtype=np.float32)
+        pred_data['col2'] = np.asarray([5, 5], dtype=np.float32)
+        cu_sgd = cumlSGD(learning_rate='constant', eta0=0.005, epochs=2000,
                          fit_intercept=True, batch_size=2,
-                         tol=0.0, penalty=penalty, loss=loss)
+                         tol=0.0, penalty='none', loss='squared_loss')
         cu_sgd.fit(X, y)
         cu_pred = cu_sgd.predict(pred_data).to_array()
         print(" cuML intercept : ", cu_sgd.intercept_)
@@ -156,11 +156,11 @@ class SGD(Base):
 
     .. code-block:: python
 
-        cuML intercept : 0.004561662673950195
-        cuML coef : 0 0.9834546
-        1 0.010128272
-        dtype: float32
-        cuML predictions : [3.0055666 2.0221121]
+        cuML intercept : 0.0041877031326293945
+        cuML coef : 0 0.984174
+        1 0.009776
+        dtype: float32
+        cuML predictions : [3.005588 2.0214138]
 
     Parameters