Skip to content

Commit

Permalink
FIX: Correct and adjust tolerances of the MNMG (multi-node multi-GPU) logistic regression pytests
Browse files (browse the repository at this point in the history)
  • Loading branch information
dantegd committed Mar 20, 2024
1 parent 334d796 commit 88d659e
Showing 1 changed file with 63 additions and 11 deletions.
74 changes: 63 additions & 11 deletions python/cuml/tests/dask/test_dask_logistic_regression.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -394,9 +394,19 @@ def array_to_numpy(ary):

if sk_solver == "lbfgs" and standardization is False:
assert len(lr_coef) == len(sk_coef)
assert array_equal(lr_coef, sk_coef, tolerance, with_sign=True)
assert array_equal(
lr_intercept, sk_intercept, tolerance, with_sign=True
lr_coef,
sk_coef,
unit_tol=tolerance,
total_tol=tolerance,
with_sign=True,
)
assert array_equal(
lr_intercept,
sk_intercept,
unit_tol=tolerance,
total_tol=tolerance,
with_sign=True,
)

# test predict
Expand Down Expand Up @@ -777,7 +787,12 @@ def to_dask_data(X_train, X_test, y_train, y_test):
)
mgon_accuracy = accuracy_score(y_test, mgon_preds)

assert array_equal(X_train_dask.compute().to_numpy(), X_train)
assert array_equal(
X_train_dask.compute().to_numpy(),
X_train,
unit_tol=tolerance,
total_tol=tolerance,
)

# run CPU with StandardScaler
# if fit_intercept is true, mean center then scale the dataset
Expand Down Expand Up @@ -814,8 +829,18 @@ def to_dask_data(X_train, X_test, y_train, y_test):
mgon_intercept_origin = mgon.intercept_.to_numpy()

if sk_solver == "lbfgs":
assert array_equal(mgon_coef_origin, cpu.coef_, tolerance)
assert array_equal(mgon_intercept_origin, cpu.intercept_, tolerance)
assert array_equal(
mgon_coef_origin,
cpu.coef_,
unit_tol=tolerance,
total_tol=tolerance,
)
assert array_equal(
mgon_intercept_origin,
cpu.intercept_,
unit_tol=tolerance,
total_tol=tolerance,
)

# running MG with standardization=False
mgoff = cumlLBFGS_dask(
Expand Down Expand Up @@ -846,9 +871,17 @@ def to_dask_data(X_train, X_test, y_train, y_test):
print(f"mgoff.coef_: {mgoff.coef_.to_numpy()}")
print(f"mgon_intercept_origin: {mgon_intercept_origin}")
print(f"mgoff.intercept_: {mgoff.intercept_.to_numpy()}")
assert array_equal(mgon_coef_origin, mgoff.coef_.to_numpy(), tolerance)
assert array_equal(
mgon_intercept_origin, mgoff.intercept_.to_numpy(), tolerance
mgon_coef_origin,
mgoff.coef_.to_numpy(),
unit_tol=tolerance,
total_tol=tolerance,
)
assert array_equal(
mgon_intercept_origin,
mgoff.intercept_.to_numpy(),
unit_tol=tolerance,
total_tol=tolerance,
)


Expand Down Expand Up @@ -885,6 +918,8 @@ def test_standardization_example(fit_intercept, regularization, client):
"max_iter": max_iter,
}

tolerance = 0.005

X, y = make_classification_dataset(
datatype, n_rows, n_cols, n_info, n_classes=n_classes
)
Expand Down Expand Up @@ -914,13 +949,30 @@ def test_standardization_example(fit_intercept, regularization, client):
lr_off = cumlLBFGS_dask(standardization=False, **est_params)
lr_off.fit(X_df_scaled, y_df)

assert array_equal(lron_coef_origin, lr_off.coef_.to_numpy())
assert array_equal(lron_intercept_origin, lr_off.intercept_.to_numpy())
assert array_equal(
lron_coef_origin,
lr_off.coef_.to_numpy(),
unit_tol=tolerance,
total_tol=tolerance,
)
assert array_equal(
lron_intercept_origin,
lr_off.intercept_.to_numpy(),
unit_tol=tolerance,
total_tol=tolerance,
)

from cuml.linear_model import LogisticRegression as SG

sg = SG(**est_params)
sg.fit(X_scaled, y)

assert array_equal(lron_coef_origin, sg.coef_)
assert array_equal(lron_intercept_origin, sg.intercept_)
assert array_equal(
lron_coef_origin, sg.coef_, unit_tol=tolerance, total_tol=tolerance
)
assert array_equal(
lron_intercept_origin,
sg.intercept_,
unit_tol=tolerance,
total_tol=tolerance,
)

0 comments on commit 88d659e

Please sign in to comment.