From fb56a1d90965bf07d4a86ffc01ee4cc62ba410cf Mon Sep 17 00:00:00 2001 From: Nick Becker Date: Mon, 1 Mar 2021 07:36:44 -0800 Subject: [PATCH] switch cupy_conf_mat to confusion_matrix --- gpu_bdb/bdb_tools/cupy_metrics.py | 26 --------------------- gpu_bdb/queries/q05/gpu_bdb_query_05.py | 7 ++++-- gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py | 7 ++++-- 3 files changed, 10 insertions(+), 30 deletions(-) diff --git a/gpu_bdb/bdb_tools/cupy_metrics.py b/gpu_bdb/bdb_tools/cupy_metrics.py index 65c1c8c3..534e680a 100755 --- a/gpu_bdb/bdb_tools/cupy_metrics.py +++ b/gpu_bdb/bdb_tools/cupy_metrics.py @@ -17,32 +17,6 @@ import cupy as cp -def cupy_conf_mat(y, y_pred): - """ - Simple, fast confusion matrix for two class models designed to match sklearn. - Assumes the classes are one of [0, 1]. It will fail edge cases, which are fairly - numerous. - - Implementation taken from rapidsai/cuml#1524 - """ - nclasses = len(cp.unique(y)) - assert nclasses == 2 - res = cp.zeros((2, 2)) - - pos_pred_ix = cp.where(y_pred == 1) - neg_pred_ix = cp.where(y_pred != 1) - tn_sum = (y[neg_pred_ix] == 0).sum() - fn_sum = (y[neg_pred_ix] == 1).sum() - tp_sum = (y[pos_pred_ix] == 1).sum() - fp_sum = (y[pos_pred_ix] == 0).sum() - - res[0, 0] = tn_sum - res[1, 0] = fn_sum - res[0, 1] = fp_sum - res[1, 1] = tp_sum - return res - - def cupy_precision_score(y, y_pred): """ Simple precision score method for two class models. 
diff --git a/gpu_bdb/queries/q05/gpu_bdb_query_05.py b/gpu_bdb/queries/q05/gpu_bdb_query_05.py index 7a01b558..290cf127 100755 --- a/gpu_bdb/queries/q05/gpu_bdb_query_05.py +++ b/gpu_bdb/queries/q05/gpu_bdb_query_05.py @@ -25,7 +25,7 @@ ) from bdb_tools.readers import build_reader -from bdb_tools.cupy_metrics import cupy_conf_mat, cupy_precision_score +from bdb_tools.cupy_metrics import cupy_precision_score import cupy as cp import numpy as np @@ -78,6 +78,7 @@ def build_and_predict_model(ml_input_df): Returns the model and accuracy statistics """ import cuml + from cuml.metrics import confusion_matrix feature_names = ["college_education", "male"] + [ "clicks_in_%d" % i for i in range(1, 8) @@ -105,7 +106,9 @@ def build_and_predict_model(ml_input_df): results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array()) results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred)) - results_dict["confusion_matrix"] = cupy_conf_mat(cp.asarray(y), cp.asarray(y_pred)) + results_dict["confusion_matrix"] = confusion_matrix( + cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32") + ) results_dict["output_type"] = "supervised" return results_dict diff --git a/gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py b/gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py index edfd0c50..ffc0e3ee 100755 --- a/gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py +++ b/gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py @@ -29,7 +29,7 @@ gpubdb_argparser, run_query, ) -from bdb_tools.cupy_metrics import cupy_conf_mat, cupy_precision_score +from bdb_tools.cupy_metrics import cupy_precision_score from sklearn.metrics import roc_auc_score import cupy as cp @@ -58,6 +58,7 @@ def build_and_predict_model(ml_input_df): Returns the model and accuracy statistics """ import cuml + from cuml.metrics import confusion_matrix feature_names = ["college_education", "male"] + [ "clicks_in_%d" % i for i in range(1, 8) @@ -85,7 +86,9 @@ def build_and_predict_model(ml_input_df): results_dict["auc"] = 
roc_auc_score(y.to_array(), y_pred.to_array()) results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred)) - results_dict["confusion_matrix"] = cupy_conf_mat(cp.asarray(y), cp.asarray(y_pred)) + results_dict["confusion_matrix"] = confusion_matrix( + cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32") + ) results_dict["output_type"] = "supervised" return results_dict