Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

switch cupy_conf_mat to confusion_matrix #191

Merged
merged 1 commit into from
Mar 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions gpu_bdb/bdb_tools/cupy_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,6 @@
import cupy as cp


def cupy_conf_mat(y, y_pred):
    """
    Simple, fast confusion matrix for two class models designed to match sklearn.
    Assumes the classes are one of [0, 1]. It will fail edge cases, which are fairly
    numerous.

    Implementation taken from rapidsai/cuml#1524

    Parameters
    ----------
    y : cupy.ndarray
        Ground-truth labels; must contain exactly two distinct classes (0 and 1).
    y_pred : cupy.ndarray
        Predicted labels, same length as ``y``.

    Returns
    -------
    cupy.ndarray
        2x2 float matrix in sklearn layout: rows index the true label,
        columns the predicted label — ``[[tn, fp], [fn, tp]]``.

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly two distinct classes.
    """
    nclasses = len(cp.unique(y))
    # Raise instead of `assert`: asserts are stripped under `python -O`,
    # which would silently let non-binary input through and corrupt the result.
    if nclasses != 2:
        raise ValueError(
            "cupy_conf_mat only supports two-class problems; "
            "got %d distinct classes in y" % nclasses
        )
    res = cp.zeros((2, 2))

    pos_pred_ix = cp.where(y_pred == 1)
    neg_pred_ix = cp.where(y_pred != 1)

    res[0, 0] = (y[neg_pred_ix] == 0).sum()  # true negatives
    res[1, 0] = (y[neg_pred_ix] == 1).sum()  # false negatives
    res[0, 1] = (y[pos_pred_ix] == 0).sum()  # false positives
    res[1, 1] = (y[pos_pred_ix] == 1).sum()  # true positives
    return res


def cupy_precision_score(y, y_pred):
"""
Simple precision score method for two class models.
Expand Down
7 changes: 5 additions & 2 deletions gpu_bdb/queries/q05/gpu_bdb_query_05.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
)

from bdb_tools.readers import build_reader
from bdb_tools.cupy_metrics import cupy_conf_mat, cupy_precision_score
from bdb_tools.cupy_metrics import cupy_precision_score

import cupy as cp
import numpy as np
Expand Down Expand Up @@ -78,6 +78,7 @@ def build_and_predict_model(ml_input_df):
Returns the model and accuracy statistics
"""
import cuml
from cuml.metrics import confusion_matrix

feature_names = ["college_education", "male"] + [
"clicks_in_%d" % i for i in range(1, 8)
Expand Down Expand Up @@ -105,7 +106,9 @@ def build_and_predict_model(ml_input_df):

results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array())
results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred))
results_dict["confusion_matrix"] = cupy_conf_mat(cp.asarray(y), cp.asarray(y_pred))
results_dict["confusion_matrix"] = confusion_matrix(
cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32")
)
results_dict["output_type"] = "supervised"
return results_dict

Expand Down
7 changes: 5 additions & 2 deletions gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
gpubdb_argparser,
run_query,
)
from bdb_tools.cupy_metrics import cupy_conf_mat, cupy_precision_score
from bdb_tools.cupy_metrics import cupy_precision_score
from sklearn.metrics import roc_auc_score
import cupy as cp

Expand Down Expand Up @@ -58,6 +58,7 @@ def build_and_predict_model(ml_input_df):
Returns the model and accuracy statistics
"""
import cuml
from cuml.metrics import confusion_matrix

feature_names = ["college_education", "male"] + [
"clicks_in_%d" % i for i in range(1, 8)
Expand Down Expand Up @@ -85,7 +86,9 @@ def build_and_predict_model(ml_input_df):

results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array())
results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred))
results_dict["confusion_matrix"] = cupy_conf_mat(cp.asarray(y), cp.asarray(y_pred))
results_dict["confusion_matrix"] = confusion_matrix(
cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32")
)
results_dict["output_type"] = "supervised"
return results_dict

Expand Down