Merge pull request #191 from beckernick/confusion-matrix-cleanup
switch cupy_conf_mat to confusion_matrix
beckernick authored Mar 1, 2021
2 parents 429d1ec + fb56a1d commit f97a7cf
Showing 3 changed files with 10 additions and 30 deletions.
26 changes: 0 additions & 26 deletions gpu_bdb/bdb_tools/cupy_metrics.py
@@ -17,32 +17,6 @@
import cupy as cp


-def cupy_conf_mat(y, y_pred):
-    """
-    Simple, fast confusion matrix for two class models designed to match sklearn.
-    Assumes the classes are one of [0, 1]. It will fail edge cases, which are fairly
-    numerous.
-    Implementation taken from rapidsai/cuml#1524
-    """
-    nclasses = len(cp.unique(y))
-    assert nclasses == 2
-    res = cp.zeros((2, 2))
-
-    pos_pred_ix = cp.where(y_pred == 1)
-    neg_pred_ix = cp.where(y_pred != 1)
-    tn_sum = (y[neg_pred_ix] == 0).sum()
-    fn_sum = (y[neg_pred_ix] == 1).sum()
-    tp_sum = (y[pos_pred_ix] == 1).sum()
-    fp_sum = (y[pos_pred_ix] == 0).sum()
-
-    res[0, 0] = tn_sum
-    res[1, 0] = fn_sum
-    res[0, 1] = fp_sum
-    res[1, 1] = tp_sum
-    return res
-
-
def cupy_precision_score(y, y_pred):
    """
    Simple precision score method for two class models.
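For reference, a minimal sketch (not part of this diff) of the replacement call, assuming cuml.metrics.confusion_matrix follows the same sklearn-style layout the removed helper produced:

# Sketch only (assumed example, not part of this commit): the cuML metric on a
# tiny binary-label input, matching what the removed cupy_conf_mat returned.
import cupy as cp
from cuml.metrics import confusion_matrix

y_true = cp.asarray([0, 1, 1, 0, 1], dtype="int32")
y_pred = cp.asarray([0, 1, 0, 0, 1], dtype="int32")

# sklearn convention: rows are true labels, columns are predictions,
# so the binary case reads [[TN, FP], [FN, TP]].
cm = confusion_matrix(y_true, y_pred)
print(cm)  # expected: [[2, 0], [1, 2]] -- 2 TN, 0 FP, 1 FN, 2 TP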
7 changes: 5 additions & 2 deletions gpu_bdb/queries/q05/gpu_bdb_query_05.py
@@ -25,7 +25,7 @@
)

from bdb_tools.readers import build_reader
-from bdb_tools.cupy_metrics import cupy_conf_mat, cupy_precision_score
+from bdb_tools.cupy_metrics import cupy_precision_score

import cupy as cp
import numpy as np
@@ -78,6 +78,7 @@ def build_and_predict_model(ml_input_df):
    Returns the model and accuracy statistics
    """
    import cuml
+    from cuml.metrics import confusion_matrix

    feature_names = ["college_education", "male"] + [
        "clicks_in_%d" % i for i in range(1, 8)
@@ -105,7 +106,9 @@ def build_and_predict_model(ml_input_df):

    results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array())
    results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred))
-    results_dict["confusion_matrix"] = cupy_conf_mat(cp.asarray(y), cp.asarray(y_pred))
+    results_dict["confusion_matrix"] = confusion_matrix(
+        cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32")
+    )
    results_dict["output_type"] = "supervised"
    return results_dict

7 changes: 5 additions & 2 deletions gpu_bdb/queries/q05/gpu_bdb_query_05_sql.py
@@ -29,7 +29,7 @@
    gpubdb_argparser,
    run_query,
)
-from bdb_tools.cupy_metrics import cupy_conf_mat, cupy_precision_score
+from bdb_tools.cupy_metrics import cupy_precision_score
from sklearn.metrics import roc_auc_score
import cupy as cp

@@ -58,6 +58,7 @@ def build_and_predict_model(ml_input_df):
    Returns the model and accuracy statistics
    """
    import cuml
+    from cuml.metrics import confusion_matrix

    feature_names = ["college_education", "male"] + [
        "clicks_in_%d" % i for i in range(1, 8)
@@ -85,7 +86,9 @@ def build_and_predict_model(ml_input_df):

    results_dict["auc"] = roc_auc_score(y.to_array(), y_pred.to_array())
    results_dict["precision"] = cupy_precision_score(cp.asarray(y), cp.asarray(y_pred))
-    results_dict["confusion_matrix"] = cupy_conf_mat(cp.asarray(y), cp.asarray(y_pred))
+    results_dict["confusion_matrix"] = confusion_matrix(
+        cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32")
+    )
    results_dict["output_type"] = "supervised"
    return results_dict

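A closing note on the pattern repeated in both query files (inferred from the diff rather than stated in it): the labels and predictions are cast to int32, presumably because the metric operates on integer class labels while the model outputs arrive as floats, and the resulting matrix lives on the GPU, so reporting code would typically copy it back to the host before unpacking. A small self-contained sketch under those assumptions:

# Standalone sketch (assumptions noted above, not part of this commit).
import cupy as cp
from cuml.metrics import confusion_matrix

y = cp.asarray([0.0, 1.0, 1.0, 0.0, 1.0])       # float labels, as the pipeline yields
y_pred = cp.asarray([0.0, 1.0, 0.0, 0.0, 1.0])  # float predictions

# Cast to integer class labels before computing the matrix, as in the diff above.
cm = confusion_matrix(
    cp.asarray(y, dtype="int32"), cp.asarray(y_pred, dtype="int32")
)

# Copy the device-resident result to the host and unpack it sklearn-style.
tn, fp, fn, tp = cp.asnumpy(cm).ravel()
print(tn, fp, fn, tp)  # 2 0 1 2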
