From 4d9eb29e8d0d49b7e888a4411c92eedcc73f8e58 Mon Sep 17 00:00:00 2001 From: Minh Le Date: Sun, 21 May 2017 09:32:17 +0200 Subject: [PATCH 1/9] Tpr: in Tpr and gmean we use the term specificity, therefore I think it makes sense to also introduce the term sensitivity fdr: wrong definition of the denominator (Wikipedia ROC, English version) mcc: missing definition of numerator --- R/measures.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/measures.R b/R/measures.R index e7ce586ee8..dea13bde74 100644 --- a/R/measures.R +++ b/R/measures.R @@ -958,7 +958,7 @@ measureFN = function(truth, response, negative) { tpr = makeMeasure(id = "tpr", minimize = FALSE, best = 1, worst = 0, properties = c("classif", "req.pred", "req.truth"), name = "True positive rate", - note = "Percentage of correctly classified observations in the positive class. Also called hit rate or recall.", + note = "Percentage of correctly classified observations in the positive class. Also called hit rate or recall or sensitivity.", fun = function(task, model, pred, feats, extra.args) { measureTPR(pred$data$truth, pred$data$response, pred$task.desc$positive) } @@ -1089,7 +1089,7 @@ measureNPV = function(truth, response, negative) { fdr = makeMeasure(id = "fdr", minimize = TRUE, best = 0, worst = 1, properties = c("classif", "req.pred", "req.truth"), name = "False discovery rate", - note = "Defined as: (fp) / (tn + fn).", + note = "Defined as: (fp) / (tp + fp).", fun = function(task, model, pred, feats, extra.args) { measureFDR(pred$data$truth, pred$data$response, pred$task.desc$positive) } @@ -1108,7 +1108,7 @@ measureFDR = function(truth, response, positive) { mcc = makeMeasure(id = "mcc", minimize = FALSE, properties = c("classif", "req.pred", "req.truth"), best = 1, worst = -1, name = "Matthews correlation coefficient", - note = "Defined as sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", + note = "Defined as (tp * tn -fp * fn) / sqrt((tp + fp) 
* (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", fun = function(task, model, pred, feats, extra.args) { measureMCC(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive) } From cfce72e3d162e6c21a683c21a148a0005c94c7aa Mon Sep 17 00:00:00 2001 From: Minh Le Date: Sun, 21 May 2017 14:53:37 +0200 Subject: [PATCH 2/9] mmce.test is basically manually comparing two vectors (the truth and the response column of the data in prediction.classif) The output of pred.classif is: Prediction: 4 observations predict.type: prob threshold: 0=0.33,1=0.33,2=0.33 time: 0.00 id truth prob.0 prob.1 prob.2 response 1 1 1 0.25 0.5 0.25 1 2 2 2 0.25 0.5 0.25 1 3 3 0 0.25 0.5 0.25 0 4 4 1 0.25 0.5 0.25 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit one can see that the second row of the response column was wrongly copied (instead of value 1 it had value 0). This small typo doesn’t change the outcome of the test. (Usually I’m not that nitpicky :D) --- tests/testthat/test_base_measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_base_measures.R b/tests/testthat/test_base_measures.R index 57dbd349c7..99b6d234cb 100644 --- a/tests/testthat/test_base_measures.R +++ b/tests/testthat/test_base_measures.R @@ -338,7 +338,7 @@ test_that("check measure calculations", { #test multiclass measures #mmce - mmce.test = mean(c(1L != 1L, 2L != 0L, 0L != 0L, 1L != 2L)) + mmce.test = mean(c(1L != 1L, 2L != 1L, 0L != 0L, 1L != 2L)) mmce.perf = performance(pred.classif, measures = mmce, model = mod.classif) expect_equal(mmce.test, mmce$fun(pred = pred.classif)) expect_equal(mmce.test, as.numeric(mmce.perf)) From 1048da9fd619b9d5360d41cf3971afdeb87a0385 Mon Sep 17 00:00:00 2001 From: Minh Le Date: Sun, 21 May 2017 23:17:44 +0200 Subject: [PATCH 3/9] no message --- R/measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measures.R b/R/measures.R index 
dea13bde74..84a22e33bf 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1108,7 +1108,7 @@ measureFDR = function(truth, response, positive) { mcc = makeMeasure(id = "mcc", minimize = FALSE, properties = c("classif", "req.pred", "req.truth"), best = 1, worst = -1, name = "Matthews correlation coefficient", - note = "Defined as (tp * tn -fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", + note = "Defined as (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", fun = function(task, model, pred, feats, extra.args) { measureMCC(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive) } From e9af7a44053be320f33f89db8757e2dba0090aa2 Mon Sep 17 00:00:00 2001 From: Minh Le Date: Mon, 22 May 2017 10:26:36 +0200 Subject: [PATCH 4/9] no message --- R/measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measures.R b/R/measures.R index 84a22e33bf..a8e822fc46 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1108,7 +1108,7 @@ measureFDR = function(truth, response, positive) { mcc = makeMeasure(id = "mcc", minimize = FALSE, properties = c("classif", "req.pred", "req.truth"), best = 1, worst = -1, name = "Matthews correlation coefficient", - note = "Defined as (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", + note = "Defined as (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", fun = function(task, model, pred, feats, extra.args) { measureMCC(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive) } From 4eec73272b3968a56029dc90c722a44504f08d3b Mon Sep 17 00:00:00 2001 From: Minh Le Date: Mon, 22 May 2017 10:27:05 +0200 Subject: [PATCH 5/9] no message --- R/measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measures.R b/R/measures.R index a8e822fc46..84a22e33bf 100644 --- 
a/R/measures.R +++ b/R/measures.R @@ -1108,7 +1108,7 @@ measureFDR = function(truth, response, positive) { mcc = makeMeasure(id = "mcc", minimize = FALSE, properties = c("classif", "req.pred", "req.truth"), best = 1, worst = -1, name = "Matthews correlation coefficient", - note = "Defined as (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", + note = "Defined as (tp * tn - fp * fn) / sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)), denominator set to 1 if 0", fun = function(task, model, pred, feats, extra.args) { measureMCC(pred$data$truth, pred$data$response, pred$task.desc$negative, pred$task.desc$positive) } From 245d9836e6da79dbf8dab45d7422e3169b595e66 Mon Sep 17 00:00:00 2001 From: Minh Le Date: Wed, 24 May 2017 09:59:09 +0200 Subject: [PATCH 6/9] number of fp is the same as fp --- R/measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measures.R b/R/measures.R index 84a22e33bf..24e86346a2 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1034,7 +1034,7 @@ measureFNR = function(truth, response, negative, positive) { ppv = makeMeasure(id = "ppv", minimize = FALSE, best = 1, worst = 0, properties = c("classif", "req.pred", "req.truth"), name = "Positive predictive value", - note = "Defined as: tp / (tp + number of fp). Also called precision. If the denominator is 0, PPV is set to be either 1 or 0 depending on whether the highest probability prediction is positive (1) or negative (0).", + note = "Defined as: tp / (tp + fp). Also called precision. 
If the denominator is 0, PPV is set to be either 1 or 0 depending on whether the highest probability prediction is positive (1) or negative (0).", fun = function(task, model, pred, feats, extra.args) { if (pred$predict.type == "prob") { prob = getPredictionProbabilities(pred) From 0b06f2116fdc6bd71b9f5a660c889d8813b38c26 Mon Sep 17 00:00:00 2001 From: Minh Le Date: Mon, 7 Aug 2017 10:26:54 +0200 Subject: [PATCH 7/9] no message --- R/measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measures.R b/R/measures.R index ed87c82b03..2bae607d6c 100644 --- a/R/measures.R +++ b/R/measures.R @@ -35,7 +35,7 @@ NULL ############################################################################### -### general ### +### general ## ############################################################################### #' @export #' @rdname measures From 8f75510cc759b786b56833c5d0a2c1dc9cd7e194 Mon Sep 17 00:00:00 2001 From: Minh Le Date: Mon, 7 Aug 2017 10:27:06 +0200 Subject: [PATCH 8/9] no message --- R/measures.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/measures.R b/R/measures.R index 2bae607d6c..ed87c82b03 100644 --- a/R/measures.R +++ b/R/measures.R @@ -35,7 +35,7 @@ NULL ############################################################################### -### general ## +### general ### ############################################################################### #' @export #' @rdname measures From 564cf3ead465fd4bfcca9cdea008fbf895b16ca5 Mon Sep 17 00:00:00 2001 From: Bernd Bischl Date: Wed, 9 Aug 2017 17:48:57 +0200 Subject: [PATCH 9/9] ... 
--- R/measures.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/measures.R b/R/measures.R index b1056c8dd3..b27abbd11b 100644 --- a/R/measures.R +++ b/R/measures.R @@ -1086,7 +1086,7 @@ measureEdgeCase = function(truth, positive, prob) { npv = makeMeasure(id = "npv", minimize = FALSE, best = 1, worst = 0, properties = c("classif", "req.pred", "req.truth"), name = "Negative predictive value", - note = "Defined as: (tn) / (tn + fn).", + note = "Defined as: tn / (tn + fn).", fun = function(task, model, pred, feats, extra.args) { measureNPV(pred$data$truth, pred$data$response, pred$task.desc$negative) } @@ -1105,7 +1105,7 @@ measureNPV = function(truth, response, negative) { fdr = makeMeasure(id = "fdr", minimize = TRUE, best = 0, worst = 1, properties = c("classif", "req.pred", "req.truth"), name = "False discovery rate", - note = "Defined as: (fp) / (tp + fp).", + note = "Defined as: fp / (tp + fp).", fun = function(task, model, pred, feats, extra.args) { measureFDR(pred$data$truth, pred$data$response, pred$task.desc$positive) }