From cebb1f332f4ee17eec92bea99dee49a3b1748502 Mon Sep 17 00:00:00 2001
From: adi-darachi <adi.darachi@gmail.com>
Date: Wed, 27 Oct 2021 12:04:09 +0300
Subject: [PATCH] Adding the option to receive 'undefined' from 'categorize'
 in case none of the tokens match anything

---
 lib/naive_bayes.js  | 10 +++++++---
 readme.md           |  6 +++++-
 test/naive_bayes.js | 42 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+), 4 deletions(-)
diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js
index 15eb5b2..d326222 100644
--- a/lib/naive_bayes.js
+++ b/lib/naive_bayes.js
@@ -170,7 +170,7 @@ Naivebayes.prototype.learn = async function (text, category) {
  * Determine what category `text` belongs to.
  *
  * @param  {String} text
- * @return {Promise<string>} category
+ * @return {Promise<string|void>} category
  */
 Naivebayes.prototype.categorize = async function (text) {
   var self = this
@@ -193,20 +193,24 @@ Naivebayes.prototype.categorize = async function (text) {
     //take the log to avoid underflow
     var logProbability = Math.log(categoryProbability)
 
+    var atleastOneTokenExist = false
+
     //now determine P( w | c ) for each word `w` in the text
     Object
     .keys(frequencyTable)
     .forEach(function (token) {
       var frequencyInText = frequencyTable[token]
       var tokenProbability = self.tokenProbability(token, category)
-
+      var wordFrequencyCount = self.wordFrequencyCount[category][token] || 0;
       // console.log('token: %s category: `%s` tokenProbability: %d', token, category, tokenProbability)
 
+      atleastOneTokenExist =  atleastOneTokenExist || wordFrequencyCount > 0;
+
       //determine the log of the P( w | c ) for this word
       logProbability += frequencyInText * Math.log(tokenProbability)
     })
 
-    if (logProbability > maxProbability) {
+    if ((!self.options.allowNoMatch || atleastOneTokenExist) && logProbability > maxProbability) {
       maxProbability = logProbability
       chosenCategory = category
     }
diff --git a/readme.md b/readme.md
index 2d0bc91..994beb3 100644
--- a/readme.md
+++ b/readme.md
@@ -54,11 +54,15 @@ Returns an instance of a Naive-Bayes Classifier.
 
 Pass in an optional `options` object to configure the instance. If you specify a `tokenizer` function in `options`, it will be used as the instance's tokenizer. It receives a (string) `text` argument - this is the string value that is passed in by you when you call `.learn()` or `.categorize()`. It must return an array of tokens. The default tokenizer removes punctuation and splits on spaces.
 
+If `allowNoMatch` is set to `true` (it defaults to `false`), `.categorize()` returns `undefined` when none of the tokens in the text match any token in any of the categories.
+By default, in case of a no-match (as described above), the first category is returned.
+
 Eg.
 
 ```js
 var classifier = bayes({
-    tokenizer: function (text) { return text.split(' ') }
+    tokenizer: function (text) { return text.split(' ') },
+    allowNoMatch: true
 })
 
 var classifier2 = bayes({
diff --git a/test/naive_bayes.js b/test/naive_bayes.js
index 3897991..f2e28b1 100644
--- a/test/naive_bayes.js
+++ b/test/naive_bayes.js
@@ -48,6 +48,48 @@ describe('bayes using custom tokenizer', async function () {
   })
 })
 
+describe('bayes using "allowNoMatch" flag', async function () {
+
+  it('returns no-match value when "allowNoMatch" flag is provided in `options`.', async function () {
+
+    var classifier = bayes({allowNoMatch: true})
+
+    //teach it positive phrases
+    await classifier.learn('amazing, awesome movie!! Yeah!!', 'positive')
+    await classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive')
+
+    //teach it a negative phrase
+    await classifier.learn('terrible, shitty thing. Damn. Sucks!!', 'negative')
+
+    //teach it a neutral phrase
+    await classifier.learn('I dont really know what to make of this.', 'neutral')
+
+    //now test it to see that it returns no result
+    assert.equal(await classifier.categorize('notcategorizeable'), undefined);
+
+  })
+
+  it('dose not returns no-match value when "allowNoMatch" flag is not provided in `options` (for backcomp).', async function () {
+
+    var classifier = bayes()
+
+    //teach it positive phrases
+    await classifier.learn('amazing, awesome movie!! Yeah!!', 'positive')
+    await classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive')
+
+    //teach it a negative phrase
+    await classifier.learn('terrible, shitty thing. Damn. Sucks!!', 'negative')
+
+    //teach it a neutral phrase
+    await classifier.learn('I dont really know what to make of this.', 'neutral')
+
+    //now test it to see that it returns the first option just like it always used to
+    assert.equal(await classifier.categorize('notcategorizeable'), 'positive')
+
+  })
+
+})
+
 describe('bayes serializing/deserializing its state', function () {
   it('serializes/deserializes its state as JSON correctly.', async function () {
       var classifier = bayes()