From cebb1f332f4ee17eec92bea99dee49a3b1748502 Mon Sep 17 00:00:00 2001 From: adi-darachi Date: Wed, 27 Oct 2021 12:04:09 +0300 Subject: [PATCH] Adding the option to receive 'undefined' from 'categorize' in case none of the tokens match anything --- lib/naive_bayes.js | 10 +++++++--- readme.md | 6 +++++- test/naive_bayes.js | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/lib/naive_bayes.js b/lib/naive_bayes.js index 15eb5b2..d326222 100644 --- a/lib/naive_bayes.js +++ b/lib/naive_bayes.js @@ -170,7 +170,7 @@ Naivebayes.prototype.learn = async function (text, category) { * Determine what category `text` belongs to. * * @param {String} text - * @return {Promise} category + * @return {Promise} category */ Naivebayes.prototype.categorize = async function (text) { var self = this @@ -193,20 +193,24 @@ Naivebayes.prototype.categorize = async function (text) { //take the log to avoid underflow var logProbability = Math.log(categoryProbability) + var atleastOneTokenExist = false + //now determine P( w | c ) for each word `w` in the text Object .keys(frequencyTable) .forEach(function (token) { var frequencyInText = frequencyTable[token] var tokenProbability = self.tokenProbability(token, category) - + var wordFrequencyCount = self.wordFrequencyCount[category][token] || 0; // console.log('token: %s category: `%s` tokenProbability: %d', token, category, tokenProbability) + atleastOneTokenExist = atleastOneTokenExist || wordFrequencyCount > 0; + //determine the log of the P( w | c ) for this word logProbability += frequencyInText * Math.log(tokenProbability) }) - if (logProbability > maxProbability) { + if ((!self.options.allowNoMatch || atleastOneTokenExist) && logProbability > maxProbability) { maxProbability = logProbability chosenCategory = category } diff --git a/readme.md b/readme.md index 2d0bc91..994beb3 100644 --- a/readme.md +++ b/readme.md @@ -54,11 +54,15 @@ Returns an instance of a Naive-Bayes 
Classifier. Pass in an optional `options` object to configure the instance. If you specify a `tokenizer` function in `options`, it will be used as the instance's tokenizer. It receives a (string) `text` argument - this is the string value that is passed in by you when you call `.learn()` or `.categorize()`. It must return an array of tokens. The default tokenizer removes punctuation and splits on spaces. +If `allowNoMatch` is set to `true` (defaults to `false`), `.categorize()` returns `undefined` when none of the tokens in the text match any token in any of the categories. +By default, in case of a no-match (as described above), the first category will be returned. + Eg. ```js var classifier = bayes({ - tokenizer: function (text) { return text.split(' ') } + tokenizer: function (text) { return text.split(' ') }, + allowNoMatch: true }) var classifier2 = bayes({ diff --git a/test/naive_bayes.js b/test/naive_bayes.js index 3897991..f2e28b1 100644 --- a/test/naive_bayes.js +++ b/test/naive_bayes.js @@ -48,6 +48,48 @@ describe('bayes using custom tokenizer', async function () { }) }) +describe('bayes using "allowNoMatch" flag', async function () { + + it('returns no-match value when "allowNoMatch" flag is provided in `options`.', async function () { + + var classifier = bayes({allowNoMatch: true}) + + //teach it positive phrases + await classifier.learn('amazing, awesome movie!! Yeah!!', 'positive') + await classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive') + + //teach it a negative phrase + await classifier.learn('terrible, shitty thing. Damn. 
Sucks!!', 'negative') + + //teach it a neutral phrase + await classifier.learn('I dont really know what to make of this.', 'neutral') + + //now test it to see that it returns no result + assert.equal(await classifier.categorize('notcategorizeable'), undefined); + + }) + + it('dose not returns no-match value when "allowNoMatch" flag is not provided in `options` (for backcomp).', async function () { + + var classifier = bayes() + + //teach it positive phrases + await classifier.learn('amazing, awesome movie!! Yeah!!', 'positive') + await classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive') + + //teach it a negative phrase + await classifier.learn('terrible, shitty thing. Damn. Sucks!!', 'negative') + + //teach it a neutral phrase + await classifier.learn('I dont really know what to make of this.', 'neutral') + + //now test it to see that it returns the first option just like it always used to + assert.equal(await classifier.categorize('notcategorizeable'), 'positive') + + }) + +}) + describe('bayes serializing/deserializing its state', function () { it('serializes/deserializes its state as JSON correctly.', async function () { var classifier = bayes()