Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding the options to receive 'undefined' from 'categorize' in case a… #26

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions lib/naive_bayes.js
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ Naivebayes.prototype.learn = async function (text, category) {
* Determine what category `text` belongs to.
*
* @param {String} text
* @return {Promise<string>} category
* @return {Promise<string|void>} category
*/
Naivebayes.prototype.categorize = async function (text) {
var self = this
Expand All @@ -193,20 +193,24 @@ Naivebayes.prototype.categorize = async function (text) {
//take the log to avoid underflow
var logProbability = Math.log(categoryProbability)

var atleastOneTokenExist = false

//now determine P( w | c ) for each word `w` in the text
Object
.keys(frequencyTable)
.forEach(function (token) {
var frequencyInText = frequencyTable[token]
var tokenProbability = self.tokenProbability(token, category)

var wordFrequencyCount = self.wordFrequencyCount[category][token] || 0;
// console.log('token: %s category: `%s` tokenProbability: %d', token, category, tokenProbability)

atleastOneTokenExist = atleastOneTokenExist || wordFrequencyCount > 0;

//determine the log of the P( w | c ) for this word
logProbability += frequencyInText * Math.log(tokenProbability)
})

if (logProbability > maxProbability) {
if ((!self.options.allowNoMatch || atleastOneTokenExist) && logProbability > maxProbability) {
maxProbability = logProbability
chosenCategory = category
}
Expand Down
6 changes: 5 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,15 @@ Returns an instance of a Naive-Bayes Classifier.

Pass in an optional `options` object to configure the instance. If you specify a `tokenizer` function in `options`, it will be used as the instance's tokenizer. It receives a (string) `text` argument - this is the string value that is passed in by you when you call `.learn()` or `.categorize()`. It must return an array of tokens. The default tokenizer removes punctuation and splits on spaces.

If `allowNoMatch` is set to `true` (it defaults to `false`), `.categorize()` returns `undefined` when none of the tokens in the text match any token learned in any of the categories.
By default, in case of a no-match (as described above), the first category is returned.

Eg.

```js
var classifier = bayes({
tokenizer: function (text) { return text.split(' ') }
tokenizer: function (text) { return text.split(' ') },
allowNoMatch: true
})

var classifier2 = bayes({
Expand Down
42 changes: 42 additions & 0 deletions test/naive_bayes.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,48 @@ describe('bayes using custom tokenizer', async function () {
})
})

// Suite covering the `allowNoMatch` option: with the flag set, categorizing
// text that matches nothing yields `undefined`; without it, the previous
// behaviour (a category is always returned) is kept.
// The suite callback is deliberately NOT async: Mocha ignores the value a
// `describe` callback returns, so only the `it` callbacks are async.
describe('bayes using "allowNoMatch" flag', function () {

  /**
   * Teach `classifier` the fixture phrases shared by every test in this suite.
   * 'positive' is learned first, then 'negative', then 'neutral'.
   *
   * @param {Object} classifier instance returned by `bayes()`
   * @return {Promise<void>} settles once every phrase has been learned
   */
  async function teachFixturePhrases (classifier) {
    //teach it positive phrases
    await classifier.learn('amazing, awesome movie!! Yeah!!', 'positive')
    await classifier.learn('Sweet, this is incredibly, amazing, perfect, great!!', 'positive')

    //teach it a negative phrase
    await classifier.learn('terrible, shitty thing. Damn. Sucks!!', 'negative')

    //teach it a neutral phrase
    await classifier.learn('I dont really know what to make of this.', 'neutral')
  }

  it('returns no-match value when "allowNoMatch" flag is provided in `options`.', async function () {
    var classifier = bayes({allowNoMatch: true})
    await teachFixturePhrases(classifier)

    //now test it to see that it returns no result.
    //strictEqual on purpose: a loose comparison would also accept `null`
    assert.strictEqual(await classifier.categorize('notcategorizeable'), undefined)
  })

  it('does not return no-match value when "allowNoMatch" flag is not provided in `options` (for backcomp).', async function () {
    var classifier = bayes()
    await teachFixturePhrases(classifier)

    //now test it to see that it returns the first option just like it always used to
    assert.strictEqual(await classifier.categorize('notcategorizeable'), 'positive')
  })

})

describe('bayes serializing/deserializing its state', function () {
it('serializes/deserializes its state as JSON correctly.', async function () {
var classifier = bayes()
Expand Down