Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented toJsonObject and allowed fromJson to accept JSON objects #10

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/node_modules/
/nbproject/private/
104 changes: 62 additions & 42 deletions lib/naive_bayes.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,21 @@ var STATE_KEYS = module.exports.STATE_KEYS = [
* Initializes a NaiveBayes instance from a JSON state representation.
* Use this with classifier.toJson().
*
* @param {String} jsonStr state representation obtained by classifier.toJson()
* @param {String|Object} jsonStr state representation: the JSON string from classifier.toJson() or the plain object from classifier.toJsonObject()
* @return {NaiveBayes} Classifier
*/
module.exports.fromJson = function (jsonStr) {
var parsed;
try {
parsed = JSON.parse(jsonStr)
} catch (e) {
throw new Error('Naivebayes.fromJson expects a valid JSON string.')
if(typeof jsonStr === 'string') {
try {
parsed = JSON.parse(jsonStr)
} catch (e) {
throw new Error('Naivebayes.fromJson expects a valid JSON string.')
}
} else if(typeof jsonStr === 'object') {
parsed = jsonStr // if it's an object try use it directly
}

// init a new classifier
var classifier = new Naivebayes(parsed.options)

Expand Down Expand Up @@ -167,52 +172,60 @@ Naivebayes.prototype.learn = function (text, category) {
}

/**
* Determine what category `text` belongs to.
*
* @param {String} text
* @return {String} category
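* Returns a log-probability score for each category given the text, sorted from most likely to least likely. The
* `probability` field of each entry holds the log-scale value, so use it to compare categories against each other (for
* example, to see how far apart the top two are) rather than reading it as a probability between 0 and 1.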
*
* @param {string} text the text to categorize
* @returns {Array} an array of objects in the form {category, probability}
*/
Naivebayes.prototype.categorize = function (text) {
Naivebayes.prototype.probabilities = function (text) {
var self = this
, maxProbability = -Infinity
, chosenCategory = null

var tokens = self.tokenizer(text)
var frequencyTable = self.frequencyTable(tokens)

//iterate thru our categories to find the one with max probability for this text
Object
.keys(self.categories)
.forEach(function (category) {
var categoryProbabilities = Object
.keys(self.categories)
.map(function (category) {

//start by calculating the overall probability of this category
//=> out of all documents we've ever looked at, how many were
// mapped to this category
var categoryProbability = self.docCount[category] / self.totalDocuments
//start by calculating the overall probability of this category
//=> out of all documents we've ever looked at, how many were
// mapped to this category
var categoryProbability = self.docCount[category] / self.totalDocuments

//take the log to avoid underflow
var logProbability = Math.log(categoryProbability)
//take the log to avoid underflow
var logProbability = Math.log(categoryProbability)

//now determine P( w | c ) for each word `w` in the text
Object
.keys(frequencyTable)
.forEach(function (token) {
var frequencyInText = frequencyTable[token]
var tokenProbability = self.tokenProbability(token, category)
//now determine P( w | c ) for each word `w` in the text
Object
.keys(frequencyTable)
.forEach(function (token) {
var frequencyInText = frequencyTable[token]
var tokenProbability = self.tokenProbability(token, category)

// console.log('token: %s category: `%s` tokenProbability: %d', token, category, tokenProbability)
// console.log('token: %s category: `%s` tokenProbability: %d', token, category, tokenProbability)

//determine the log of the P( w | c ) for this word
logProbability += frequencyInText * Math.log(tokenProbability)
})
//determine the log of the P( w | c ) for this word
logProbability += frequencyInText * Math.log(tokenProbability)
})

if (logProbability > maxProbability) {
maxProbability = logProbability
chosenCategory = category
}
})
return {category: category, probability: logProbability}
});

categoryProbabilities.sort(function(c1, c2) {return c2.probability - c1.probability})

return categoryProbabilities
}

return chosenCategory
/**
 * Determine what category `text` belongs to.
 *
 * Delegates to `probabilities(text)`, whose result is sorted from most to
 * least likely, and returns the category of the first entry.
 *
 * @param {String} text
 * @return {String|null} category, or `null` when there is no category to choose from
 */
Naivebayes.prototype.categorize = function (text) {
  var ranked = this.probabilities(text)

  // With no known categories the ranked list is empty; return null rather than
  // throwing a TypeError on `ranked[0].category`.
  return ranked.length > 0 ? ranked[0].category : null
}

/**
Expand Down Expand Up @@ -255,18 +268,25 @@ Naivebayes.prototype.frequencyTable = function (tokens) {
}

/**
* Dump the classifier's state as a JSON string.
* @return {String} Representation of the classifier.
* Dump the classifier's state as a simple object, suitable for embedding within a JSON document or another object.
* @return {Object} Representation of the classifier.
*/
Naivebayes.prototype.toJson = function () {
Naivebayes.prototype.toJsonObject = function () {
  var classifier = this

  // Build a fresh plain object holding each persisted property listed in STATE_KEYS.
  // Values are assigned as-is (no cloning), exactly as they sit on the classifier.
  return STATE_KEYS.reduce(function (snapshot, key) {
    snapshot[key] = classifier[key]
    return snapshot
  }, {})
}

var jsonStr = JSON.stringify(state)

/**
 * Serialize the classifier's state to a JSON string.
 * Equivalent to `JSON.stringify(classifier.toJsonObject())`.
 * @return {String} Representation of the classifier.
 */
Naivebayes.prototype.toJson = function () {
  return JSON.stringify(this.toJsonObject())
}

Expand Down
8 changes: 6 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,15 @@ Returns the `category` it thinks `text` belongs to. Its judgement is based on wh

###`classifier.toJson()`

Returns the JSON representation of a classifier.
Returns the JSON representation of a classifier. This is the same as `JSON.stringify(classifier.toJsonObject())`.

###`classifier.toJsonObject()`

Returns a JSON-friendly representation of the classifier as an `object`.

###`var classifier = bayes.fromJson(jsonStr)`

Returns a classifier instance from the JSON representation. Use this with the JSON representation obtained from `classifier.toJson()`
Returns a classifier instance from the JSON representation. Use this with the JSON string obtained from `classifier.toJson()` or the object obtained from `classifier.toJsonObject()`.

## License

Expand Down
18 changes: 18 additions & 0 deletions test/naive_bayes.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,24 @@ describe('bayes serializing/deserializing its state', function () {
assert.deepEqual(revivedClassifier[k], classifier[k])
})

done()
})

it('serializes/deserializes its state as an Object correctly.', function (done) {
  var original = bayes()

  original.learn('Fun times were had by all', 'positive')
  original.learn('sad dark rainy day in the cave', 'negative')

  // round-trip through the plain-object representation (no JSON string involved)
  var restored = bayes.fromJson(original.toJsonObject())

  // every persisted state key must survive the round trip unchanged
  bayes.STATE_KEYS.forEach(function (key) {
    assert.deepEqual(restored[key], original[key])
  })

  done()
})
})
Expand Down