A simple term-frequency library that takes a document vector and computes term frequencies using the scheme of your choosing.
First, make the necessary require-ments:
var sw = require('stopword')
var tf = require('term-frequency');
var tv = require('term-vector');
You can then do:
var vec = tv.getVector(
sw.removeStopwords(
'This is a really, really cool vector. I like this VeCTor'
.toLowerCase()
.split(/[ ,\.]+/)
)
)
var freq = tf.getTermFrequency(vec);
// freq is now
// [ [ [ 'cool' ], 1 ], [ [ 'really' ], 2 ], [ [ 'vector' ], 2 ] ];
Or you can specify a TF scheme like so:
var vec = tv.getVector('This is a really, really cool vector. I like this VeCTor');
var freq = tf.getTermFrequency(vec, {scheme: tf.logNormalization});
// freq is now:
// [
// [ [ 'cool' ], 0.6931471805599453 ],
// [ [ 'really' ], 1.0986122886681098 ],
// [ [ 'vector' ], 1.0986122886681098 ]
// ]
Currently supported schemes are:
raw
logNormalization
doubleNormalization0point5
selfString
selfNumeric
See the Wikipedia page for more info about term-frequency calculation.
You can also weight your calculations, as shown below.
A weight is a numeric value that is added to each calculated score.
var freq = tf.getTermFrequency(vec, {
scheme: tf.doubleNormalization0point5,
weight: 5
});
// freq is now
// [
// [ [ 'cool' ], 5.7027325540540822 ],
// [ [ 'really' ], 5.9581453659370776 ],
// [ [ 'vector' ], 5.9581453659370776 ]
// ]