From 270195b92c1cb06f10819eaae3d1bfc6d77b0e87 Mon Sep 17 00:00:00 2001
From: Nolan Lawson
Date: Sat, 26 Jul 2014 14:11:34 -0400
Subject: [PATCH] split on hyphens and remove empty tokens

---
 lib/tokenizer.js       | 3 +++
 test/tokenizer_test.js | 7 +++++++
 2 files changed, 10 insertions(+)

diff --git a/lib/tokenizer.js b/lib/tokenizer.js
index ca02aab4..f6021eae 100644
--- a/lib/tokenizer.js
+++ b/lib/tokenizer.js
@@ -26,6 +26,9 @@ lunr.tokenizer = function (obj) {
 
   return str
     .split(/(?:\s+|\-)/)
+    .filter(function (token) {
+      return !!token;
+    })
     .map(function (token) {
       return token.toLowerCase()
     })
diff --git a/test/tokenizer_test.js b/test/tokenizer_test.js
index 71fc3d50..3948a141 100644
--- a/test/tokenizer_test.js
+++ b/test/tokenizer_test.js
@@ -56,3 +56,10 @@ test("splitting strings with hyphens", function () {
 
   deepEqual(tokens, ['take', 'the', 'new', 'york', 'san', 'francisco', 'flight'])
 })
+
+test("splitting strings with hyphens and spaces", function () {
+  var simpleString = "Solve for A - B",
+      tokens = lunr.tokenizer(simpleString)
+
+  deepEqual(tokens, ['solve', 'for', 'a', 'b'])
+})