diff --git a/l10n/en-US/viewer.properties b/l10n/en-US/viewer.properties
index 3929482459c42b..85b5f80e90034c 100644
--- a/l10n/en-US/viewer.properties
+++ b/l10n/en-US/viewer.properties
@@ -165,6 +165,7 @@ find_next.title=Find the next occurrence of the phrase
find_next_label=Next
find_highlight=Highlight all
find_match_case_label=Match case
+find_entire_word_label=Whole words
find_reached_top=Reached top of document, continued from bottom
find_reached_bottom=Reached end of document, continued from top
find_not_found=Phrase not found
diff --git a/l10n/sv-SE/viewer.properties b/l10n/sv-SE/viewer.properties
index 7e5c685af6ae4f..5f60c5332d3377 100644
--- a/l10n/sv-SE/viewer.properties
+++ b/l10n/sv-SE/viewer.properties
@@ -165,6 +165,7 @@ find_next.title=Hitta nästa förekomst av frasen
find_next_label=Nästa
find_highlight=Markera alla
find_match_case_label=Matcha versal/gemen
+find_entire_word_label=Hela ord
find_reached_top=Nådde början av dokumentet, började från slutet
find_reached_bottom=Nådde slutet på dokumentet, började från början
find_not_found=Frasen hittades inte
diff --git a/web/app.js b/web/app.js
index f3d0635007775c..a4e8481ec18bab 100644
--- a/web/app.js
+++ b/web/app.js
@@ -1971,6 +1971,7 @@ function webViewerFind(evt) {
query: evt.query,
phraseSearch: evt.phraseSearch,
caseSensitive: evt.caseSensitive,
+ entireWord: evt.entireWord,
highlightAll: evt.highlightAll,
findPrevious: evt.findPrevious,
});
@@ -1981,6 +1982,7 @@ function webViewerFindFromUrlHash(evt) {
query: evt.query,
phraseSearch: evt.phraseSearch,
caseSensitive: false,
+ entireWord: false,
highlightAll: true,
findPrevious: false,
});
@@ -2117,6 +2119,7 @@ function webViewerKeyDown(evt) {
query: findState.query,
phraseSearch: findState.phraseSearch,
caseSensitive: findState.caseSensitive,
+ entireWord: findState.entireWord,
highlightAll: findState.highlightAll,
findPrevious: cmd === 5 || cmd === 12,
});
diff --git a/web/firefoxcom.js b/web/firefoxcom.js
index 99026e9faac078..3f893368f22c8b 100644
--- a/web/firefoxcom.js
+++ b/web/firefoxcom.js
@@ -168,7 +168,8 @@ class MozL10n {
'find',
'findagain',
'findhighlightallchange',
- 'findcasesensitivitychange'
+ 'findcasesensitivitychange',
+ 'findentirewordchange',
];
let handleEvent = function(evt) {
if (!PDFViewerApplication.initialized) {
@@ -180,13 +181,14 @@ class MozL10n {
query: evt.detail.query,
phraseSearch: true,
caseSensitive: !!evt.detail.caseSensitive,
+ entireWord: !!evt.detail.entireWord,
highlightAll: !!evt.detail.highlightAll,
findPrevious: !!evt.detail.findPrevious,
});
};
- for (let i = 0, len = events.length; i < len; i++) {
- window.addEventListener(events[i], handleEvent);
+ for (let event of events) {
+ window.addEventListener(event, handleEvent);
}
})();
diff --git a/web/pdf_find_bar.js b/web/pdf_find_bar.js
index cefd50017e906c..8a4d94aeb973f2 100644
--- a/web/pdf_find_bar.js
+++ b/web/pdf_find_bar.js
@@ -31,6 +31,7 @@ class PDFFindBar {
this.findField = options.findField || null;
this.highlightAll = options.highlightAllCheckbox || null;
this.caseSensitive = options.caseSensitiveCheckbox || null;
+ this.entireWord = options.entireWordCheckbox || null;
this.findMsg = options.findMsg || null;
this.findResultsCount = options.findResultsCount || null;
this.findStatusIcon = options.findStatusIcon || null;
@@ -83,6 +84,10 @@ class PDFFindBar {
this.dispatchEvent('casesensitivitychange');
});
+ this.entireWord.addEventListener('click', () => {
+ this.dispatchEvent('entirewordchange');
+ });
+
this.eventBus.on('resize', this._adjustWidth.bind(this));
}
@@ -95,8 +100,9 @@ class PDFFindBar {
source: this,
type,
query: this.findField.value,
- caseSensitive: this.caseSensitive.checked,
phraseSearch: true,
+ caseSensitive: this.caseSensitive.checked,
+ entireWord: this.entireWord.checked,
highlightAll: this.highlightAll.checked,
findPrevious: findPrev,
});
diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js
index d6810dfb1499d3..8ebf6833ce813f 100644
--- a/web/pdf_find_controller.js
+++ b/web/pdf_find_controller.js
@@ -14,6 +14,7 @@
*/
import { createPromiseCapability } from 'pdfjs-lib';
+import { getCharacterType } from './pdf_find_utils';
import { getGlobalEventBus } from './dom_events';
import { scrollIntoView } from './ui_utils';
@@ -190,7 +191,30 @@ class PDFFindController {
}
}
- _calculatePhraseMatch(query, pageIndex, pageContent) {
+ /**
+ * Determine if the search query constitutes a "whole word", by comparing the
+ * first/last character types with the preceding/following character types.
+ */
+ _isEntireWord(content, startIdx, length) {
+ if (startIdx > 0) { // Nothing precedes a match at index 0.
+ const first = content.charCodeAt(startIdx);
+ const limit = content.charCodeAt(startIdx - 1);
+ if (getCharacterType(first) === getCharacterType(limit)) {
+ return false; // No character-type change at the start boundary.
+ }
+ }
+ const endIdx = (startIdx + length - 1); // Index of the last matched char.
+ if (endIdx < (content.length - 1)) { // Nothing follows a match at the end.
+ const last = content.charCodeAt(endIdx);
+ const limit = content.charCodeAt(endIdx + 1);
+ if (getCharacterType(last) === getCharacterType(limit)) {
+ return false; // No character-type change at the end boundary.
+ }
+ }
+ return true; // Both boundaries passed (or were at the edges of `content`).
+ }
+
+ _calculatePhraseMatch(query, pageIndex, pageContent, entireWord) {
let matches = [];
let queryLen = query.length;
let matchIdx = -queryLen;
@@ -199,12 +223,15 @@ class PDFFindController {
if (matchIdx === -1) {
break;
}
+ if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
+ continue;
+ }
matches.push(matchIdx);
}
this.pageMatches[pageIndex] = matches;
}
- _calculateWordMatch(query, pageIndex, pageContent) {
+ _calculateWordMatch(query, pageIndex, pageContent, entireWord) {
let matchesWithLength = [];
// Divide the query into pieces and search for text in each piece.
let queryArray = query.match(/\S+/g);
@@ -217,6 +244,10 @@ class PDFFindController {
if (matchIdx === -1) {
break;
}
+ if (entireWord &&
+ !this._isEntireWord(pageContent, matchIdx, subqueryLen)) {
+ continue;
+ }
// Other searches do not, so we store the length.
matchesWithLength.push({
match: matchIdx,
@@ -244,6 +275,7 @@ class PDFFindController {
let query = this._normalize(this.state.query);
let caseSensitive = this.state.caseSensitive;
let phraseSearch = this.state.phraseSearch;
+ const entireWord = this.state.entireWord;
let queryLen = query.length;
if (queryLen === 0) {
@@ -257,9 +289,9 @@ class PDFFindController {
}
if (phraseSearch) {
- this._calculatePhraseMatch(query, pageIndex, pageContent);
+ this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord);
} else {
- this._calculateWordMatch(query, pageIndex, pageContent);
+ this._calculateWordMatch(query, pageIndex, pageContent, entireWord);
}
this._updatePage(pageIndex);
diff --git a/web/pdf_find_utils.js b/web/pdf_find_utils.js
new file mode 100644
index 00000000000000..a4a3a4871760f7
--- /dev/null
+++ b/web/pdf_find_utils.js
@@ -0,0 +1,107 @@
+/* Copyright 2018 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+const CharacterType = { // Character classes produced by getCharacterType().
+ SPACE: 0, // ASCII space/tab/CR/LF, plus NBSP.
+ ALPHA_LETTER: 1, // Also the default when no other class matches.
+ PUNCT: 2, // ASCII that is neither space, letter, digit nor underscore.
+ HAN_LETTER: 3,
+ KATAKANA_LETTER: 4,
+ HIRAGANA_LETTER: 5,
+ HALFWIDTH_KATAKANA_LETTER: 6,
+ THAI_LETTER: 7,
+};
+
+function isAlphabeticalScript(charCode) {
+ return charCode < 0x2E80; // Codes from 0x2E80 up go through the CJK checks.
+}
+
+function isAscii(charCode) {
+ return (charCode & 0xFF80) === 0; // Bits 7-15 clear: 0x00-0x7F for 16 bits.
+}
+
+function isAsciiAlpha(charCode) { // ASCII letters only, in either case.
+ return (charCode >= /* a = */ 0x61 && charCode <= /* z = */ 0x7A) ||
+ (charCode >= /* A = */ 0x41 && charCode <= /* Z = */ 0x5A);
+}
+
+function isAsciiDigit(charCode) { // ASCII decimal digits only.
+ return (charCode >= /* 0 = */ 0x30 && charCode <= /* 9 = */ 0x39);
+}
+
+function isAsciiSpace(charCode) { // Only these four; VT and FF are excluded.
+ return (charCode === /* SPACE = */ 0x20 || charCode === /* TAB = */ 0x09 ||
+ charCode === /* CR = */ 0x0D || charCode === /* LF = */ 0x0A);
+}
+
+function isHan(charCode) { // CJK ideograph ranges (cf. WordBreaker.cpp).
+  return (charCode >= 0x3400 && charCode <= 0x9FFF) || // Not 0x9FFFF, which
+         (charCode >= 0xF900 && charCode <= 0xFAFF);   // matches all >= 0x3400.
+}
+
+function isKatakana(charCode) { // The Unicode Katakana block.
+ return (charCode >= 0x30A0 && charCode <= 0x30FF);
+}
+
+function isHiragana(charCode) { // The Unicode Hiragana block.
+ return (charCode >= 0x3040 && charCode <= 0x309F);
+}
+
+function isHalfwidthKatakana(charCode) {
+ return (charCode >= 0xFF60 && charCode <= 0xFF9F); // In Halfwidth Forms.
+}
+
+function isThai(charCode) {
+ return (charCode & 0xFF80) === 0x0E00; // 0x0E00-0x0E7F for 16-bit values.
+}
+
+/**
+ * This function is based on the word-break detection implemented in:
+ * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp
+ */
+function getCharacterType(charCode) { // Returns a `CharacterType` value.
+ if (isAlphabeticalScript(charCode)) {
+ if (isAscii(charCode)) {
+ if (isAsciiSpace(charCode)) {
+ return CharacterType.SPACE;
+ } else if (isAsciiAlpha(charCode) || isAsciiDigit(charCode) ||
+ charCode === /* UNDERSCORE = */ 0x5F) {
+ return CharacterType.ALPHA_LETTER; // Letters, digits and underscore.
+ }
+ return CharacterType.PUNCT; // Every remaining ASCII character.
+ } else if (isThai(charCode)) {
+ return CharacterType.THAI_LETTER;
+ } else if (charCode === /* NBSP = */ 0xA0) {
+ return CharacterType.SPACE;
+ }
+ return CharacterType.ALPHA_LETTER; // Any other non-ASCII code below 0x2E80.
+ }
+
+ if (isHan(charCode)) { // From here on, charCode is not below 0x2E80.
+ return CharacterType.HAN_LETTER;
+ } else if (isKatakana(charCode)) {
+ return CharacterType.KATAKANA_LETTER;
+ } else if (isHiragana(charCode)) {
+ return CharacterType.HIRAGANA_LETTER;
+ } else if (isHalfwidthKatakana(charCode)) {
+ return CharacterType.HALFWIDTH_KATAKANA_LETTER;
+ }
+ return CharacterType.ALPHA_LETTER; // Fallback when no range above matched.
+}
+
+export {
+ CharacterType,
+ getCharacterType,
+};
diff --git a/web/viewer.html b/web/viewer.html
index 8f1b9e1f49c44e..f5425ccdbdb489 100644
--- a/web/viewer.html
+++ b/web/viewer.html
@@ -109,6 +109,8 @@
+
+
diff --git a/web/viewer.js b/web/viewer.js
index 7a9ae61c0f6b2a..113b2799eec935 100644
--- a/web/viewer.js
+++ b/web/viewer.js
@@ -134,6 +134,7 @@ function getViewerConfiguration() {
findField: document.getElementById('findInput'),
highlightAllCheckbox: document.getElementById('findHighlightAll'),
caseSensitiveCheckbox: document.getElementById('findMatchCase'),
+ entireWordCheckbox: document.getElementById('findEntireWord'),
findMsg: document.getElementById('findMsg'),
findResultsCount: document.getElementById('findResultsCount'),
findStatusIcon: document.getElementById('findStatusIcon'),