diff --git a/l10n/en-US/viewer.properties b/l10n/en-US/viewer.properties index 3929482459c42b..85b5f80e90034c 100644 --- a/l10n/en-US/viewer.properties +++ b/l10n/en-US/viewer.properties @@ -165,6 +165,7 @@ find_next.title=Find the next occurrence of the phrase find_next_label=Next find_highlight=Highlight all find_match_case_label=Match case +find_entire_word_label=Whole words find_reached_top=Reached top of document, continued from bottom find_reached_bottom=Reached end of document, continued from top find_not_found=Phrase not found diff --git a/l10n/sv-SE/viewer.properties b/l10n/sv-SE/viewer.properties index 7e5c685af6ae4f..5f60c5332d3377 100644 --- a/l10n/sv-SE/viewer.properties +++ b/l10n/sv-SE/viewer.properties @@ -165,6 +165,7 @@ find_next.title=Hitta nästa förekomst av frasen find_next_label=Nästa find_highlight=Markera alla find_match_case_label=Matcha versal/gemen +find_entire_word_label=Hela ord find_reached_top=Nådde början av dokumentet, började från slutet find_reached_bottom=Nådde slutet på dokumentet, började från början find_not_found=Frasen hittades inte diff --git a/web/app.js b/web/app.js index f3d0635007775c..a4e8481ec18bab 100644 --- a/web/app.js +++ b/web/app.js @@ -1971,6 +1971,7 @@ function webViewerFind(evt) { query: evt.query, phraseSearch: evt.phraseSearch, caseSensitive: evt.caseSensitive, + entireWord: evt.entireWord, highlightAll: evt.highlightAll, findPrevious: evt.findPrevious, }); @@ -1981,6 +1982,7 @@ function webViewerFindFromUrlHash(evt) { query: evt.query, phraseSearch: evt.phraseSearch, caseSensitive: false, + entireWord: false, highlightAll: true, findPrevious: false, }); @@ -2117,6 +2119,7 @@ function webViewerKeyDown(evt) { query: findState.query, phraseSearch: findState.phraseSearch, caseSensitive: findState.caseSensitive, + entireWord: findState.entireWord, highlightAll: findState.highlightAll, findPrevious: cmd === 5 || cmd === 12, }); diff --git a/web/firefoxcom.js b/web/firefoxcom.js index 
99026e9faac078..3f893368f22c8b 100644 --- a/web/firefoxcom.js +++ b/web/firefoxcom.js @@ -168,7 +168,8 @@ class MozL10n { 'find', 'findagain', 'findhighlightallchange', - 'findcasesensitivitychange' + 'findcasesensitivitychange', + 'findentirewordchange', ]; let handleEvent = function(evt) { if (!PDFViewerApplication.initialized) { @@ -180,13 +181,14 @@ class MozL10n { query: evt.detail.query, phraseSearch: true, caseSensitive: !!evt.detail.caseSensitive, + entireWord: !!evt.detail.entireWord, highlightAll: !!evt.detail.highlightAll, findPrevious: !!evt.detail.findPrevious, }); }; - for (let i = 0, len = events.length; i < len; i++) { - window.addEventListener(events[i], handleEvent); + for (let event of events) { + window.addEventListener(event, handleEvent); } })(); diff --git a/web/pdf_find_bar.js b/web/pdf_find_bar.js index cefd50017e906c..8a4d94aeb973f2 100644 --- a/web/pdf_find_bar.js +++ b/web/pdf_find_bar.js @@ -31,6 +31,7 @@ class PDFFindBar { this.findField = options.findField || null; this.highlightAll = options.highlightAllCheckbox || null; this.caseSensitive = options.caseSensitiveCheckbox || null; + this.entireWord = options.entireWordCheckbox || null; this.findMsg = options.findMsg || null; this.findResultsCount = options.findResultsCount || null; this.findStatusIcon = options.findStatusIcon || null; @@ -83,6 +84,10 @@ class PDFFindBar { this.dispatchEvent('casesensitivitychange'); }); + this.entireWord.addEventListener('click', () => { + this.dispatchEvent('entirewordchange'); + }); + this.eventBus.on('resize', this._adjustWidth.bind(this)); } @@ -95,8 +100,9 @@ class PDFFindBar { source: this, type, query: this.findField.value, - caseSensitive: this.caseSensitive.checked, phraseSearch: true, + caseSensitive: this.caseSensitive.checked, + entireWord: this.entireWord.checked, highlightAll: this.highlightAll.checked, findPrevious: findPrev, }); diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index d6810dfb1499d3..8ebf6833ce813f 
100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -14,6 +14,7 @@ */ import { createPromiseCapability } from 'pdfjs-lib'; +import { getCharacterType } from './pdf_find_utils'; import { getGlobalEventBus } from './dom_events'; import { scrollIntoView } from './ui_utils'; @@ -190,7 +191,30 @@ class PDFFindController { } } - _calculatePhraseMatch(query, pageIndex, pageContent) { + /** + * Determine if the search query constitutes a "whole word", by comparing the + * first/last character types with the preceding/following character types. + */ + _isEntireWord(content, startIdx, length) { + if (startIdx > 0) { + const first = content.charCodeAt(startIdx); + const limit = content.charCodeAt(startIdx - 1); + if (getCharacterType(first) === getCharacterType(limit)) { + return false; + } + } + const endIdx = (startIdx + length - 1); + if (endIdx < (content.length - 1)) { + const last = content.charCodeAt(endIdx); + const limit = content.charCodeAt(endIdx + 1); + if (getCharacterType(last) === getCharacterType(limit)) { + return false; + } + } + return true; + } + + _calculatePhraseMatch(query, pageIndex, pageContent, entireWord) { let matches = []; let queryLen = query.length; let matchIdx = -queryLen; @@ -199,12 +223,15 @@ class PDFFindController { if (matchIdx === -1) { break; } + if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) { + continue; + } matches.push(matchIdx); } this.pageMatches[pageIndex] = matches; } - _calculateWordMatch(query, pageIndex, pageContent) { + _calculateWordMatch(query, pageIndex, pageContent, entireWord) { let matchesWithLength = []; // Divide the query into pieces and search for text in each piece. let queryArray = query.match(/\S+/g); @@ -217,6 +244,10 @@ class PDFFindController { if (matchIdx === -1) { break; } + if (entireWord && + !this._isEntireWord(pageContent, matchIdx, subqueryLen)) { + continue; + } // Other searches do not, so we store the length. 
matchesWithLength.push({ match: matchIdx, @@ -244,6 +275,7 @@ class PDFFindController { let query = this._normalize(this.state.query); let caseSensitive = this.state.caseSensitive; let phraseSearch = this.state.phraseSearch; + const entireWord = this.state.entireWord; let queryLen = query.length; if (queryLen === 0) { @@ -257,9 +289,9 @@ class PDFFindController { } if (phraseSearch) { - this._calculatePhraseMatch(query, pageIndex, pageContent); + this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord); } else { - this._calculateWordMatch(query, pageIndex, pageContent); + this._calculateWordMatch(query, pageIndex, pageContent, entireWord); } this._updatePage(pageIndex); diff --git a/web/pdf_find_utils.js b/web/pdf_find_utils.js new file mode 100644 index 00000000000000..a4a3a4871760f7 --- /dev/null +++ b/web/pdf_find_utils.js @@ -0,0 +1,107 @@ +/* Copyright 2018 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +const CharacterType = { + SPACE: 0, + ALPHA_LETTER: 1, + PUNCT: 2, + HAN_LETTER: 3, + KATAKANA_LETTER: 4, + HIRAGANA_LETTER: 5, + HALFWIDTH_KATAKANA_LETTER: 6, + THAI_LETTER: 7, +}; + +function isAlphabeticalScript(charCode) { + return charCode < 0x2E80; +} + +function isAscii(charCode) { + return (charCode & 0xFF80) === 0; +} + +function isAsciiAlpha(charCode) { + return (charCode >= /* a = */ 0x61 && charCode <= /* z = */ 0x7A) || + (charCode >= /* A = */ 0x41 && charCode <= /* Z = */ 0x5A); +} + +function isAsciiDigit(charCode) { + return (charCode >= /* 0 = */ 0x30 && charCode <= /* 9 = */ 0x39); +} + +function isAsciiSpace(charCode) { + return (charCode === /* SPACE = */ 0x20 || charCode === /* TAB = */ 0x09 || + charCode === /* CR = */ 0x0D || charCode === /* LF = */ 0x0A); +} + +function isHan(charCode) { /* U+3400-9FFF, U+F900-FAFF (BMP only) */ + return (charCode >= 0x3400 && charCode <= 0x9FFF) || + (charCode >= 0xF900 && charCode <= 0xFAFF); +} + +function isKatakana(charCode) { + return (charCode >= 0x30A0 && charCode <= 0x30FF); +} + +function isHiragana(charCode) { + return (charCode >= 0x3040 && charCode <= 0x309F); +} + +function isHalfwidthKatakana(charCode) { + return (charCode >= 0xFF60 && charCode <= 0xFF9F); +} + +function isThai(charCode) { + return (charCode & 0xFF80) === 0x0E00; +} + +/** + * This function is based on the word-break detection implemented in: + * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp + */ +function getCharacterType(charCode) { + if (isAlphabeticalScript(charCode)) { + if (isAscii(charCode)) { + if (isAsciiSpace(charCode)) { + return CharacterType.SPACE; + } else if (isAsciiAlpha(charCode) || isAsciiDigit(charCode) || + charCode === /* UNDERSCORE = */ 0x5F) { + return CharacterType.ALPHA_LETTER; + } + return CharacterType.PUNCT; + } else if (isThai(charCode)) { + return CharacterType.THAI_LETTER; + } else if (charCode === /* NBSP = */ 0xA0) { + return CharacterType.SPACE; + } + return CharacterType.ALPHA_LETTER; + } + + if
(isHan(charCode)) { + return CharacterType.HAN_LETTER; + } else if (isKatakana(charCode)) { + return CharacterType.KATAKANA_LETTER; + } else if (isHiragana(charCode)) { + return CharacterType.HIRAGANA_LETTER; + } else if (isHalfwidthKatakana(charCode)) { + return CharacterType.HALFWIDTH_KATAKANA_LETTER; + } + return CharacterType.ALPHA_LETTER; +} + +export { + CharacterType, + getCharacterType, +}; diff --git a/web/viewer.html b/web/viewer.html index 8f1b9e1f49c44e..f5425ccdbdb489 100644 --- a/web/viewer.html +++ b/web/viewer.html @@ -109,6 +109,8 @@ + + diff --git a/web/viewer.js b/web/viewer.js index 7a9ae61c0f6b2a..113b2799eec935 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -134,6 +134,7 @@ function getViewerConfiguration() { findField: document.getElementById('findInput'), highlightAllCheckbox: document.getElementById('findHighlightAll'), caseSensitiveCheckbox: document.getElementById('findMatchCase'), + entireWordCheckbox: document.getElementById('findEntireWord'), findMsg: document.getElementById('findMsg'), findResultsCount: document.getElementById('findResultsCount'), findStatusIcon: document.getElementById('findStatusIcon'),