Skip to content

Commit

Permalink
Merge pull request #10028 from Snuffleupagus/entireWord
Browse files Browse the repository at this point in the history
Add initial support for "Whole words" searching in the viewer
  • Loading branch information
timvandermeij authored Sep 10, 2018
2 parents 53c37d3 + b4edcce commit bc5111d
Show file tree
Hide file tree
Showing 13 changed files with 226 additions and 12 deletions.
1 change: 1 addition & 0 deletions l10n/en-US/viewer.properties
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ find_next.title=Find the next occurrence of the phrase
find_next_label=Next
find_highlight=Highlight all
find_match_case_label=Match case
find_entire_word_label=Whole words
find_reached_top=Reached top of document, continued from bottom
find_reached_bottom=Reached end of document, continued from top
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be
Expand Down
1 change: 1 addition & 0 deletions l10n/nl/viewer.properties
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ find_next.title=De volgende overeenkomst van de tekst zoeken
find_next_label=Volgende
find_highlight=Alles markeren
find_match_case_label=Hoofdlettergevoelig
find_entire_word_label=Hele woorden
find_reached_top=Bovenkant van document bereikt, doorgegaan vanaf onderkant
find_reached_bottom=Onderkant van document bereikt, doorgegaan vanaf bovenkant
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be
Expand Down
1 change: 1 addition & 0 deletions l10n/sv-SE/viewer.properties
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ find_next.title=Hitta nästa förekomst av frasen
find_next_label=Nästa
find_highlight=Markera alla
find_match_case_label=Matcha versal/gemen
find_entire_word_label=Hela ord
find_reached_top=Nådde början av dokumentet, började från slutet
find_reached_bottom=Nådde slutet på dokumentet, började från början
# LOCALIZATION NOTE (find_matches_count): "{{current}}" and "{{total}}" will be
Expand Down
1 change: 1 addition & 0 deletions test/unit/clitests.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"network_utils_spec.js",
"node_stream_spec.js",
"parser_spec.js",
"pdf_find_utils_spec.js",
"pdf_history_spec.js",
"primitives_spec.js",
"stream_spec.js",
Expand Down
1 change: 1 addition & 0 deletions test/unit/jasmine-boot.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ function initializePDFJS(callback) {
'pdfjs-test/unit/network_spec',
'pdfjs-test/unit/network_utils_spec',
'pdfjs-test/unit/parser_spec',
'pdfjs-test/unit/pdf_find_utils_spec',
'pdfjs-test/unit/pdf_history_spec',
'pdfjs-test/unit/primitives_spec',
'pdfjs-test/unit/stream_spec',
Expand Down
56 changes: 56 additions & 0 deletions test/unit/pdf_find_utils_spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/* Copyright 2018 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import { CharacterType, getCharacterType } from '../../web/pdf_find_utils';

describe('pdf_find_utils', function() {
  describe('getCharacterType', function() {
    it('gets expected character types', function() {
      // Each entry pairs a one-character string with the `CharacterType`
      // that `getCharacterType` is expected to report for its char code.
      const expectations = [
        ['0', CharacterType.ALPHA_LETTER],
        ['5', CharacterType.ALPHA_LETTER],
        ['A', CharacterType.ALPHA_LETTER],
        ['a', CharacterType.ALPHA_LETTER],
        ['\xC4', CharacterType.ALPHA_LETTER], // 'Ä'
        ['\xE4', CharacterType.ALPHA_LETTER], // 'ä'
        ['_', CharacterType.ALPHA_LETTER],
        [' ', CharacterType.SPACE],
        ['\t', CharacterType.SPACE],
        ['\r', CharacterType.SPACE],
        ['\n', CharacterType.SPACE],
        ['\xA0', CharacterType.SPACE],
        ['-', CharacterType.PUNCT],
        [',', CharacterType.PUNCT],
        ['.', CharacterType.PUNCT],
        [';', CharacterType.PUNCT],
        [':', CharacterType.PUNCT],
        ['\u2122', CharacterType.ALPHA_LETTER], // trademark
        ['\u0E25', CharacterType.THAI_LETTER],
        ['\u4000', CharacterType.HAN_LETTER],
        ['\uF950', CharacterType.HAN_LETTER],
        ['\u30C0', CharacterType.KATAKANA_LETTER],
        ['\u3050', CharacterType.HIRAGANA_LETTER],
        ['\uFF80', CharacterType.HALFWIDTH_KATAKANA_LETTER],
      ];

      for (const [character, expectedType] of expectations) {
        const actualType = getCharacterType(character.charCodeAt(0));

        expect(actualType).toEqual(expectedType);
      }
    });
  });
});
3 changes: 3 additions & 0 deletions web/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -1959,6 +1959,7 @@ function webViewerFind(evt) {
query: evt.query,
phraseSearch: evt.phraseSearch,
caseSensitive: evt.caseSensitive,
entireWord: evt.entireWord,
highlightAll: evt.highlightAll,
findPrevious: evt.findPrevious,
});
Expand All @@ -1969,6 +1970,7 @@ function webViewerFindFromUrlHash(evt) {
query: evt.query,
phraseSearch: evt.phraseSearch,
caseSensitive: false,
entireWord: false,
highlightAll: true,
findPrevious: false,
});
Expand Down Expand Up @@ -2105,6 +2107,7 @@ function webViewerKeyDown(evt) {
query: findState.query,
phraseSearch: findState.phraseSearch,
caseSensitive: findState.caseSensitive,
entireWord: findState.entireWord,
highlightAll: findState.highlightAll,
findPrevious: cmd === 5 || cmd === 12,
});
Expand Down
8 changes: 5 additions & 3 deletions web/firefoxcom.js
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,8 @@ class MozL10n {
'find',
'findagain',
'findhighlightallchange',
'findcasesensitivitychange'
'findcasesensitivitychange',
'findentirewordchange',
];
let handleEvent = function(evt) {
if (!PDFViewerApplication.initialized) {
Expand All @@ -179,13 +180,14 @@ class MozL10n {
query: evt.detail.query,
phraseSearch: true,
caseSensitive: !!evt.detail.caseSensitive,
entireWord: !!evt.detail.entireWord,
highlightAll: !!evt.detail.highlightAll,
findPrevious: !!evt.detail.findPrevious,
});
};

for (let i = 0, len = events.length; i < len; i++) {
window.addEventListener(events[i], handleEvent);
for (let event of events) {
window.addEventListener(event, handleEvent);
}
})();

Expand Down
9 changes: 7 additions & 2 deletions web/pdf_find_bar.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ class PDFFindBar {
this.findField = options.findField || null;
this.highlightAll = options.highlightAllCheckbox || null;
this.caseSensitive = options.caseSensitiveCheckbox || null;
this.entireWord = options.entireWordCheckbox || null;
this.findMsg = options.findMsg || null;
this.findResultsCount = options.findResultsCount || null;
this.findStatusIcon = options.findStatusIcon || null;
this.findPreviousButton = options.findPreviousButton || null;
this.findNextButton = options.findNextButton || null;
this.findController = options.findController || null;
Expand Down Expand Up @@ -85,6 +85,10 @@ class PDFFindBar {
this.dispatchEvent('casesensitivitychange');
});

this.entireWord.addEventListener('click', () => {
this.dispatchEvent('entirewordchange');
});

this.eventBus.on('resize', this._adjustWidth.bind(this));
}

Expand All @@ -97,8 +101,9 @@ class PDFFindBar {
source: this,
type,
query: this.findField.value,
caseSensitive: this.caseSensitive.checked,
phraseSearch: true,
caseSensitive: this.caseSensitive.checked,
entireWord: this.entireWord.checked,
highlightAll: this.highlightAll.checked,
findPrevious: findPrev,
});
Expand Down
40 changes: 36 additions & 4 deletions web/pdf_find_controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/

import { createPromiseCapability } from 'pdfjs-lib';
import { getCharacterType } from './pdf_find_utils';
import { getGlobalEventBus } from './dom_events';
import { scrollIntoView } from './ui_utils';

Expand Down Expand Up @@ -190,7 +191,30 @@ class PDFFindController {
}
}

_calculatePhraseMatch(query, pageIndex, pageContent) {
/**
* Determine if the search query constitutes a "whole word", by comparing the
* first/last character type with the preceding/following character type.
*/
_isEntireWord(content, startIdx, length) {
if (startIdx > 0) {
const first = content.charCodeAt(startIdx);
const limit = content.charCodeAt(startIdx - 1);
if (getCharacterType(first) === getCharacterType(limit)) {
return false;
}
}
const endIdx = (startIdx + length - 1);
if (endIdx < (content.length - 1)) {
const last = content.charCodeAt(endIdx);
const limit = content.charCodeAt(endIdx + 1);
if (getCharacterType(last) === getCharacterType(limit)) {
return false;
}
}
return true;
}

_calculatePhraseMatch(query, pageIndex, pageContent, entireWord) {
let matches = [];
let queryLen = query.length;
let matchIdx = -queryLen;
Expand All @@ -199,12 +223,15 @@ class PDFFindController {
if (matchIdx === -1) {
break;
}
if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) {
continue;
}
matches.push(matchIdx);
}
this.pageMatches[pageIndex] = matches;
}

_calculateWordMatch(query, pageIndex, pageContent) {
_calculateWordMatch(query, pageIndex, pageContent, entireWord) {
let matchesWithLength = [];
// Divide the query into pieces and search for text in each piece.
let queryArray = query.match(/\S+/g);
Expand All @@ -217,6 +244,10 @@ class PDFFindController {
if (matchIdx === -1) {
break;
}
if (entireWord &&
!this._isEntireWord(pageContent, matchIdx, subqueryLen)) {
continue;
}
// Other searches do not, so we store the length.
matchesWithLength.push({
match: matchIdx,
Expand Down Expand Up @@ -244,6 +275,7 @@ class PDFFindController {
let query = this._normalize(this.state.query);
let caseSensitive = this.state.caseSensitive;
let phraseSearch = this.state.phraseSearch;
const entireWord = this.state.entireWord;
let queryLen = query.length;

if (queryLen === 0) {
Expand All @@ -257,9 +289,9 @@ class PDFFindController {
}

if (phraseSearch) {
this._calculatePhraseMatch(query, pageIndex, pageContent);
this._calculatePhraseMatch(query, pageIndex, pageContent, entireWord);
} else {
this._calculateWordMatch(query, pageIndex, pageContent);
this._calculateWordMatch(query, pageIndex, pageContent, entireWord);
}

this._updatePage(pageIndex);
Expand Down
107 changes: 107 additions & 0 deletions web/pdf_find_utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/* Copyright 2018 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Character classes reported by `getCharacterType`. The numeric values are
 * only meant to be compared with each other.
 */
const CharacterType = {
  // Whitespace.
  SPACE: 0,
  // Letters, digits and any character without a more specific class.
  ALPHA_LETTER: 1,
  // Punctuation.
  PUNCT: 2,
  // Script-specific letter classes.
  HAN_LETTER: 3,
  KATAKANA_LETTER: 4,
  HIRAGANA_LETTER: 5,
  HALFWIDTH_KATAKANA_LETTER: 6,
  THAI_LETTER: 7,
};

/**
 * Report whether a char code lies below 0x2E80, the threshold under which
 * this module treats a character as belonging to an alphabetical script.
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isAlphabeticalScript(charCode) {
  const FIRST_NON_ALPHABETICAL_CODE = 0x2E80;
  return charCode < FIRST_NON_ALPHABETICAL_CODE;
}

/**
 * Report whether a char code has none of the bits selected by the mask
 * 0xFF80 set; for a 16-bit code unit this means it is in 0x00-0x7F.
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isAscii(charCode) {
  const NON_ASCII_BITS = 0xFF80;
  const highBits = charCode & NON_ASCII_BITS;
  return highBits === 0;
}

/**
 * Report whether a char code is an ASCII letter, i.e. within 'a'-'z'
 * (0x61-0x7A) or 'A'-'Z' (0x41-0x5A).
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isAsciiAlpha(charCode) {
  const isLowerCase = charCode >= 0x61 && charCode <= 0x7A; // a-z
  const isUpperCase = charCode >= 0x41 && charCode <= 0x5A; // A-Z
  return isLowerCase || isUpperCase;
}

/**
 * Report whether a char code is an ASCII digit, i.e. within '0'-'9'
 * (0x30-0x39).
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isAsciiDigit(charCode) {
  const DIGIT_ZERO = 0x30;
  const DIGIT_NINE = 0x39;
  return DIGIT_ZERO <= charCode && charCode <= DIGIT_NINE;
}

/**
 * Report whether a char code is one of the ASCII whitespace characters
 * recognised here: space, tab, carriage return or line feed.
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isAsciiSpace(charCode) {
  switch (charCode) {
    case 0x20: // SPACE
    case 0x09: // TAB
    case 0x0D: // CR
    case 0x0A: // LF
      return true;
    default:
      return false;
  }
}

/**
 * Report whether a char code falls in one of the two ranges classified as
 * Han here: 0x3400-0x9FFF or 0xF900-0xFAFF (both inclusive).
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isHan(charCode) {
  const inLowerRange = charCode >= 0x3400 && charCode <= 0x9FFF;
  const inUpperRange = charCode >= 0xF900 && charCode <= 0xFAFF;
  return inLowerRange || inUpperRange;
}

/**
 * Report whether a char code falls in the range classified as Katakana
 * here: 0x30A0-0x30FF (inclusive).
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isKatakana(charCode) {
  const RANGE_START = 0x30A0;
  const RANGE_END = 0x30FF;
  return RANGE_START <= charCode && charCode <= RANGE_END;
}

/**
 * Report whether a char code falls in the range classified as Hiragana
 * here: 0x3040-0x309F (inclusive).
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isHiragana(charCode) {
  const RANGE_START = 0x3040;
  const RANGE_END = 0x309F;
  return RANGE_START <= charCode && charCode <= RANGE_END;
}

/**
 * Report whether a char code falls in the range classified as halfwidth
 * Katakana here: 0xFF60-0xFF9F (inclusive).
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isHalfwidthKatakana(charCode) {
  const RANGE_START = 0xFF60;
  const RANGE_END = 0xFF9F;
  return RANGE_START <= charCode && charCode <= RANGE_END;
}

/**
 * Report whether the bits of a char code selected by the mask 0xFF80 equal
 * 0x0E00; for a 16-bit code unit this means it is in 0x0E00-0x0E7F, the range
 * classified as Thai here.
 *
 * @param {number} charCode - Char code to test.
 * @returns {boolean}
 */
function isThai(charCode) {
  const BLOCK_MASK = 0xFF80;
  const THAI_BLOCK_START = 0x0E00;
  const blockBits = charCode & BLOCK_MASK;
  return blockBits === THAI_BLOCK_START;
}

/**
 * Classify a char code into one of the `CharacterType` values.
 *
 * This function is based on the word-break detection implemented in:
 * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp
 *
 * @param {number} charCode - Char code to classify, e.g. the result of
 *   `String.prototype.charCodeAt`.
 * @returns {number} One of the `CharacterType` values.
 */
function getCharacterType(charCode) {
  // Non-alphabetical scripts: pick the specific script class, falling back
  // to ALPHA_LETTER for anything not recognised.
  if (!isAlphabeticalScript(charCode)) {
    if (isHan(charCode)) {
      return CharacterType.HAN_LETTER;
    }
    if (isKatakana(charCode)) {
      return CharacterType.KATAKANA_LETTER;
    }
    if (isHiragana(charCode)) {
      return CharacterType.HIRAGANA_LETTER;
    }
    if (isHalfwidthKatakana(charCode)) {
      return CharacterType.HALFWIDTH_KATAKANA_LETTER;
    }
    return CharacterType.ALPHA_LETTER;
  }

  // Alphabetical scripts outside ASCII: Thai gets its own class and NBSP
  // counts as whitespace; everything else is a letter.
  if (!isAscii(charCode)) {
    if (isThai(charCode)) {
      return CharacterType.THAI_LETTER;
    }
    const isNbsp = charCode === 0xA0;
    return isNbsp ? CharacterType.SPACE : CharacterType.ALPHA_LETTER;
  }

  // ASCII: whitespace, then word characters (letters, digits, underscore);
  // whatever remains is treated as punctuation.
  if (isAsciiSpace(charCode)) {
    return CharacterType.SPACE;
  }
  const isUnderscore = charCode === 0x5F;
  if (isAsciiAlpha(charCode) || isAsciiDigit(charCode) || isUnderscore) {
    return CharacterType.ALPHA_LETTER;
  }
  return CharacterType.PUNCT;
}

export {
CharacterType,
getCharacterType,
};
8 changes: 6 additions & 2 deletions web/viewer.html
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,19 @@
</div>
</div>

<div id="findbarOptionsContainer">
<div id="findbarOptionsOneContainer">
<input type="checkbox" id="findHighlightAll" class="toolbarField" tabindex="94">
<label for="findHighlightAll" class="toolbarLabel" data-l10n-id="find_highlight">Highlight all</label>
<input type="checkbox" id="findMatchCase" class="toolbarField" tabindex="95">
<label for="findMatchCase" class="toolbarLabel" data-l10n-id="find_match_case_label">Match case</label>
</div>
<div id="findbarOptionsTwoContainer">
<input type="checkbox" id="findEntireWord" class="toolbarField" tabindex="96">
<label for="findEntireWord" class="toolbarLabel" data-l10n-id="find_entire_word_label">Whole words</label>
<span id="findResultsCount" class="toolbarLabel hidden"></span>
</div>

<div id="findbarMessageContainer">
<span id="findResultsCount" class="toolbarLabel hidden"></span>
<span id="findMsg" class="toolbarLabel"></span>
</div>
</div> <!-- findbar -->
Expand Down
Loading

0 comments on commit bc5111d

Please sign in to comment.