diff --git a/l10n/en-US/viewer.properties b/l10n/en-US/viewer.properties index 5fe094b769b286..e02857c1157980 100644 --- a/l10n/en-US/viewer.properties +++ b/l10n/en-US/viewer.properties @@ -168,6 +168,7 @@ find_next.title=Find the next occurrence of the phrase find_next_label=Next find_highlight=Highlight all find_match_case_label=Match case +find_match_diacritics_label=Match Diacritics find_entire_word_label=Whole words find_reached_top=Reached top of document, continued from bottom find_reached_bottom=Reached end of document, continued from top diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 6915b460064f51..34ec9a4aaedb85 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -321,6 +321,7 @@ !issue4650.pdf !issue6721_reduced.pdf !issue3025.pdf +!french_diacritics.pdf !issue2099-1.pdf !issue3371.pdf !issue2956.pdf diff --git a/test/pdfs/french_diacritics.pdf b/test/pdfs/french_diacritics.pdf new file mode 100644 index 00000000000000..ba5b5cb07e2bde Binary files /dev/null and b/test/pdfs/french_diacritics.pdf differ diff --git a/test/unit/clitests.json b/test/unit/clitests.json index 00c16be26b3b0a..efd7b8d03d123f 100644 --- a/test/unit/clitests.json +++ b/test/unit/clitests.json @@ -30,7 +30,6 @@ "node_stream_spec.js", "parser_spec.js", "pdf_find_controller_spec.js", - "pdf_find_utils_spec.js", "pdf_history_spec.js", "primitives_spec.js", "stream_spec.js", diff --git a/test/unit/jasmine-boot.js b/test/unit/jasmine-boot.js index 022ec220ffccfd..8bdfdd1689efea 100644 --- a/test/unit/jasmine-boot.js +++ b/test/unit/jasmine-boot.js @@ -75,7 +75,6 @@ async function initializePDFJS(callback) { "pdfjs-test/unit/network_utils_spec.js", "pdfjs-test/unit/parser_spec.js", "pdfjs-test/unit/pdf_find_controller_spec.js", - "pdfjs-test/unit/pdf_find_utils_spec.js", "pdfjs-test/unit/pdf_history_spec.js", "pdfjs-test/unit/primitives_spec.js", "pdfjs-test/unit/scripting_spec.js", diff --git a/test/unit/pdf_find_controller_spec.js 
b/test/unit/pdf_find_controller_spec.js index 1b97f47e48eda9..9ba0426d894ef4 100644 --- a/test/unit/pdf_find_controller_spec.js +++ b/test/unit/pdf_find_controller_spec.js @@ -108,7 +108,6 @@ function testSearch({ return; } eventBus.off("updatefindmatchescount", onUpdateFindMatchesCount); - expect(evt.matchesCount.total).toBe(totalMatches); for (let i = 0; i < totalPages; i++) { expect(pdfFindController.pageMatches[i].length).toEqual( @@ -271,5 +270,130 @@ describe("pdf_find_controller", function () { pageMatches: [[19, 48, 66]], pageMatchesLength: [[8, 8, 8]], }); + + await testSearch({ + eventBus, + pdfFindController, + parameters: { + query: "1/2", + caseSensitive: false, + entireWord: false, + phraseSearch: true, + findPrevious: false, + }, + matchesPerPage: [2], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[28, 57]], + pageMatchesLength: [[1, 1]], + }); + + await testSearch({ + eventBus, + pdfFindController, + parameters: { + query: "½", + caseSensitive: false, + entireWord: false, + phraseSearch: true, + findPrevious: false, + }, + matchesPerPage: [2], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[28, 57]], + pageMatchesLength: [[1, 1]], + }); + }); + + it("performs a normal search, where the text with diacritics is normalized", async function () { + const { eventBus, pdfFindController } = await initPdfFindController( + "french_diacritics.pdf" + ); + + await testSearch({ + eventBus, + pdfFindController, + parameters: { + query: "a", + caseSensitive: false, + entireWord: false, + phraseSearch: true, + findPrevious: false, + matchDiacritics: false, + }, + matchesPerPage: [6], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[0, 2, 4, 6, 8, 10]], + pageMatchesLength: [[1, 1, 1, 1, 1, 1]], + }); + + await testSearch({ + eventBus, + pdfFindController, + parameters: { + query: "u", + caseSensitive: false, + entireWord: false, + phraseSearch: true, + findPrevious: false, + 
matchDiacritics: false, + }, + matchesPerPage: [6], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[44, 46, 48, 50, 52, 54]], + pageMatchesLength: [[1, 1, 1, 1, 1, 1]], + }); + + await testSearch({ + eventBus, + pdfFindController, + parameters: { + query: "ë", + caseSensitive: false, + entireWord: false, + phraseSearch: true, + findPrevious: false, + matchDiacritics: true, + }, + matchesPerPage: [2], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + pageMatches: [[28, 30]], + pageMatchesLength: [[1, 1]], + }); + }); + + it("performs a search where one of the results contains an hyphen", async function () { + const { eventBus, pdfFindController } = await initPdfFindController(); + + await testSearch({ + eventBus, + pdfFindController, + parameters: { + query: "optimiz", + caseSensitive: false, + entireWord: false, + phraseSearch: true, + findPrevious: false, + }, + matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4], + selectedMatch: { + pageIndex: 0, + matchIndex: 0, + }, + }); }); }); diff --git a/test/unit/pdf_find_utils_spec.js b/test/unit/pdf_find_utils_spec.js deleted file mode 100644 index d108a5cc2708e0..00000000000000 --- a/test/unit/pdf_find_utils_spec.js +++ /dev/null @@ -1,56 +0,0 @@ -/* Copyright 2018 Mozilla Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -import { CharacterType, getCharacterType } from "../../web/pdf_find_utils.js"; - -describe("pdf_find_utils", function () { - describe("getCharacterType", function () { - it("gets expected character types", function () { - const characters = { - A: CharacterType.ALPHA_LETTER, - a: CharacterType.ALPHA_LETTER, - 0: CharacterType.ALPHA_LETTER, - 5: CharacterType.ALPHA_LETTER, - "\xC4": CharacterType.ALPHA_LETTER, // "Ä" - "\xE4": CharacterType.ALPHA_LETTER, // "ä" - _: CharacterType.ALPHA_LETTER, - " ": CharacterType.SPACE, - "\t": CharacterType.SPACE, - "\r": CharacterType.SPACE, - "\n": CharacterType.SPACE, - "\xA0": CharacterType.SPACE, // nbsp - "-": CharacterType.PUNCT, - ",": CharacterType.PUNCT, - ".": CharacterType.PUNCT, - ";": CharacterType.PUNCT, - ":": CharacterType.PUNCT, - "\u2122": CharacterType.ALPHA_LETTER, // trademark - "\u0E25": CharacterType.THAI_LETTER, - "\u4000": CharacterType.HAN_LETTER, - "\uF950": CharacterType.HAN_LETTER, - "\u30C0": CharacterType.KATAKANA_LETTER, - "\u3050": CharacterType.HIRAGANA_LETTER, - "\uFF80": CharacterType.HALFWIDTH_KATAKANA_LETTER, - }; - - for (const character in characters) { - const charCode = character.charCodeAt(0); - const type = characters[character]; - - expect(getCharacterType(charCode)).toEqual(type); - } - }); - }); -}); diff --git a/web/app.js b/web/app.js index da46c330c7faf3..7d9b4db8cec3e7 100644 --- a/web/app.js +++ b/web/app.js @@ -2608,6 +2608,7 @@ function webViewerFind(evt) { entireWord: evt.entireWord, highlightAll: evt.highlightAll, findPrevious: evt.findPrevious, + matchDiacritics: evt.matchDiacritics, }); } @@ -2619,6 +2620,7 @@ function webViewerFindFromUrlHash(evt) { entireWord: false, highlightAll: true, findPrevious: false, + matchDiacritics: true, }); } @@ -2825,6 +2827,7 @@ function webViewerKeyDown(evt) { entireWord: findState.entireWord, highlightAll: findState.highlightAll, findPrevious: cmd === 5 || cmd === 12, + matchDiacritics: findState.matchDiacritics, }); } handled = 
true; diff --git a/web/firefoxcom.js b/web/firefoxcom.js index 129190a747851c..a98d093177b65f 100644 --- a/web/firefoxcom.js +++ b/web/firefoxcom.js @@ -218,6 +218,7 @@ class MozL10n { "findcasesensitivitychange", "findentirewordchange", "findbarclose", + "finddiacriticmatchingchange", ]; const handleEvent = function ({ type, detail }) { if (!PDFViewerApplication.initialized) { @@ -236,6 +237,7 @@ class MozL10n { entireWord: !!detail.entireWord, highlightAll: !!detail.highlightAll, findPrevious: !!detail.findPrevious, + matchDiacritics: !!detail.matchDiacritics, }); }; diff --git a/web/pdf_find_bar.js b/web/pdf_find_bar.js index 3388711d935fd7..cd00f8d438b245 100644 --- a/web/pdf_find_bar.js +++ b/web/pdf_find_bar.js @@ -33,6 +33,7 @@ class PDFFindBar { this.highlightAll = options.highlightAllCheckbox; this.caseSensitive = options.caseSensitiveCheckbox; this.entireWord = options.entireWordCheckbox; + this.matchDiacritics = options.matchDiacriticsCheckbox; this.findMsg = options.findMsg; this.findResultsCount = options.findResultsCount; this.findPreviousButton = options.findPreviousButton; @@ -82,6 +83,10 @@ class PDFFindBar { this.dispatchEvent("entirewordchange"); }); + this.matchDiacritics.addEventListener("click", () => { + this.dispatchEvent("diacriticmatchingchange"); + }); + this.eventBus._on("resize", this._adjustWidth.bind(this)); } @@ -99,6 +104,7 @@ class PDFFindBar { entireWord: this.entireWord.checked, highlightAll: this.highlightAll.checked, findPrevious: findPrev, + matchDiacritics: this.matchDiacritics.checked, }); } diff --git a/web/pdf_find_controller.js b/web/pdf_find_controller.js index 7eeb01e65e8658..6e8eb759beb92b 100644 --- a/web/pdf_find_controller.js +++ b/web/pdf_find_controller.js @@ -13,9 +13,8 @@ * limitations under the License. 
*/ +import { binarySearchFirstItem, scrollIntoView } from "./ui_utils.js"; import { createPromiseCapability } from "pdfjs-lib"; -import { getCharacterType } from "./pdf_find_utils.js"; -import { scrollIntoView } from "./ui_utils.js"; const FindState = { FOUND: 0, @@ -42,47 +41,136 @@ const CHARACTERS_TO_NORMALIZE = { "\u00BE": "3/4", // Vulgar fraction three quarters }; +const diacriticsRegExp = /\p{Mn}+/gu; +const escapeRegExp = /[.*+\-?^${}()|[\]\\]/g; + let normalizationRegex = null; function normalize(text) { + // The diacritics in the text or in the query can be composed or not. + // So we use a decomposed text using NFD (and the same for the query) + // in order to be sure that diacritics are in the same order. + if (!normalizationRegex) { // Compile the regular expression for text normalization once. const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join(""); - normalizationRegex = new RegExp(`[${replace}]`, "g"); + normalizationRegex = new RegExp( + `([${replace}])|(-\\n)|(\\n)|(\\p{Mn}+)`, + "gum" + ); } - let diffs = null; - const normalizedText = text.replace(normalizationRegex, function (ch, index) { - const normalizedCh = CHARACTERS_TO_NORMALIZE[ch], - diff = normalizedCh.length - ch.length; - if (diff !== 0) { - (diffs ||= []).push([index, diff]); + + // The goal of this function is to normalize the string and + // be able to get from an index in the new string the + // corresponding index in the old string. + // For example if we have: abCd12ef456gh where C is replaced by ccc + // and numbers replaced by nothing (it's the case for diacritics), then + // we'll obtain the normalized string: abcccdefgh. + // So here the reverse map is: [0,1,2,2,2,3,6,7,11,12]. + + // The goal is to obtain the array: [[0, 0], [3, -1], [4, -2], + // [6, 0], [8, 3]], + // which can be used like this: + // - let's say that i is the index in the new string and j the index + // in the old string. 
+ // - if i is in [0; 3[ then j = i + 0 + // - if i is in [3; 4[ then j = i - 1 + // - if i is in [4; 6[ then j = i - 2 + // ... + // Thanks to a binary search it's easy to know where i is and what's the + // shift. + // Let's say that the last entry in the array is [x, s] and we have a + // substitution at index y (old string) which will replace o chars by n chars. + // Firstly, if o === n, then no need to add a new entry: the shift is + // the same. + // Secondly, if o < n, then we push the n - o elements: + // [y - (s - 1), s - 1], [y - (s - 2), s - 2], ... + // Thirdly, if o > n, then we push the element: [y - (s - n), o + s - n] + + // Collect diacritics length and positions. + const rawDiacriticsPositions = []; + let m; + while ((m = diacriticsRegExp.exec(text)) !== null) { + rawDiacriticsPositions.push([m[0].length, m.index]); + } + + let normalized = text.normalize("NFD"); + const positions = [[0, 0]]; + let k = 0; + let shift = 0; + let shiftOrigin = 0; + let eol = 0; + normalized = normalized.replace( + normalizationRegex, + (match, p1, p2, p3, p4, i) => { + i -= shiftOrigin; + if (p1) { + // Fractions... + const replacement = CHARACTERS_TO_NORMALIZE[match]; + const jj = replacement.length; + for (let j = 1; j < jj; j++) { + positions.push([i - shift + j, shift - j]); + } + shift -= jj - 1; + return replacement; + } + + if (p2) { + // -\n is removed because a - at eol is the result of a hyphenation. + positions.push([i - shift, 1 + shift]); + shift += 1; + shiftOrigin += 1; + eol += 1; + return ""; + } + + if (p3) { + // eol is replaced by space: "foo\nbar" is likely equivalent to + // "foo bar". + positions.push([i - shift + 1, shift - 1]); + shift -= 1; + shiftOrigin += 1; + eol += 1; + return " "; + } + + // Diacritics. 
+ let jj = match.length; + if (i + eol === rawDiacriticsPositions?.[k]?.[1]) { + jj -= rawDiacriticsPositions[k][0]; + ++k; + } + + for (let j = 1; j < jj + 1; j++) { + // i is the position of the first diacritic + // so (i - 1) is the position for the letter before. + positions.push([i - 1 - shift + j, shift - j]); + } + shift -= jj; + shiftOrigin += jj; + + return match; } - return normalizedCh; - }); + ); + + positions.push([normalized.length, shift]); - return [normalizedText, diffs]; + return [normalized, positions]; } -// Determine the original, non-normalized, match index such that highlighting of -// search results is correct in the `textLayer` for strings containing e.g. "½" -// characters; essentially "inverting" the result of the `normalize` function. -function getOriginalIndex(matchIndex, diffs = null) { - if (!diffs) { - return matchIndex; +function getOriginalIndex(positions, pos, len) { + const start = pos; + const end = pos + len - 1; + let i = binarySearchFirstItem(positions, x => x[0] >= start); + if (positions[i][0] > start) { + --i; } - let totalDiff = 0; - for (const [index, diff] of diffs) { - const currentIndex = index + totalDiff; - if (currentIndex >= matchIndex) { - break; - } - if (currentIndex + diff > matchIndex) { - totalDiff += matchIndex - currentIndex; - break; - } - totalDiff += diff; + let j = binarySearchFirstItem(positions, x => x[0] >= end, i); + if (positions[j][0] > end) { + --j; } - return matchIndex - totalDiff; + + return [start + positions[i][1], len + positions[j][1] - positions[i][1]]; } /** @@ -152,6 +240,7 @@ class PDFFindController { if (this._state === null || this._shouldDirtyMatch(cmd, state)) { this._dirtyMatch = true; } + this._state = state; if (cmd !== "findhighlightallchange") { this._updateUIState(FindState.PENDING); @@ -166,6 +255,7 @@ class PDFFindController { ) { return; } + this._extractText(); const findbarClosed = !this._highlightMatches; @@ -358,136 +448,67 @@ class PDFFindController { } } - /** - * 
Determine if the search query constitutes a "whole word", by comparing the - * first/last character type with the preceding/following character type. - */ - _isEntireWord(content, startIdx, length) { - if (startIdx > 0) { - const first = content.charCodeAt(startIdx); - const limit = content.charCodeAt(startIdx - 1); - if (getCharacterType(first) === getCharacterType(limit)) { - return false; - } - } - const endIdx = startIdx + length - 1; - if (endIdx < content.length - 1) { - const last = content.charCodeAt(endIdx); - const limit = content.charCodeAt(endIdx + 1); - if (getCharacterType(last) === getCharacterType(limit)) { - return false; - } - } - return true; - } - - _calculatePhraseMatch(query, pageIndex, pageContent, pageDiffs, entireWord) { + _calculateRegExMatch(query, pageIndex, pageContent) { const matches = [], matchesLength = []; - const queryLen = query.length; - - let matchIdx = -queryLen; - while (true) { - matchIdx = pageContent.indexOf(query, matchIdx + queryLen); - if (matchIdx === -1) { - break; - } - if (entireWord && !this._isEntireWord(pageContent, matchIdx, queryLen)) { - continue; - } - const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs), - matchEnd = matchIdx + queryLen - 1, - originalQueryLen = - getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1; - matches.push(originalMatchIdx); - matchesLength.push(originalQueryLen); + const diffs = this._pageDiffs[pageIndex]; + let match; + while ((match = query.exec(pageContent)) !== null) { + const [matchPos, matchLen] = getOriginalIndex( + diffs, + match.index, + match[0].length + ); + matches.push(matchPos); + matchesLength.push(matchLen); } this._pageMatches[pageIndex] = matches; this._pageMatchesLength[pageIndex] = matchesLength; } - _calculateWordMatch(query, pageIndex, pageContent, pageDiffs, entireWord) { - const matchesWithLength = []; + _convertToRegExpString(query) { + const { entireWord, matchDiacritics } = this._state; - // Divide the query into pieces and search for 
text in each piece. - const queryArray = query.match(/\S+/g); - for (let i = 0, len = queryArray.length; i < len; i++) { - const subquery = queryArray[i]; - const subqueryLen = subquery.length; + query = query.replace(escapeRegExp, "\\$&"); - let matchIdx = -subqueryLen; - while (true) { - matchIdx = pageContent.indexOf(subquery, matchIdx + subqueryLen); - if (matchIdx === -1) { - break; - } - if ( - entireWord && - !this._isEntireWord(pageContent, matchIdx, subqueryLen) - ) { - continue; - } - const originalMatchIdx = getOriginalIndex(matchIdx, pageDiffs), - matchEnd = matchIdx + subqueryLen - 1, - originalQueryLen = - getOriginalIndex(matchEnd, pageDiffs) - originalMatchIdx + 1; - - // Other searches do not, so we store the length. - matchesWithLength.push({ - match: originalMatchIdx, - matchLength: originalQueryLen, - skipped: false, - }); - } + if (matchDiacritics) { + // aX mustn't match aXY (where Y is a diacritic) + query = `${query}(?=[^\\p{Mn}])`; + } else { + query = query.replace(/\p{Mn}/gu, ""); + query = query.replace(/\p{L}/gu, "$&\\p{Mn}*"); } - // Prepare arrays for storing the matches. - this._pageMatchesLength[pageIndex] = []; - this._pageMatches[pageIndex] = []; + if (entireWord) { + query = `\\b${query}\\b`; + } - // Sort `matchesWithLength`, remove intersecting terms and put the result - // into the two arrays. - this._prepareMatches( - matchesWithLength, - this._pageMatches[pageIndex], - this._pageMatchesLength[pageIndex] - ); + return query; } _calculateMatch(pageIndex) { - let pageContent = this._pageContents[pageIndex]; - const pageDiffs = this._pageDiffs[pageIndex]; + const pageContent = this._pageContents[pageIndex]; let query = this._query; - const { caseSensitive, entireWord, phraseSearch } = this._state; + const { caseSensitive, phraseSearch } = this._state; if (query.length === 0) { // Do nothing: the matches should be wiped out already. 
return; } - if (!caseSensitive) { - pageContent = pageContent.toLowerCase(); - query = query.toLowerCase(); - } - + const flags = caseSensitive ? "gu" : "gui"; if (phraseSearch) { - this._calculatePhraseMatch( - query, - pageIndex, - pageContent, - pageDiffs, - entireWord - ); + query = this._convertToRegExpString(query); } else { - this._calculateWordMatch( - query, - pageIndex, - pageContent, - pageDiffs, - entireWord - ); + query = query + .match(/\S+/g) + .map(q => `(${this._convertToRegExpString(q)})`) + .join("|"); } + query = new RegExp(query, flags); + + this._calculateRegExMatch(query, pageIndex, pageContent); // When `highlightAll` is set, ensure that the matches on previously // rendered (and still active) pages are correctly highlighted. @@ -533,12 +554,14 @@ class PDFFindController { for (let j = 0, jj = textItems.length; j < jj; j++) { strBuf.push(textItems[j].str); + if (textItems[j].hasEOL) { + strBuf.push("\n"); + } } // Store the normalized page content (text items) as one string. - [this._pageContents[i], this._pageDiffs[i]] = normalize( - strBuf.join("") - ); + const query = strBuf.join(""); + [this._pageContents[i], this._pageDiffs[i]] = normalize(query); extractTextCapability.resolve(i); }, reason => { diff --git a/web/pdf_find_utils.js b/web/pdf_find_utils.js deleted file mode 100644 index 24ec4c57502956..00000000000000 --- a/web/pdf_find_utils.js +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright 2018 Mozilla Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -const CharacterType = { - SPACE: 0, - ALPHA_LETTER: 1, - PUNCT: 2, - HAN_LETTER: 3, - KATAKANA_LETTER: 4, - HIRAGANA_LETTER: 5, - HALFWIDTH_KATAKANA_LETTER: 6, - THAI_LETTER: 7, -}; - -function isAlphabeticalScript(charCode) { - return charCode < 0x2e80; -} - -function isAscii(charCode) { - return (charCode & 0xff80) === 0; -} - -function isAsciiAlpha(charCode) { - return ( - (charCode >= /* a = */ 0x61 && charCode <= /* z = */ 0x7a) || - (charCode >= /* A = */ 0x41 && charCode <= /* Z = */ 0x5a) - ); -} - -function isAsciiDigit(charCode) { - return charCode >= /* 0 = */ 0x30 && charCode <= /* 9 = */ 0x39; -} - -function isAsciiSpace(charCode) { - return ( - charCode === /* SPACE = */ 0x20 || - charCode === /* TAB = */ 0x09 || - charCode === /* CR = */ 0x0d || - charCode === /* LF = */ 0x0a - ); -} - -function isHan(charCode) { - return ( - (charCode >= 0x3400 && charCode <= 0x9fff) || - (charCode >= 0xf900 && charCode <= 0xfaff) - ); -} - -function isKatakana(charCode) { - return charCode >= 0x30a0 && charCode <= 0x30ff; -} - -function isHiragana(charCode) { - return charCode >= 0x3040 && charCode <= 0x309f; -} - -function isHalfwidthKatakana(charCode) { - return charCode >= 0xff60 && charCode <= 0xff9f; -} - -function isThai(charCode) { - return (charCode & 0xff80) === 0x0e00; -} - -/** - * This function is based on the word-break detection implemented in: - * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp - */ -function getCharacterType(charCode) { - if (isAlphabeticalScript(charCode)) { - if (isAscii(charCode)) { - if (isAsciiSpace(charCode)) { - return CharacterType.SPACE; - } else if ( - isAsciiAlpha(charCode) || - isAsciiDigit(charCode) || - charCode === /* UNDERSCORE = */ 0x5f - ) { - return CharacterType.ALPHA_LETTER; - } - return CharacterType.PUNCT; - } else if (isThai(charCode)) { - return 
CharacterType.THAI_LETTER; - } else if (charCode === /* NBSP = */ 0xa0) { - return CharacterType.SPACE; - } - return CharacterType.ALPHA_LETTER; - } - - if (isHan(charCode)) { - return CharacterType.HAN_LETTER; - } else if (isKatakana(charCode)) { - return CharacterType.KATAKANA_LETTER; - } else if (isHiragana(charCode)) { - return CharacterType.HIRAGANA_LETTER; - } else if (isHalfwidthKatakana(charCode)) { - return CharacterType.HALFWIDTH_KATAKANA_LETTER; - } - return CharacterType.ALPHA_LETTER; -} - -export { CharacterType, getCharacterType }; diff --git a/web/ui_utils.js b/web/ui_utils.js index d9af9235b76461..dd26ddbba572c4 100644 --- a/web/ui_utils.js +++ b/web/ui_utils.js @@ -204,8 +204,8 @@ function parseQueryString(query) { * @returns {number} Index of the first array element to pass the test, * or |items.length| if no such element exists. */ -function binarySearchFirstItem(items, condition) { - let minIndex = 0; +function binarySearchFirstItem(items, condition, start = 0) { + let minIndex = start; let maxIndex = items.length - 1; if (maxIndex < 0 || !condition(items[maxIndex])) { diff --git a/web/viewer.html b/web/viewer.html index c390ba62e88693..57f4e1021caf97 100644 --- a/web/viewer.html +++ b/web/viewer.html @@ -138,8 +138,13 @@
- + + + +
+ +
diff --git a/web/viewer.js b/web/viewer.js index 8c843566010575..fdf6ecbfb249b7 100644 --- a/web/viewer.js +++ b/web/viewer.js @@ -155,6 +155,7 @@ function getViewerConfiguration() { highlightAllCheckbox: document.getElementById("findHighlightAll"), caseSensitiveCheckbox: document.getElementById("findMatchCase"), entireWordCheckbox: document.getElementById("findEntireWord"), + matchDiacriticsCheckbox: document.getElementById("findMatchDiacritics"), findMsg: document.getElementById("findMsg"), findResultsCount: document.getElementById("findResultsCount"), findPreviousButton: document.getElementById("findPrevious"),