Skip to content

Commit

Permalink
Support search with or without diacritics
Browse files Browse the repository at this point in the history
  - get the original index using a dichotomic (binary) search instead of a linear one;
  - remove diacritics from text using NFD decomposition and unicode regex;
  - convert the query string into a RegExp;
  - replace whitespace in the query with `\s+`;
  - remove pdf_find_utils.js.
  • Loading branch information
calixteman committed May 13, 2021
1 parent e394da5 commit c22368f
Show file tree
Hide file tree
Showing 11 changed files with 333 additions and 173 deletions.
1 change: 1 addition & 0 deletions l10n/en-US/viewer.properties
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ find_next.title=Find the next occurrence of the phrase
find_next_label=Next
find_highlight=Highlight all
find_match_case_label=Match case
find_match_diacritics_label=Match Diacritics
find_entire_word_label=Whole words
find_reached_top=Reached top of document, continued from bottom
find_reached_bottom=Reached end of document, continued from top
Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@
!issue4650.pdf
!issue6721_reduced.pdf
!issue3025.pdf
!french_diacritics.pdf
!issue2099-1.pdf
!issue3371.pdf
!issue2956.pdf
Expand Down
Binary file added test/pdfs/french_diacritics.pdf
Binary file not shown.
125 changes: 125 additions & 0 deletions test/unit/pdf_find_controller_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -271,5 +271,130 @@ describe("pdf_find_controller", function () {
pageMatches: [[19, 48, 66]],
pageMatchesLength: [[8, 8, 8]],
});

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "1/2",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
},
matchesPerPage: [2],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[28, 57]],
pageMatchesLength: [[1, 1]],
});

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "½",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
},
matchesPerPage: [2],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[28, 57]],
pageMatchesLength: [[1, 1]],
});
});

// Runs three phrase searches against the "french_diacritics.pdf" fixture to
// exercise the `matchDiacritics` search parameter. The first two searches pass
// `matchDiacritics: false` with unaccented queries; the third passes
// `matchDiacritics: true` with an accented query.
// NOTE(review): the fixture's text content is not visible here; the expected
// offsets below are taken as given — confirm against the PDF if they change.
it("performs a normal search, where the text with diacritics is normalized", async function () {
const { eventBus, pdfFindController } = await initPdfFindController(
"french_diacritics.pdf"
);

// Diacritics ignored: the plain query "a" is expected to yield six
// single-character matches on page 0, at every other offset from 0 to 10.
await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "a",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
matchDiacritics: false,
},
matchesPerPage: [6],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[0, 2, 4, 6, 8, 10]],
pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
});

// Diacritics ignored: the plain query "u" is likewise expected to yield six
// single-character matches on page 0, at offsets 44 through 54.
await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "u",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
matchDiacritics: false,
},
matchesPerPage: [6],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[44, 46, 48, 50, 52, 54]],
pageMatchesLength: [[1, 1, 1, 1, 1, 1]],
});

// Diacritics matched: the accented query "ë" is expected to yield only two
// matches on page 0 (offsets 28 and 30).
await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "ë",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
matchDiacritics: true,
},
matchesPerPage: [2],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[28, 30]],
pageMatchesLength: [[1, 1]],
});
});

// Searches the default document (`initPdfFindController` is called with no
// argument) for the partial word "optimiz" and checks the per-page match
// counts across 14 pages. No `pageMatches` / `pageMatchesLength` expectations
// are passed for this search.
// NOTE(review): which result spans a hyphen is not visible from this test —
// presumably a hyphenated line break in the document text; confirm against
// the fixture.
it("performs a search where one of the results contains an hyphen", async function () {
const { eventBus, pdfFindController } = await initPdfFindController();

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "optimiz",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
},
// Expected number of matches on each page, in page order.
matchesPerPage: [1, 4, 2, 3, 3, 0, 2, 9, 1, 0, 0, 6, 3, 4],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
});
});
});
3 changes: 3 additions & 0 deletions web/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -2603,6 +2603,7 @@ function webViewerFind(evt) {
entireWord: evt.entireWord,
highlightAll: evt.highlightAll,
findPrevious: evt.findPrevious,
matchDiacritics: evt.matchDiacritics,
});
}

Expand All @@ -2614,6 +2615,7 @@ function webViewerFindFromUrlHash(evt) {
entireWord: false,
highlightAll: true,
findPrevious: false,
matchDiacritics: true,
});
}

Expand Down Expand Up @@ -2820,6 +2822,7 @@ function webViewerKeyDown(evt) {
entireWord: findState.entireWord,
highlightAll: findState.highlightAll,
findPrevious: cmd === 5 || cmd === 12,
matchDiacritics: findState.matchDiacritics,
});
}
handled = true;
Expand Down
2 changes: 2 additions & 0 deletions web/firefoxcom.js
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ class MozL10n {
"findcasesensitivitychange",
"findentirewordchange",
"findbarclose",
"finddiacriticmatchingchange",
];
const handleEvent = function ({ type, detail }) {
if (!PDFViewerApplication.initialized) {
Expand All @@ -236,6 +237,7 @@ class MozL10n {
entireWord: !!detail.entireWord,
highlightAll: !!detail.highlightAll,
findPrevious: !!detail.findPrevious,
matchDiacritics: !!detail.matchDiacritics,
});
};

Expand Down
6 changes: 6 additions & 0 deletions web/pdf_find_bar.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class PDFFindBar {
this.highlightAll = options.highlightAllCheckbox;
this.caseSensitive = options.caseSensitiveCheckbox;
this.entireWord = options.entireWordCheckbox;
this.matchDiacritics = options.matchDiacriticsCheckbox;
this.findMsg = options.findMsg;
this.findResultsCount = options.findResultsCount;
this.findPreviousButton = options.findPreviousButton;
Expand Down Expand Up @@ -82,6 +83,10 @@ class PDFFindBar {
this.dispatchEvent("entirewordchange");
});

this.matchDiacritics.addEventListener("click", () => {
this.dispatchEvent("diacriticmatchingchange");
});

this.eventBus._on("resize", this._adjustWidth.bind(this));
}

Expand All @@ -99,6 +104,7 @@ class PDFFindBar {
entireWord: this.entireWord.checked,
highlightAll: this.highlightAll.checked,
findPrevious: findPrev,
matchDiacritics: this.matchDiacritics.checked,
});
}

Expand Down
Loading

0 comments on commit c22368f

Please sign in to comment.