Skip to content

Commit

Permalink
[api-minor] Highlight search results correctly for normalized text (P…
Browse files Browse the repository at this point in the history
…R 9448)

This patch is a rebased *and* refactored version of PR 9448, such that it applies cleanly given that `PDFFindController` has changed since that PR was opened; obviously keeping the original author information intact.

This patch will thus ensure that e.g. fractions, and other things that we normalize before searching, will still be highlighted correctly in the textLayer.

Furthermore, this patch also adds basic unit-tests for this functionality.

*Note:* The `[api-minor]` tag is added, since third-party implementations of the `PDFFindController` must now always use the `pageMatchesLength` property to get accurate length information (see the `web/text_layer_builder.js` changes).

Co-authored-by: Ross Johnson <[email protected]>
Co-authored-by: Jonas Jenwald <[email protected]>
  • Loading branch information
rossj and Snuffleupagus committed Jan 12, 2021
1 parent 1de1ae0 commit 5e798ae
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 105 deletions.
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
!TrueType_without_cmap.pdf
!franz.pdf
!franz_2.pdf
!fraction-highlight.pdf
!german-umlaut-r.pdf
!xref_command_missing.pdf
!issue1155r.pdf
Expand Down
Binary file added test/pdfs/fraction-highlight.pdf
Binary file not shown.
3 changes: 3 additions & 0 deletions test/unit/clitests_helper.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ import { isNodeJS } from "../../src/shared/is_node.js";
import { PDFNodeStream } from "../../src/display/node_stream.js";
import { setPDFNetworkStreamFactory } from "../../src/display/api.js";

// Sets longer timeout, similar to `jasmine-boot.js`.
jasmine.DEFAULT_TIMEOUT_INTERVAL = 30000;

// Ensure that this script only runs in Node.js environments.
if (!isNodeJS) {
throw new Error(
Expand Down
258 changes: 174 additions & 84 deletions test/unit/pdf_find_controller_spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import { getDocument } from "../../src/display/api.js";
import { PDFFindController } from "../../web/pdf_find_controller.js";
import { SimpleLinkService } from "../../web/pdf_link_service.js";

const tracemonkeyFileName = "tracemonkey.pdf";

class MockLinkService extends SimpleLinkService {
constructor() {
super();
Expand All @@ -44,89 +46,106 @@ class MockLinkService extends SimpleLinkService {
}
}

describe("pdf_find_controller", function () {
let eventBus;
let pdfFindController;

beforeEach(function (done) {
const loadingTask = getDocument(buildGetDocumentParams("tracemonkey.pdf"));
loadingTask.promise.then(function (pdfDocument) {
eventBus = new EventBus();
async function initializePdfFindController(filename) {
const loadingTask = getDocument(
buildGetDocumentParams(filename || tracemonkeyFileName)
);
const pdfDocument = await loadingTask.promise;

const linkService = new MockLinkService();
linkService.setDocument(pdfDocument);
const eventBus = new EventBus();

pdfFindController = new PDFFindController({
linkService,
eventBus,
});
pdfFindController.setDocument(pdfDocument); // Enable searching.
const linkService = new MockLinkService();
linkService.setDocument(pdfDocument);

done();
});
const pdfFindController = new PDFFindController({
linkService,
eventBus,
});
pdfFindController.setDocument(pdfDocument); // Enable searching.

afterEach(function () {
eventBus = null;
pdfFindController = null;
});
return { eventBus, pdfFindController, loadingTask };
}

function testSearch({ parameters, matchesPerPage, selectedMatch }) {
return new Promise(function (resolve) {
pdfFindController.executeCommand("find", parameters);

// The `updatefindmatchescount` event is only emitted if the page contains
// at least one match for the query, so the last non-zero item in the
// matches per page array corresponds to the page for which the final
// `updatefindmatchescount` event is emitted. If this happens, we know
// that any subsequent pages won't trigger the event anymore and we
// can start comparing the matches per page. This logic is necessary
// because we call the `pdfFindController.pageMatches` getter directly
// after receiving the event and the underlying `_pageMatches` array
// is only extended when a page is processed, so it will only contain
// entries for the pages processed until the time when the final event
// was emitted.
let totalPages = matchesPerPage.length;
for (let i = totalPages - 1; i >= 0; i--) {
if (matchesPerPage[i] > 0) {
totalPages = i + 1;
break;
}
function testSearch({
eventBus,
pdfFindController,
parameters,
matchesPerPage,
selectedMatch,
pageMatches = null,
pageMatchesLength = null,
}) {
return new Promise(function (resolve) {
pdfFindController.executeCommand("find", parameters);

// The `updatefindmatchescount` event is only emitted if the page contains
// at least one match for the query, so the last non-zero item in the
// matches per page array corresponds to the page for which the final
// `updatefindmatchescount` event is emitted. If this happens, we know
// that any subsequent pages won't trigger the event anymore and we
// can start comparing the matches per page. This logic is necessary
// because we call the `pdfFindController.pageMatches` getter directly
// after receiving the event and the underlying `_pageMatches` array
// is only extended when a page is processed, so it will only contain
// entries for the pages processed until the time when the final event
// was emitted.
let totalPages = matchesPerPage.length;
for (let i = totalPages - 1; i >= 0; i--) {
if (matchesPerPage[i] > 0) {
totalPages = i + 1;
break;
}
}

const totalMatches = matchesPerPage.reduce((a, b) => {
return a + b;
});

eventBus.on(
"updatefindmatchescount",
function onUpdateFindMatchesCount(evt) {
if (pdfFindController.pageMatches.length !== totalPages) {
return;
}
eventBus.off("updatefindmatchescount", onUpdateFindMatchesCount);

expect(evt.matchesCount.total).toBe(totalMatches);
for (let i = 0; i < totalPages; i++) {
expect(pdfFindController.pageMatches[i].length).toEqual(
matchesPerPage[i]
);
}
expect(pdfFindController.selected.pageIdx).toEqual(
selectedMatch.pageIndex
);
expect(pdfFindController.selected.matchIdx).toEqual(
selectedMatch.matchIndex
const totalMatches = matchesPerPage.reduce((a, b) => {
return a + b;
});

eventBus.on(
"updatefindmatchescount",
function onUpdateFindMatchesCount(evt) {
if (pdfFindController.pageMatches.length !== totalPages) {
return;
}
eventBus.off("updatefindmatchescount", onUpdateFindMatchesCount);

expect(evt.matchesCount.total).toBe(totalMatches);
for (let i = 0; i < totalPages; i++) {
expect(pdfFindController.pageMatches[i].length).toEqual(
matchesPerPage[i]
);
}
expect(pdfFindController.selected.pageIdx).toEqual(
selectedMatch.pageIndex
);
expect(pdfFindController.selected.matchIdx).toEqual(
selectedMatch.matchIndex
);

resolve();
if (pageMatches) {
expect(pdfFindController.pageMatches).toEqual(pageMatches);
expect(pdfFindController.pageMatchesLength).toEqual(
pageMatchesLength
);
}
);
});
}

it("performs a normal search", function (done) {
testSearch({
resolve();
}
);
});
}

describe("pdf_find_controller", function () {
it("performs a normal search", async function () {
const {
eventBus,
pdfFindController,
loadingTask,
} = await initializePdfFindController();

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "Dynamic",
caseSensitive: false,
Expand All @@ -139,14 +158,24 @@ describe("pdf_find_controller", function () {
pageIndex: 0,
matchIndex: 0,
},
}).then(done);
});

return loadingTask.destroy();
});

it("performs a normal search and finds the previous result", function (done) {
it("performs a normal search and finds the previous result", async function () {
// Page 14 (with page index 13) contains five results. By default, the
// first result (match index 0) is selected, so the previous result
// should be the fifth result (match index 4).
testSearch({
const {
eventBus,
pdfFindController,
loadingTask,
} = await initializePdfFindController();

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "conference",
caseSensitive: false,
Expand All @@ -159,11 +188,21 @@ describe("pdf_find_controller", function () {
pageIndex: 13,
matchIndex: 4,
},
}).then(done);
});

return loadingTask.destroy();
});

it("performs a case sensitive search", function (done) {
testSearch({
it("performs a case sensitive search", async function () {
const {
eventBus,
pdfFindController,
loadingTask,
} = await initializePdfFindController();

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "Dynamic",
caseSensitive: true,
Expand All @@ -176,13 +215,23 @@ describe("pdf_find_controller", function () {
pageIndex: 0,
matchIndex: 0,
},
}).then(done);
});

return loadingTask.destroy();
});

it("performs an entire word search", function (done) {
it("performs an entire word search", async function () {
// Page 13 contains both 'Government' and 'Governmental', so the latter
// should not be found with entire word search.
testSearch({
const {
eventBus,
pdfFindController,
loadingTask,
} = await initializePdfFindController();

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "Government",
caseSensitive: false,
Expand All @@ -195,13 +244,23 @@ describe("pdf_find_controller", function () {
pageIndex: 12,
matchIndex: 0,
},
}).then(done);
});

return loadingTask.destroy();
});

it("performs a multiple term (no phrase) search", function (done) {
it("performs a multiple term (no phrase) search", async function () {
// Page 9 contains 'alternate' and pages 6 and 9 contain 'solution'.
// Both should be found for multiple term (no phrase) search.
testSearch({
const {
eventBus,
pdfFindController,
loadingTask,
} = await initializePdfFindController();

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "alternate solution",
caseSensitive: false,
Expand All @@ -214,6 +273,37 @@ describe("pdf_find_controller", function () {
pageIndex: 5,
matchIndex: 0,
},
}).then(done);
});

return loadingTask.destroy();
});

it("performs a normal search, where the text is normalized", async function () {
const {
eventBus,
pdfFindController,
loadingTask,
} = await initializePdfFindController("fraction-highlight.pdf");

await testSearch({
eventBus,
pdfFindController,
parameters: {
query: "fraction",
caseSensitive: false,
entireWord: false,
phraseSearch: true,
findPrevious: false,
},
matchesPerPage: [3],
selectedMatch: {
pageIndex: 0,
matchIndex: 0,
},
pageMatches: [[19, 48, 66]],
pageMatchesLength: [[8, 8, 8]],
});

return loadingTask.destroy();
});
});
Loading

0 comments on commit 5e798ae

Please sign in to comment.