From d0c1b853e05ee84c70fb0efcf6681e885a6c14da Mon Sep 17 00:00:00 2001 From: PrimosK <primoz.kokol@gmail.com> Date: Tue, 7 Apr 2020 12:32:42 +0200 Subject: [PATCH] Fixes misaligned highlights. Explanation: The current solution was built on the assumption that the {{{textPositions}}} array will always be of the same length as the snippet text, but that's not the case. This fix takes the differing lengths of the two into account. --- .../com/dlsc/gemsfx/skins/PDFViewSkin.java | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/gemsfx/src/main/java/com/dlsc/gemsfx/skins/PDFViewSkin.java b/gemsfx/src/main/java/com/dlsc/gemsfx/skins/PDFViewSkin.java index 43b0275b..b4d1a579 100644 --- a/gemsfx/src/main/java/com/dlsc/gemsfx/skins/PDFViewSkin.java +++ b/gemsfx/src/main/java/com/dlsc/gemsfx/skins/PDFViewSkin.java @@ -1002,9 +1002,6 @@ private void highlightSearchResults(int pageNumber, float scale, BufferedImage b final List<SearchResult> searchResults = getSkinnable().getSearchResults().stream().filter(result -> result.getPageNumber() == pageNumber).collect(Collectors.toList()); if (!searchResults.isEmpty()) { - final PDDocument document = getSkinnable().getDocument(); - final PDPage page = document.getPage(pageNumber); - final Graphics2D graphics = (Graphics2D) bufferedImage.getGraphics(); graphics.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER, .5f)); @@ -1015,16 +1012,17 @@ private void highlightSearchResults(int pageNumber, float scale, BufferedImage b searchResults.forEach(result -> { final String searchText = result.getSearchText(); + final String snippetText = result.getText(); final List<TextPosition> textPositions = result.getTextPositions(); - final int startIndex = result.getText().toLowerCase().indexOf(searchText.toLowerCase()); + final int textPositionStartIndex = calculateTextPositionStartIndex(searchText, snippetText, textPositions); float x1 = Float.MAX_VALUE; float x2 = 0; float y1 = Float.MAX_VALUE; float y2 = 
0; - for (int i = startIndex; i < startIndex + searchText.length(); i++) { - TextPosition position = textPositions.get(i); + for (int textPositionIndex = textPositionStartIndex; textPositionIndex < textPositionStartIndex + searchText.length(); textPositionIndex++) { + TextPosition position = textPositions.get(textPositionIndex); x1 = Math.min(x1, position.getXDirAdj() * scale); x2 = Math.max(x2, (position.getXDirAdj() + position.getWidth()) * scale); @@ -1043,6 +1041,29 @@ private void highlightSearchResults(int pageNumber, float scale, BufferedImage b }); } } + + /** + * Note that the number of textPositions might not be equal to the length of the snippetText, + * so the character offset of searchText within snippetText is reduced by the extra characters held by earlier multi-character positions. + * + * See: org.apache.pdfbox.text.PDFTextStripper.WordWithTextPositions + */ + private int calculateTextPositionStartIndex(String searchText, String snippetText, List<TextPosition> textPositions) { + + final int snippetTextStartIndex = snippetText.toLowerCase().indexOf(searchText.toLowerCase()); + + int startIndexDecreaseDelta = 0; + + // If any TextPosition (up to the snippetTextStartIndex) contains more than one character, we have to account for that. NOTE(review): i is a character offset in snippetText but is used to index textPositions, and indexOf may return -1; confirm neither case can overshoot or underflow the list. + for (int i=0; i < snippetTextStartIndex; i++) { + int numberOfCharactersInTextPosition = textPositions.get(i).getUnicode().length(); + if (numberOfCharactersInTextPosition > 1) { + startIndexDecreaseDelta = startIndexDecreaseDelta + (numberOfCharactersInTextPosition - 1); + } + } + + return snippetTextStartIndex - startIndexDecreaseDelta; + } } private void updatePagesList() {