From 2f6852088012703b2f3a3a0d3e19e88bfe82ae3a Mon Sep 17 00:00:00 2001 From: Dawid Date: Wed, 12 Feb 2020 00:00:21 +0100 Subject: [PATCH 1/5] Fix not escaping special characters in search pattern fixes #5892 * add method to get search pattern for searched words with escaped javascript regexp special characters (for search without regular expressions) * in preview viewer use search pattern with escaped javascript regexp special characters --- .../org/jabref/gui/preview/PreviewViewer.java | 4 ++-- .../org/jabref/logic/search/SearchQuery.java | 17 ++++++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/jabref/gui/preview/PreviewViewer.java b/src/main/java/org/jabref/gui/preview/PreviewViewer.java index 7f154c6e78d..190f973fa39 100644 --- a/src/main/java/org/jabref/gui/preview/PreviewViewer.java +++ b/src/main/java/org/jabref/gui/preview/PreviewViewer.java @@ -85,7 +85,7 @@ public class PreviewViewer extends ScrollPane implements InvalidationListener { private boolean registered; private ChangeListener> listener = (queryObservable, queryOldValue, queryNewValue) -> { - searchHighlightPattern = queryNewValue.flatMap(SearchQuery::getPatternForWords); + searchHighlightPattern = queryNewValue.flatMap(SearchQuery::getJsPatternForWords); highlightSearchPattern(); }; @@ -131,7 +131,7 @@ public void setTheme(String theme) { private void highlightSearchPattern() { if (searchHighlightPattern.isPresent()) { - String pattern = searchHighlightPattern.get().pattern().replace("\\Q", "").replace("\\E", ""); + String pattern = searchHighlightPattern.get().pattern(); previewView.getEngine().executeScript( "var markInstance = new Mark(document.getElementById(\"content\"));" + diff --git a/src/main/java/org/jabref/logic/search/SearchQuery.java b/src/main/java/org/jabref/logic/search/SearchQuery.java index e6e35486767..2070e1b70c8 100644 --- a/src/main/java/org/jabref/logic/search/SearchQuery.java +++ b/src/main/java/org/jabref/logic/search/SearchQuery.java @@ -18,6 +18,9 @@ public class SearchQuery implements SearchMatcher { + // regexp pattern for escaping special characters in javascript regex + public static final String JAVASCRIPT_ESCAPED_CHARS_PATTERN = "[\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]\\\\/]"; + private final String query; private final boolean caseSensitive; private final boolean regularExpression; @@ -124,6 +127,18 @@ public List getSearchWords() { // Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled public Optional getPatternForWords() { + return joinWordsToPattern(false); + } + + // Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped for javascript if no regular expression search is enabled + public Optional getJsPatternForWords() { + return joinWordsToPattern(true); + } + + /* Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled + * @param escapeSpecialCharsForJS whether to escape characters in wi for javascript regexp (escaping all special characters) or for java (using \Q and \E) + */ + private Optional joinWordsToPattern(boolean escapeSpecialCharsForJS) { List words = getSearchWords(); if ((words == null) || words.isEmpty() || words.get(0).isEmpty()) { @@ -133,7 +148,7 @@ public Optional getPatternForWords() { // compile the words to a regular expression in the form (w1)|(w2)|(w3) StringJoiner joiner = new StringJoiner(")|(", "(", ")"); for (String word : words) { - joiner.add(regularExpression ? word : Pattern.quote(word)); + joiner.add(regularExpression ? word : (escapeSpecialCharsForJS ? word.replaceAll(JAVASCRIPT_ESCAPED_CHARS_PATTERN, "\\\\$0") : Pattern.quote(word))); } String searchPattern = joiner.toString(); From 5c2925dcac0f92c9e709d50673dfb3d50f05b4b3 Mon Sep 17 00:00:00 2001 From: Dawid Date: Wed, 12 Feb 2020 15:05:33 +0100 Subject: [PATCH 2/5] Refactoring and performance improvement * use enum to specify special characters escape mode * use compiled regex pattern instead of string --- .../org/jabref/logic/search/SearchQuery.java | 44 +++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/jabref/logic/search/SearchQuery.java b/src/main/java/org/jabref/logic/search/SearchQuery.java index 2070e1b70c8..e118a9c2a24 100644 --- a/src/main/java/org/jabref/logic/search/SearchQuery.java +++ b/src/main/java/org/jabref/logic/search/SearchQuery.java @@ -18,8 +18,24 @@ public class SearchQuery implements SearchMatcher { - // regexp pattern for escaping special characters in javascript regex - public static final String JAVASCRIPT_ESCAPED_CHARS_PATTERN = "[\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]\\\\/]"; + /** + * Regex pattern for escaping special characters in javascript regular expressions + */ + public static final Pattern JAVASCRIPT_ESCAPED_CHARS_PATTERN = Pattern.compile("[\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]\\\\/]"); + + /** + * Metod for escaping special characters in regular expressions + */ + private enum EscapeMode { + /** + * using \Q and \E marks + */ + JAVA, + /** + * escaping all javascript regex special characters separately + */ + JAVASCRIPT + } private final String query; private final boolean caseSensitive; @@ -127,18 +143,18 @@ public List getSearchWords() { // Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled public Optional getPatternForWords() { - return joinWordsToPattern(false); + return joinWordsToPattern(EscapeMode.JAVA); } // Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped for javascript if no regular expression search is enabled public Optional getJsPatternForWords() { - return joinWordsToPattern(true); + return joinWordsToPattern(EscapeMode.JAVASCRIPT); } - /* Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled - * @param escapeSpecialCharsForJS whether to escape characters in wi for javascript regexp (escaping all special characters) or for java (using \Q and \E) + /** Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled + * @param escapeMode method for escaping special characters in wi */ - private Optional joinWordsToPattern(boolean escapeSpecialCharsForJS) { + private Optional joinWordsToPattern(EscapeMode escapeMode) { List words = getSearchWords(); if ((words == null) || words.isEmpty() || words.get(0).isEmpty()) { @@ -148,7 +164,19 @@ private Optional joinWordsToPattern(boolean escapeSpecialCharsForJS) { // compile the words to a regular expression in the form (w1)|(w2)|(w3) StringJoiner joiner = new StringJoiner(")|(", "(", ")"); for (String word : words) { - joiner.add(regularExpression ? word : (escapeSpecialCharsForJS ? word.replaceAll(JAVASCRIPT_ESCAPED_CHARS_PATTERN, "\\\\$0") : Pattern.quote(word))); + if (regularExpression) + joiner.add(word); + else + switch (escapeMode) { + case JAVA: + joiner.add(Pattern.quote(word)); + break; + case JAVASCRIPT: + joiner.add(JAVASCRIPT_ESCAPED_CHARS_PATTERN.matcher(word).replaceAll("\\\\$0")); + break; + default: + throw new IllegalArgumentException("Unknown special characters escape method: " + escapeMode); + } } String searchPattern = joiner.toString(); From 89c72c05dd881133ee6a667d9c78ec503566cf5a Mon Sep 17 00:00:00 2001 From: Dawid Date: Wed, 12 Feb 2020 15:26:39 +0100 Subject: [PATCH 3/5] Refactoring: braces in if..else --- src/main/java/org/jabref/logic/search/SearchQuery.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jabref/logic/search/SearchQuery.java b/src/main/java/org/jabref/logic/search/SearchQuery.java index e118a9c2a24..2171a7329c3 100644 --- a/src/main/java/org/jabref/logic/search/SearchQuery.java +++ b/src/main/java/org/jabref/logic/search/SearchQuery.java @@ -164,9 +164,10 @@ private Optional joinWordsToPattern(EscapeMode escapeMode) { // compile the words to a regular expression in the form (w1)|(w2)|(w3) StringJoiner joiner = new StringJoiner(")|(", "(", ")"); for (String word : words) { - if (regularExpression) + if (regularExpression) { joiner.add(word); - else + } + else { switch (escapeMode) { case JAVA: joiner.add(Pattern.quote(word)); @@ -177,6 +178,7 @@ private Optional joinWordsToPattern(EscapeMode escapeMode) { default: throw new IllegalArgumentException("Unknown special characters escape method: " + escapeMode); } + } } String searchPattern = joiner.toString(); From 6f39cbc6b035d5d2c07ba176b95e2b54139fc109 Mon Sep 17 00:00:00 2001 From: Dawid Date: Wed, 12 Feb 2020 23:05:16 +0100 Subject: [PATCH 4/5] Refactoring, minor changes: names, comments --- .../java/org/jabref/gui/preview/PreviewViewer.java | 2 +- .../java/org/jabref/logic/search/SearchQuery.java | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/jabref/gui/preview/PreviewViewer.java b/src/main/java/org/jabref/gui/preview/PreviewViewer.java index 190f973fa39..6a0c42b0837 100644 --- a/src/main/java/org/jabref/gui/preview/PreviewViewer.java +++ b/src/main/java/org/jabref/gui/preview/PreviewViewer.java @@ -85,7 +85,7 @@ public class PreviewViewer extends ScrollPane implements InvalidationListener { private boolean registered; private ChangeListener> listener = (queryObservable, queryOldValue, queryNewValue) -> { - searchHighlightPattern = queryNewValue.flatMap(SearchQuery::getJsPatternForWords); + searchHighlightPattern = queryNewValue.flatMap(SearchQuery::getJavaScriptPatternForWords); highlightSearchPattern(); }; diff --git a/src/main/java/org/jabref/logic/search/SearchQuery.java b/src/main/java/org/jabref/logic/search/SearchQuery.java index 2171a7329c3..6d89beb3c4b 100644 --- a/src/main/java/org/jabref/logic/search/SearchQuery.java +++ b/src/main/java/org/jabref/logic/search/SearchQuery.java @@ -24,7 +24,7 @@ public class SearchQuery implements SearchMatcher { public static final Pattern JAVASCRIPT_ESCAPED_CHARS_PATTERN = Pattern.compile("[\\.\\*\\+\\?\\^\\$\\{\\}\\(\\)\\|\\[\\]\\\\/]"); /** - * Metod for escaping special characters in regular expressions + * The mode of escaping special characters in regular expressions */ private enum EscapeMode { /** @@ -147,12 +147,12 @@ public Optional getPatternForWords() { } // Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped for javascript if no regular expression search is enabled - public Optional getJsPatternForWords() { + public Optional getJavaScriptPatternForWords() { return joinWordsToPattern(EscapeMode.JAVASCRIPT); } /** Returns a regular expression pattern in the form (w1)|(w2)| ... wi are escaped if no regular expression search is enabled - * @param escapeMode method for escaping special characters in wi + * @param escapeMode the mode of escaping special characters in wi */ private Optional joinWordsToPattern(EscapeMode escapeMode) { List words = getSearchWords(); @@ -166,8 +166,7 @@ private Optional joinWordsToPattern(EscapeMode escapeMode) { for (String word : words) { if (regularExpression) { joiner.add(word); - } - else { + } else { switch (escapeMode) { case JAVA: joiner.add(Pattern.quote(word)); @@ -176,7 +175,7 @@ private Optional joinWordsToPattern(EscapeMode escapeMode) { joiner.add(JAVASCRIPT_ESCAPED_CHARS_PATTERN.matcher(word).replaceAll("\\\\$0")); break; default: - throw new IllegalArgumentException("Unknown special characters escape method: " + escapeMode); + throw new IllegalArgumentException("Unknown special characters escape mode: " + escapeMode); } } } From 266bfc8029d7f37f41897e4f0d47c7fdcb33bbd5 Mon Sep 17 00:00:00 2001 From: Dawid Date: Fri, 14 Feb 2020 17:08:42 +0100 Subject: [PATCH 5/5] Add tests of escaping special characters in search patterns --- .../jabref/logic/search/SearchQueryTest.java | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/test/java/org/jabref/logic/search/SearchQueryTest.java b/src/test/java/org/jabref/logic/search/SearchQueryTest.java index d430eab0e3f..e8630135ed7 100644 --- a/src/test/java/org/jabref/logic/search/SearchQueryTest.java +++ b/src/test/java/org/jabref/logic/search/SearchQueryTest.java @@ -203,4 +203,38 @@ public void testGetPattern() { //We can't directly compare the pattern objects assertEquals(Optional.of(pattern.toString()), result.getPatternForWords().map(Pattern::toString)); } + + @Test + public void testGetRegexpPattern() { + String queryText = "[a-c]\\d* \\d*"; + SearchQuery regexQuery = new SearchQuery(queryText, false, true); + Pattern pattern = Pattern.compile("([a-c]\\d* \\d*)"); + assertEquals(Optional.of(pattern.toString()), regexQuery.getPatternForWords().map(Pattern::toString)); + } + + @Test + public void testGetRegexpJavascriptPattern() { + String queryText = "[a-c]\\d* \\d*"; + SearchQuery regexQuery = new SearchQuery(queryText, false, true); + Pattern pattern = Pattern.compile("([a-c]\\d* \\d*)"); + assertEquals(Optional.of(pattern.toString()), regexQuery.getJavaScriptPatternForWords().map(Pattern::toString)); + } + + @Test + public void testEscapingInPattern() { + //first word contain all java special regex characters + String queryText = "<([{\\\\^-=$!|]})?*+.> word1 word2."; + SearchQuery textQueryWithSpecialChars = new SearchQuery(queryText, false, false); + String pattern = "(\\Q<([{\\^-=$!|]})?*+.>\\E)|(\\Qword1\\E)|(\\Qword2.\\E)"; + assertEquals(Optional.of(pattern), textQueryWithSpecialChars.getPatternForWords().map(Pattern::toString)); + } + + @Test + public void testEscapingInJavascriptPattern() { + //first word contain all javascript special regex characters that should be escaped individually in text based search + String queryText = "([{\\\\^$|]})?*+./ word1 word2."; + SearchQuery textQueryWithSpecialChars = new SearchQuery(queryText, false, false); + String pattern = "(\\(\\[\\{\\\\\\^\\$\\|\\]\\}\\)\\?\\*\\+\\.\\/)|(word1)|(word2\\.)"; + assertEquals(Optional.of(pattern), textQueryWithSpecialChars.getJavaScriptPatternForWords().map(Pattern::toString)); + } }