diff --git a/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java b/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java index 0fdb4166e3e..26859a82699 100644 --- a/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java +++ b/src/main/java/org/jabref/logic/formatter/bibtexfields/RegexFormatter.java @@ -5,41 +5,57 @@ import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import org.jabref.logic.cleanup.Formatter; import org.jabref.logic.l10n.Localization; -public class RegexFormatter extends Formatter { +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +public class RegexFormatter extends Formatter { public static final String KEY = "regex"; - private static final Pattern PATTERN_ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{"); - private static final Pattern PATTERN_ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}"); - // RegEx to match {...} - // \\ is required to have the { interpreted as character - // ? is required to disable the aggressive match - private static final Pattern PATTERN_ENCLOSED_IN_CURLY_BRACES = Pattern.compile("(\\{.*?})"); + private static final Logger LOGGER = LoggerFactory.getLogger(RegexFormatter.class); + private static final Pattern ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{"); + private static final Pattern ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}"); + /** + * Matches text enclosed in curly brackets, reluctantly up to the first closing bracket. Every complete match is + * swapped for a placeholder so that it is protected from being replaced. + */ + private static final Pattern ENCLOSED_IN_CURLY_BRACES = Pattern.compile("\\{.*?}"); + private static final String REGEX_CAPTURING_GROUP = "regex"; + private static final String REPLACEMENT_CAPTURING_GROUP = "replacement"; + /** + * Matches a valid argument to the constructor. 
Two capturing groups are used to parse the {@link + * RegexFormatter#regex} and {@link RegexFormatter#replacement} used in {@link RegexFormatter#format(String)} + */ + private static final Pattern CONSTRUCTOR_ARGUMENT = Pattern.compile( + "^\\(\"(?<" + REGEX_CAPTURING_GROUP + ">.*?)\" *?, *?\"(?<" + REPLACEMENT_CAPTURING_GROUP + ">.*)\"\\)$"); // Magic arbitrary unicode char, which will never appear in bibtex files private static final String PLACEHOLDER_FOR_PROTECTED_GROUP = Character.toString('\u0A14'); private static final String PLACEHOLDER_FOR_OPENING_CURLY_BRACE = Character.toString('\u0A15'); private static final String PLACEHOLDER_FOR_CLOSING_CURLY_BRACE = Character.toString('\u0A16'); - private static final String QUOTE_AND_OPENING_BRACE = "\"("; - private static final int LENGTH_OF_QUOTE_AND_OPENING_BRACE = QUOTE_AND_OPENING_BRACE.length(); - private static final String CLOSING_BRACE_AND_QUOTE = ")\""; - private static final int LENGTH_OF_CLOSING_BRACE_AND_QUOTE = CLOSING_BRACE_AND_QUOTE.length(); - private static String regex; - private String replacement; + private final String regex; + private final String replacement; /** * Constructs a new regular expression-based formatter with the given RegEx. * - * @param input the regular expressions for matching and replacing given in the form {@code (, )}. + * @param input the regular expressions for matching and replacing given in the form {@code ("<regex>", + * "<replacement>")}. 
*/ public RegexFormatter(String input) { - // formatting is like ("exp1","exp2"), we want to first remove (" and ") - String rexToSet = input.substring(LENGTH_OF_QUOTE_AND_OPENING_BRACE, input.length() - LENGTH_OF_CLOSING_BRACE_AND_QUOTE); - String[] parts = rexToSet.split("\",\""); - regex = parts[0]; - replacement = parts[1]; + Objects.requireNonNull(input); + input = input.trim(); + Matcher constructorArgument = CONSTRUCTOR_ARGUMENT.matcher(input); + if (constructorArgument.matches()) { + regex = constructorArgument.group(REGEX_CAPTURING_GROUP); + replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP); + } else { + regex = null; + replacement = null; + LOGGER.warn("RegexFormatter could not parse the input: {}", input); + } } @Override @@ -53,14 +69,19 @@ public String getKey() { } private String replaceHonoringProtectedGroups(final String input) { - Matcher matcher = PATTERN_ENCLOSED_IN_CURLY_BRACES.matcher(input); + Matcher matcher = ENCLOSED_IN_CURLY_BRACES.matcher(input); List replaced = new ArrayList<>(); while (matcher.find()) { - replaced.add(matcher.group(1)); + replaced.add(matcher.group()); } String workingString = matcher.replaceAll(PLACEHOLDER_FOR_PROTECTED_GROUP); - workingString = workingString.replaceAll(regex, replacement); + try { + workingString = workingString.replaceAll(regex, replacement); + } catch (PatternSyntaxException e) { + LOGGER.warn("There is a syntax error in the regular expression \"{}\" used by the regex modifier", regex, e); + return input; + } /* NOTE(review): only PatternSyntaxException is handled here; per the java.util.regex documentation a malformed replacement (for example a reference to a missing group) raises IllegalArgumentException or IndexOutOfBoundsException instead. The loop that follows hands each protected group to replaceFirst as a replacement string, so a backslash or dollar sign inside the group is interpreted rather than restored verbatim - TODO confirm whether Matcher.quoteReplacement(r) is needed. */ for (String r : replaced) { workingString = workingString.replaceFirst(PLACEHOLDER_FOR_PROTECTED_GROUP, r); @@ -71,19 +92,19 @@ private String replaceHonoringProtectedGroups(final String input) { @Override public String format(final String input) { Objects.requireNonNull(input); - if (regex == null) { + if (regex == null || replacement == null) { return input; } - Matcher matcherOpeningCurlyBrace = PATTERN_ESCAPED_OPENING_CURLY_BRACE.matcher(input); - final String 
openingCurlyBraceReplaced = matcherOpeningCurlyBrace.replaceAll(PLACEHOLDER_FOR_OPENING_CURLY_BRACE); + Matcher escapedOpeningCurlyBrace = ESCAPED_OPENING_CURLY_BRACE.matcher(input); + String inputWithPlaceholder = escapedOpeningCurlyBrace.replaceAll(PLACEHOLDER_FOR_OPENING_CURLY_BRACE); - Matcher matcherClosingCurlyBrace = PATTERN_ESCAPED_CLOSING_CURLY_BRACE.matcher(openingCurlyBraceReplaced); - final String closingCurlyBraceReplaced = matcherClosingCurlyBrace.replaceAll(PLACEHOLDER_FOR_CLOSING_CURLY_BRACE); + Matcher escapedClosingCurlyBrace = ESCAPED_CLOSING_CURLY_BRACE.matcher(inputWithPlaceholder); + inputWithPlaceholder = escapedClosingCurlyBrace.replaceAll(PLACEHOLDER_FOR_CLOSING_CURLY_BRACE); - final String regexApplied = replaceHonoringProtectedGroups(closingCurlyBraceReplaced); + final String regexMatchesReplaced = replaceHonoringProtectedGroups(inputWithPlaceholder); - return regexApplied + return regexMatchesReplaced .replaceAll(PLACEHOLDER_FOR_OPENING_CURLY_BRACE, "\\\\{") .replaceAll(PLACEHOLDER_FOR_CLOSING_CURLY_BRACE, "\\\\}"); } diff --git a/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java b/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java index 5f54cacee85..8cd7b10ab5d 100644 --- a/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java +++ b/src/test/java/org/jabref/logic/formatter/bibtexfields/RegexFormatterTest.java @@ -4,9 +4,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals; -/** - * Tests in addition to the general tests from {@link org.jabref.logic.formatter.FormatterTest} - */ class RegexFormatterTest { private RegexFormatter formatter; @@ -52,4 +49,34 @@ void formatExample() { formatter = new RegexFormatter("(\" \",\"-\")"); assertEquals("Please-replace-the-spaces", formatter.format(formatter.getExampleInput())); } + + @Test + void formatCanRemoveMatchesWithEmptyReplacement() { + formatter = new RegexFormatter("(\"[A-Z]\",\"\")"); + 
assertEquals("abc", formatter.format("AaBbCc")); + } + + @Test + void constructorWithInvalidConstructorArgumentReturnUnchangedString() { + formatter = new RegexFormatter("(\"\",\"\""); + assertEquals("AaBbCc", formatter.format("AaBbCc")); + } + + @Test + void constructorWithEmptyStringArgumentReturnUnchangedString() { + formatter = new RegexFormatter(""); + assertEquals("AaBbCc", formatter.format("AaBbCc")); + } + + @Test + void constructorAllowsSpacesBetweenQuotes() { + formatter = new RegexFormatter("(\"[A-Z]\", \"\")"); + assertEquals("abc", formatter.format("AaBbCc")); + } + + @Test + void formatWithSyntaxErrorReturnUnchangedString() { + formatter = new RegexFormatter("(\"(\", \"\")"); + assertEquals("AaBbCc", formatter.format("AaBbCc")); + } }