Skip to content

Commit

Permalink
Fixes bugs in the regex cite key pattern modifier (#6893)
Browse files Browse the repository at this point in the history
  • Loading branch information
k3KAW8Pnf7mkmdSMPHz27 authored Sep 21, 2020
1 parent aee54e5 commit 0da0f0c
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,57 @@
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;

public class RegexFormatter extends Formatter {
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RegexFormatter extends Formatter {
public static final String KEY = "regex";
private static final Pattern PATTERN_ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{");
private static final Pattern PATTERN_ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}");
// RegEx to match {...}
// \\ is required to have the { interpreted as character
// ? is required to disable the aggressive match
private static final Pattern PATTERN_ENCLOSED_IN_CURLY_BRACES = Pattern.compile("(\\{.*?})");
private static final Logger LOGGER = LoggerFactory.getLogger(RegexFormatter.class);
private static final Pattern ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{");
private static final Pattern ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}");
/**
* Matches text enclosed in curly braces. Each match is swapped for a placeholder before the regex is applied and
* restored afterwards, which prevents that part of the input from being replaced.
*/
private static final Pattern ENCLOSED_IN_CURLY_BRACES = Pattern.compile("\\{.*?}");
private static final String REGEX_CAPTURING_GROUP = "regex";
private static final String REPLACEMENT_CAPTURING_GROUP = "replacement";
/**
* Matches a valid argument to the constructor. Two capturing groups are used to parse the {@link
* RegexFormatter#regex} and {@link RegexFormatter#replacement} used in {@link RegexFormatter#format(String)}
*/
private static final Pattern CONSTRUCTOR_ARGUMENT = Pattern.compile(
"^\\(\"(?<" + REGEX_CAPTURING_GROUP + ">.*?)\" *?, *?\"(?<" + REPLACEMENT_CAPTURING_GROUP + ">.*)\"\\)$");
// Arbitrary Unicode characters used as placeholders; they are assumed never to appear in BibTeX files
private static final String PLACEHOLDER_FOR_PROTECTED_GROUP = Character.toString('\u0A14');
private static final String PLACEHOLDER_FOR_OPENING_CURLY_BRACE = Character.toString('\u0A15');
private static final String PLACEHOLDER_FOR_CLOSING_CURLY_BRACE = Character.toString('\u0A16');
private static final String QUOTE_AND_OPENING_BRACE = "\"(";
private static final int LENGTH_OF_QUOTE_AND_OPENING_BRACE = QUOTE_AND_OPENING_BRACE.length();
private static final String CLOSING_BRACE_AND_QUOTE = ")\"";
private static final int LENGTH_OF_CLOSING_BRACE_AND_QUOTE = CLOSING_BRACE_AND_QUOTE.length();
private static String regex;
private String replacement;
private final String regex;
private final String replacement;

/**
* Constructs a new regular expression-based formatter with the given RegEx.
*
* @param input the regular expressions for matching and replacing given in the form {@code (<regex>, <replace>)}.
* @param input the regular expressions for matching and replacing given in the form {@code ("<regex>",
* "<replace>")}.
*/
public RegexFormatter(String input) {
// formatting is like ("exp1","exp2"), we want to first remove (" and ")
String rexToSet = input.substring(LENGTH_OF_QUOTE_AND_OPENING_BRACE, input.length() - LENGTH_OF_CLOSING_BRACE_AND_QUOTE);
String[] parts = rexToSet.split("\",\"");
regex = parts[0];
replacement = parts[1];
Objects.requireNonNull(input);
input = input.trim();
Matcher constructorArgument = CONSTRUCTOR_ARGUMENT.matcher(input);
if (constructorArgument.matches()) {
regex = constructorArgument.group(REGEX_CAPTURING_GROUP);
replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP);
} else {
regex = null;
replacement = null;
LOGGER.warn("RegexFormatter could not parse the input: {}", input);
}
}

@Override
Expand All @@ -53,14 +69,19 @@ public String getKey() {
}

private String replaceHonoringProtectedGroups(final String input) {
Matcher matcher = PATTERN_ENCLOSED_IN_CURLY_BRACES.matcher(input);
Matcher matcher = ENCLOSED_IN_CURLY_BRACES.matcher(input);

List<String> replaced = new ArrayList<>();
while (matcher.find()) {
replaced.add(matcher.group(1));
replaced.add(matcher.group());
}
String workingString = matcher.replaceAll(PLACEHOLDER_FOR_PROTECTED_GROUP);
workingString = workingString.replaceAll(regex, replacement);
try {
workingString = workingString.replaceAll(regex, replacement);
} catch (PatternSyntaxException e) {
LOGGER.warn("There is a syntax error in the regular expression \"{}\" used by the regex modifier", regex, e);
return input;
}

for (String r : replaced) {
workingString = workingString.replaceFirst(PLACEHOLDER_FOR_PROTECTED_GROUP, r);
Expand All @@ -71,19 +92,19 @@ private String replaceHonoringProtectedGroups(final String input) {
@Override
public String format(final String input) {
Objects.requireNonNull(input);
if (regex == null) {
if (regex == null || replacement == null) {
return input;
}

Matcher matcherOpeningCurlyBrace = PATTERN_ESCAPED_OPENING_CURLY_BRACE.matcher(input);
final String openingCurlyBraceReplaced = matcherOpeningCurlyBrace.replaceAll(PLACEHOLDER_FOR_OPENING_CURLY_BRACE);
Matcher escapedOpeningCurlyBrace = ESCAPED_OPENING_CURLY_BRACE.matcher(input);
String inputWithPlaceholder = escapedOpeningCurlyBrace.replaceAll(PLACEHOLDER_FOR_OPENING_CURLY_BRACE);

Matcher matcherClosingCurlyBrace = PATTERN_ESCAPED_CLOSING_CURLY_BRACE.matcher(openingCurlyBraceReplaced);
final String closingCurlyBraceReplaced = matcherClosingCurlyBrace.replaceAll(PLACEHOLDER_FOR_CLOSING_CURLY_BRACE);
Matcher escapedClosingCurlyBrace = ESCAPED_CLOSING_CURLY_BRACE.matcher(inputWithPlaceholder);
inputWithPlaceholder = escapedClosingCurlyBrace.replaceAll(PLACEHOLDER_FOR_CLOSING_CURLY_BRACE);

final String regexApplied = replaceHonoringProtectedGroups(closingCurlyBraceReplaced);
final String regexMatchesReplaced = replaceHonoringProtectedGroups(inputWithPlaceholder);

return regexApplied
return regexMatchesReplaced
.replaceAll(PLACEHOLDER_FOR_OPENING_CURLY_BRACE, "\\\\{")
.replaceAll(PLACEHOLDER_FOR_CLOSING_CURLY_BRACE, "\\\\}");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
* Tests in addition to the general tests from {@link org.jabref.logic.formatter.FormatterTest}
*/
class RegexFormatterTest {

private RegexFormatter formatter;
Expand Down Expand Up @@ -52,4 +49,34 @@ void formatExample() {
formatter = new RegexFormatter("(\" \",\"-\")");
assertEquals("Please-replace-the-spaces", formatter.format(formatter.getExampleInput()));
}

@Test
void formatCanRemoveMatchesWithEmptyReplacement() {
    // An empty replacement string is expected to delete every upper-case letter matched by the regex.
    formatter = new RegexFormatter("(\"[A-Z]\",\"\")");
    String formatted = formatter.format("AaBbCc");
    assertEquals("abc", formatted);
}

@Test
void constructorWithInvalidConstructorArgumentReturnUnchangedString() {
    // The argument lacks its closing parenthesis, so the formatter is expected to leave the input untouched.
    formatter = new RegexFormatter("(\"\",\"\"");
    String formatted = formatter.format("AaBbCc");
    assertEquals("AaBbCc", formatted);
}

@Test
void constructorWithEmptyStringArgumentReturnUnchangedString() {
    // An empty argument carries neither a regex nor a replacement; formatting is expected to be a no-op.
    formatter = new RegexFormatter("");
    String formatted = formatter.format("AaBbCc");
    assertEquals("AaBbCc", formatted);
}

@Test
void constructorAllowsSpacesBetweenQuotes() {
    // A space after the comma separating the two quoted parts must still be accepted by the constructor.
    formatter = new RegexFormatter("(\"[A-Z]\", \"\")");
    String formatted = formatter.format("AaBbCc");
    assertEquals("abc", formatted);
}

@Test
void formatWithSyntaxErrorReturnUnchangedString() {
    // "(" is an unbalanced group and therefore not a valid regex; the input is expected to come back unchanged.
    formatter = new RegexFormatter("(\"(\", \"\")");
    String formatted = formatter.format("AaBbCc");
    assertEquals("AaBbCc", formatted);
}
}

0 comments on commit 0da0f0c

Please sign in to comment.