Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes bugs in the regex cite key pattern modifier #6893

Merged
merged 10 commits into from
Sep 21, 2020
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,56 @@
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;

public class RegexFormatter extends Formatter {
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RegexFormatter extends Formatter {
public static final String KEY = "regex";
private static final Logger LOGGER = LoggerFactory.getLogger(RegexFormatter.class);
private static final Pattern PATTERN_ESCAPED_OPENING_CURLY_BRACE = Pattern.compile("\\\\\\{");
private static final Pattern PATTERN_ESCAPED_CLOSING_CURLY_BRACE = Pattern.compile("\\\\\\}");
// RegEx to match {...}
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
// \\ is required to have the { interpreted as character
// ? is required to disable the aggressive match
private static final Pattern PATTERN_ENCLOSED_IN_CURLY_BRACES = Pattern.compile("(\\{.*?})");
private static final String REGEX_CAPTURING_GROUP = "regex";
private static final String REPLACEMENT_CAPTURING_GROUP = "replacement";
/**
* Matches a valid argument to the constructor. Two capturing groups are used to parse the {@link
* RegexFormatter#regex} and {@link RegexFormatter#replacement} used in {@link RegexFormatter#format(String)}
*/
private static final Pattern CONSTRUCTOR_ARGUMENT = Pattern.compile(
"^\\(\"(?<" + REGEX_CAPTURING_GROUP + ">.*?)\" *?, *?\"(?<" + REPLACEMENT_CAPTURING_GROUP + ">.*?)\"\\)$");
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
// Magic arbitrary unicode char, which will never appear in bibtex files
private static final String PLACEHOLDER_FOR_PROTECTED_GROUP = Character.toString('\u0A14');
private static final String PLACEHOLDER_FOR_OPENING_CURLY_BRACE = Character.toString('\u0A15');
private static final String PLACEHOLDER_FOR_CLOSING_CURLY_BRACE = Character.toString('\u0A16');
private static final String QUOTE_AND_OPENING_BRACE = "\"(";
private static final int LENGTH_OF_QUOTE_AND_OPENING_BRACE = QUOTE_AND_OPENING_BRACE.length();
private static final String CLOSING_BRACE_AND_QUOTE = ")\"";
private static final int LENGTH_OF_CLOSING_BRACE_AND_QUOTE = CLOSING_BRACE_AND_QUOTE.length();
private static String regex;
private String replacement;
private final String regex;
private final String replacement;

/**
* Constructs a new regular expression-based formatter with the given RegEx.
*
* @param input the regular expressions for matching and replacing given in the form {@code (<regex>, <replace>)}.
* @param input the regular expressions for matching and replacing given in the form {@code ("<regex>",
* "<replace>")}.
*/
public RegexFormatter(String input) {
// formatting is like ("exp1","exp2"), we want to first remove (" and ")
String rexToSet = input.substring(LENGTH_OF_QUOTE_AND_OPENING_BRACE, input.length() - LENGTH_OF_CLOSING_BRACE_AND_QUOTE);
String[] parts = rexToSet.split("\",\"");
regex = parts[0];
replacement = parts[1];
Objects.requireNonNull(input);
input = input.trim();
Matcher constructorArgument = CONSTRUCTOR_ARGUMENT.matcher(input);
if (constructorArgument.matches()) {
regex = constructorArgument.group(REGEX_CAPTURING_GROUP);
replacement = constructorArgument.group(REPLACEMENT_CAPTURING_GROUP);
} else {
regex = null;
replacement = null;
LOGGER.warn("RegexFormatter could not parse the input: " + input);
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
}
}

@Override
Expand All @@ -60,7 +75,13 @@ private String replaceHonoringProtectedGroups(final String input) {
replaced.add(matcher.group(1));
}
String workingString = matcher.replaceAll(PLACEHOLDER_FOR_PROTECTED_GROUP);
workingString = workingString.replaceAll(regex, replacement);
try {
workingString = workingString.replaceAll(regex, replacement);
} catch (PatternSyntaxException e) {
LOGGER.warn("There is a syntax error in the regular expression, " +
regex + ", used by the regex modifier", e);
k3KAW8Pnf7mkmdSMPHz27 marked this conversation as resolved.
Show resolved Hide resolved
return input;
}

for (String r : replaced) {
workingString = workingString.replaceFirst(PLACEHOLDER_FOR_PROTECTED_GROUP, r);
Expand All @@ -71,7 +92,7 @@ private String replaceHonoringProtectedGroups(final String input) {
@Override
public String format(final String input) {
Objects.requireNonNull(input);
if (regex == null) {
if (regex == null || replacement == null) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you switch to Optional<String> as datatype for regex and replacement? JabRef wants to get rid off null.

Copy link
Member Author

@k3KAW8Pnf7mkmdSMPHz27 k3KAW8Pnf7mkmdSMPHz27 Sep 21, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not easily without code-style complaints. regex and replacement are instance fields (private final String).

I believe the choice of using instance fields is to inherit the method logic.cleanup.Formatter#format(String value), which most modifiers do, while still allowing arguments.

The behavior of most code in the citation key patterns is to either return an empty string or an unmodified string on invalid usage. Hence throwing an exception during construction would require code outside of RegexFormatter to deal with that circumstance, which I'd like to avoid due to separation of concerns.

I'd guess the alternative is to create an interface extending logic.cleanup.Formatter with format(String value, String ... args).

I have avoided taking these types of decisions as I am fairly new to Java and JabRef, but I can look into other alternatives.

return input;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@

import static org.junit.jupiter.api.Assertions.assertEquals;

/**
* Tests in addition to the general tests from {@link org.jabref.logic.formatter.FormatterTest}
*/
class RegexFormatterTest {

private RegexFormatter formatter;
Expand Down Expand Up @@ -52,4 +49,28 @@ void formatExample() {
formatter = new RegexFormatter("(\" \",\"-\")");
assertEquals("Please-replace-the-spaces", formatter.format(formatter.getExampleInput()));
}

@Test
void formatCanRemoveMatchesWithEmptyReplacement() {
formatter = new RegexFormatter("(\"[A-Z]\",\"\")");
assertEquals("abc", formatter.format("AaBbCc"));
}

@Test
void constructorWithInvalidConstructorArgumentReturnUnchangedString() {
formatter = new RegexFormatter("(\"\",\"\"");
assertEquals("AaBbCc", formatter.format("AaBbCc"));
}

@Test
void constructorWithEmptyStringArgumentReturnUnchangedString() {
formatter = new RegexFormatter("");
assertEquals("AaBbCc", formatter.format("AaBbCc"));
}

@Test
void constructorAllowsSpacesBetweenQuotes() {
formatter = new RegexFormatter("(\"[A-Z]\", \"\")");
assertEquals("abc", formatter.format("AaBbCc"));
}
}