Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix title-related key patterns in BibtexKeyPatternUtil #2610

Merged
merged 4 commits into from
Mar 5, 2017
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -484,7 +485,7 @@ public static String applyModifiers(final String label, final List<String> parts
Optional<Formatter> formatter = Formatters.getFormatterForModifier(modifier);
if (formatter.isPresent()) {
resultingLabel = formatter.get().format(label);
} else if (!modifier.isEmpty() && modifier.length()>= 2 && (modifier.charAt(0) == '(') && modifier.endsWith(")")) {
} else if (!modifier.isEmpty() && (modifier.length()>= 2) && (modifier.charAt(0) == '(') && modifier.endsWith(")")) {
// Alternate text modifier in parentheses. Should be inserted if
// the label is empty:
if (label.isEmpty() && (modifier.length() > 2)) {
Expand Down Expand Up @@ -632,14 +633,19 @@ else if (val.matches("edtr\\d+")) {
return firstPage(entry.getField(FieldName.PAGES).orElse(""));
} else if ("lastpage".equals(val)) {
return lastPage(entry.getField(FieldName.PAGES).orElse(""));
} else if ("title".equals(val)) {
return camelizeSignificantWordsInTitle(entry.getField(FieldName.TITLE).orElse(""));
} else if ("shorttitle".equals(val)) {
return getTitleWords(3, entry.getField(FieldName.TITLE).orElse(""));
} else if ("shorttitleINI".equals(val)) {
return keepLettersAndDigitsOnly(
applyModifiers(getTitleWordsWithSpaces(3, entry.getField(FieldName.TITLE).orElse("")),
Collections.singletonList("abbr"), 0));
} else if ("veryshorttitle".equals(val)) {
return getTitleWords(1, entry.getField(FieldName.TITLE).orElse(""));
return getTitleWords(1,
removeSmallWords(entry.getField(FieldName.TITLE).orElse("")));
} else if ("camel".equals(val)) {
return getCamelizedTitle(entry.getField(FieldName.TITLE).orElse(""));
} else if ("shortyear".equals(val)) {
String yearString = entry.getFieldOrAlias(FieldName.YEAR).orElse("");
if (yearString.isEmpty()) {
Expand Down Expand Up @@ -719,16 +725,16 @@ public static String getTitleWords(int number, String title) {
return keepLettersAndDigitsOnly(getTitleWordsWithSpaces(number, title));
}

private static String getTitleWordsWithSpaces(int number, String title) {
/**
* Removes any '-', unnecessary whitespace and latex commands formatting
*/
private static String formatTitle(String title) {
String ss = new RemoveLatexCommandsFormatter().format(title);
StringBuilder stringBuilder = new StringBuilder();
StringBuilder current;
int piv = 0;
int words = 0;

// sorry for being English-centric. I guess these
// words should really be an editable preference.
mainl: while ((piv < ss.length()) && (words < number)) {
while (piv < ss.length()) {
current = new StringBuilder();
// Get the next word:
while ((piv < ss.length()) && !Character.isWhitespace(ss.charAt(piv))
Expand All @@ -742,18 +748,122 @@ private static String getTitleWordsWithSpaces(int number, String title) {
if (word.isEmpty()) {
continue;
}
for (String smallWord: Word.SMALLER_WORDS) {
if (word.equalsIgnoreCase(smallWord)) {
continue mainl;
}
}

// If we get here, the word was accepted.
if (stringBuilder.length() > 0) {
stringBuilder.append(' ');
}
stringBuilder.append(word);
words++;
}

return stringBuilder.toString();
}

/**
 * Produces the value used for the [camel] key pattern: the first letter of every
 * title word is upper-cased by camelizeTitle, and the result is then handed to
 * keepLettersAndDigitsOnly.
 *
 * @param title the content of the "title" field of a BibTeX entry
 * @return the camelized title after filtering by keepLettersAndDigitsOnly
 */
public static String getCamelizedTitle(String title) {
    String camelizedWithSpaces = camelizeTitle(title);
    return keepLettersAndDigitsOnly(camelizedWithSpaces);
}

/**
 * Upper-cases the first character of every word of the given title.
 * <p>
 * The title is first normalised with formatTitle and then split on whitespace.
 * The words are re-joined with single spaces, so the result still contains
 * spaces.
 *
 * @param title the content of the "title" field of a BibTeX entry
 * @return the space-separated words of the formatted title, each starting with an upper-case character
 */
private static String camelizeTitle(String title) {
    StringBuilder camelized = new StringBuilder();

    try (Scanner titleScanner = new Scanner(formatTitle(title))) {
        while (titleScanner.hasNext()) {
            // Scanner tokens are never empty, so substring(0, 1) is always safe here
            String word = titleScanner.next();

            if (camelized.length() > 0) {
                camelized.append(' ');
            }
            // Locale.ROOT keeps generated keys independent of the JVM default locale
            // (with a Turkish default locale "i" would otherwise become "İ")
            camelized.append(word.substring(0, 1).toUpperCase(Locale.ROOT)).append(word.substring(1));
        }
    }

    return camelized.toString();
}

/**
 * Capitalises the significant words of the given title.
 * <p>
 * The title is normalised with formatTitle and split on whitespace. A word that
 * matches an entry of Word.SMALLER_WORDS (ignoring case) gets a lower-case first
 * character, unless it is the first word of the result. Every other word, and
 * always the first word, gets an upper-case first character. The remaining
 * characters of each word are left untouched, and the words are joined with
 * single spaces.
 *
 * @param title the content of the "title" field of a BibTeX entry
 * @return the space-separated words of the formatted title with adjusted first characters
 */
public static String camelizeSignificantWordsInTitle(String title) {
    StringBuilder result = new StringBuilder();

    try (Scanner titleScanner = new Scanner(formatTitle(title))) {
        while (titleScanner.hasNext()) {
            // Scanner tokens are never empty, so substring(0, 1) is always safe here
            String word = titleScanner.next();

            boolean significant = true;
            for (String smallWord : Word.SMALLER_WORDS) {
                if (word.equalsIgnoreCase(smallWord)) {
                    significant = false;
                    // One match is enough, no need to look at the remaining small words
                    break;
                }
            }

            boolean firstWord = result.length() == 0;
            if (!firstWord) {
                result.append(' ');
            }

            // Locale.ROOT keeps generated keys independent of the JVM default locale
            String head = word.substring(0, 1);
            if (significant || firstWord) {
                // Significant words and the first word of the title are capitalised
                result.append(head.toUpperCase(Locale.ROOT));
            } else {
                result.append(head.toLowerCase(Locale.ROOT));
            }
            result.append(word.substring(1));
        }
    }

    return result.toString();
}


public static String removeSmallWords(String title) {
StringBuilder stringBuilder = new StringBuilder();
String formattedTitle = formatTitle(title);

try (Scanner titleScanner = new Scanner(formattedTitle)) {
mainl: while (titleScanner.hasNext()) {
String word = titleScanner.next();

for (String smallWord : Word.SMALLER_WORDS) {
if (word.equalsIgnoreCase(smallWord)) {
continue mainl;
}
}

if (stringBuilder.length() > 0) {
stringBuilder.append(' ');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Normally, we would use StringJoiner to avoid this kind of code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I copied this from some of the existing code, I'll try to change it to use a StringJoiner!

I'll also add a changelog entry.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you! 👍 We're just trying to improve the code quality wherever possible 😇

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem :)

I've made the suggested code change and added a change to the changelog.

}
stringBuilder.append(word);
}
}

return stringBuilder.toString();
}

private static String getTitleWordsWithSpaces(int number, String title) {
StringBuilder stringBuilder = new StringBuilder();
String formattedTitle = formatTitle(title);
int words = 0;

try (Scanner titleScanner = new Scanner(formattedTitle)) {
while (titleScanner.hasNext() && (words < number)) {
String word = titleScanner.next();

if (stringBuilder.length() > 0) {
stringBuilder.append(' ');
}
stringBuilder.append(word);
words++;
}
}

return stringBuilder.toString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -619,20 +619,29 @@ public void veryShortTitle() {
// veryShortTitle is getTitleWords with "1" as count
int count = 1;
assertEquals("application",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_ALL_LOWER_FOUR_SMALL_WORDS_ONE_EN_DASH));
BibtexKeyPatternUtil.getTitleWords(count,
BibtexKeyPatternUtil.removeSmallWords(TITLE_STRING_ALL_LOWER_FOUR_SMALL_WORDS_ONE_EN_DASH)));
assertEquals("BPEL", BibtexKeyPatternUtil.getTitleWords(count,
TITLE_STRING_ALL_LOWER_FIRST_WORD_IN_BRACKETS_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON));
assertEquals("Process", BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED));
BibtexKeyPatternUtil.removeSmallWords(
TITLE_STRING_ALL_LOWER_FIRST_WORD_IN_BRACKETS_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON)));
assertEquals("Process", BibtexKeyPatternUtil.getTitleWords(count,
BibtexKeyPatternUtil.removeSmallWords(TITLE_STRING_CASED)));
assertEquals("BPMN",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_ONE_UPPER_WORD_ONE_SMALL_WORD));
BibtexKeyPatternUtil.getTitleWords(count,
BibtexKeyPatternUtil.removeSmallWords(TITLE_STRING_CASED_ONE_UPPER_WORD_ONE_SMALL_WORD)));
assertEquals("Difference", BibtexKeyPatternUtil.getTitleWords(count,
TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AT_THE_BEGINNING));
BibtexKeyPatternUtil.removeSmallWords(TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AT_THE_BEGINNING)));
assertEquals("Cloud",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON));
BibtexKeyPatternUtil.getTitleWords(count,
BibtexKeyPatternUtil
.removeSmallWords(TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON)));
assertEquals("Towards",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_TWO_SMALL_WORDS_ONE_CONNECTED_WORD));
BibtexKeyPatternUtil.getTitleWords(count,
BibtexKeyPatternUtil.removeSmallWords(TITLE_STRING_CASED_TWO_SMALL_WORDS_ONE_CONNECTED_WORD)));
assertEquals("Measurement",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_FOUR_SMALL_WORDS_TWO_CONNECTED_WORDS));
BibtexKeyPatternUtil.getTitleWords(count,
BibtexKeyPatternUtil
.removeSmallWords(TITLE_STRING_CASED_FOUR_SMALL_WORDS_TWO_CONNECTED_WORDS)));
}

/**
Expand All @@ -644,21 +653,78 @@ public void shortTitle() {
int count = 3;
assertEquals("applicationmigrationeffort",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_ALL_LOWER_FOUR_SMALL_WORDS_ONE_EN_DASH));
assertEquals("BPELconformanceopen", BibtexKeyPatternUtil.getTitleWords(count,
assertEquals("BPELconformancein", BibtexKeyPatternUtil.getTitleWords(count,
TITLE_STRING_ALL_LOWER_FIRST_WORD_IN_BRACKETS_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON));
assertEquals("ProcessViewingPatterns", BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED));
assertEquals("BPMNConformanceOpen",
assertEquals("BPMNConformancein",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_ONE_UPPER_WORD_ONE_SMALL_WORD));
assertEquals("DifferenceGraphBased", BibtexKeyPatternUtil.getTitleWords(count,
assertEquals("TheDifferenceBetween", BibtexKeyPatternUtil.getTitleWords(count,
TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AT_THE_BEGINNING));
assertEquals("CloudComputingNext",
assertEquals("CloudComputingThe",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON));
assertEquals("TowardsChoreographybased",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_TWO_SMALL_WORDS_ONE_CONNECTED_WORD));
assertEquals("MeasurementDesignTime",
assertEquals("OntheMeasurement",
BibtexKeyPatternUtil.getTitleWords(count, TITLE_STRING_CASED_FOUR_SMALL_WORDS_TWO_CONNECTED_WORDS));
}

/**
 * Tests [camel]
 */
@Test
public void camel() {
    // Each row: { expected key, title }. [camel] capitalises every word of the title
    // and concatenates them.
    String[][] expectedAndTitle = {
            {"ApplicationMigrationEffortInTheCloudTheCaseOfCloudPlatforms",
                    TITLE_STRING_ALL_LOWER_FOUR_SMALL_WORDS_ONE_EN_DASH},
            {"BPELConformanceInOpenSourceEnginesTheCaseOfStaticAnalysis",
                    TITLE_STRING_ALL_LOWER_FIRST_WORD_IN_BRACKETS_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON},
            {"ProcessViewingPatterns",
                    TITLE_STRING_CASED},
            {"BPMNConformanceInOpenSourceEngines",
                    TITLE_STRING_CASED_ONE_UPPER_WORD_ONE_SMALL_WORD},
            {"TheDifferenceBetweenGraphBasedAndBlockStructuredBusinessProcessModellingLanguages",
                    TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AT_THE_BEGINNING},
            {"CloudComputingTheNextRevolutionInIT",
                    TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON},
            {"TowardsChoreographyBasedProcessDistributionInTheCloud",
                    TITLE_STRING_CASED_TWO_SMALL_WORDS_ONE_CONNECTED_WORD},
            {"OnTheMeasurementOfDesignTimeAdaptabilityForProcessBasedSystems",
                    TITLE_STRING_CASED_FOUR_SMALL_WORDS_TWO_CONNECTED_WORDS},
    };

    for (String[] row : expectedAndTitle) {
        assertEquals(row[0], BibtexKeyPatternUtil.getCamelizedTitle(row[1]));
    }
}

/**
 * Tests [title]
 */
@Test
public void title() {
    // Each row: { expected result, title }. [title] capitalises the significant words of the title.
    // for the title case the concatenation happens at formatting, which is tested in MakeLabelWithDatabaseTest.java
    String[][] expectedAndTitle = {
            {"Application Migration Effort in the Cloud the Case of Cloud Platforms",
                    TITLE_STRING_ALL_LOWER_FOUR_SMALL_WORDS_ONE_EN_DASH},
            {"BPEL Conformance in Open Source Engines: the Case of Static Analysis",
                    TITLE_STRING_ALL_LOWER_FIRST_WORD_IN_BRACKETS_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON},
            {"Process Viewing Patterns",
                    TITLE_STRING_CASED},
            {"BPMN Conformance in Open Source Engines",
                    TITLE_STRING_CASED_ONE_UPPER_WORD_ONE_SMALL_WORD},
            {"The Difference between Graph Based and Block Structured Business Process Modelling Languages",
                    TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AT_THE_BEGINNING},
            {"Cloud Computing: the Next Revolution in IT",
                    TITLE_STRING_CASED_TWO_SMALL_WORDS_SMALL_WORD_AFTER_COLON},
            {"Towards Choreography Based Process Distribution in the Cloud",
                    TITLE_STRING_CASED_TWO_SMALL_WORDS_ONE_CONNECTED_WORD},
            {"On the Measurement of Design Time Adaptability for Process Based Systems",
                    TITLE_STRING_CASED_FOUR_SMALL_WORDS_TWO_CONNECTED_WORDS},
    };

    for (String[] row : expectedAndTitle) {
        assertEquals(row[0], BibtexKeyPatternUtil.camelizeSignificantWordsInTitle(row[1]));
    }
}

@Test
public void keywordNKeywordsSeparatedBySpace() {
BibEntry entry = new BibEntry();
Expand Down Expand Up @@ -718,8 +784,8 @@ public void testCheckLegalNullInNullOut() {
public void testApplyModifiers() {
BibEntry entry = new BibEntry();
entry.setField("title", "Green Scheduling of Whatever");
assertEquals("GSW", BibtexKeyPatternUtil.makeLabel(entry, "shorttitleINI", ',', new BibDatabase()));
assertEquals("GreenSchedulingWhatever", BibtexKeyPatternUtil.makeLabel(entry, "shorttitle",
assertEquals("GSo", BibtexKeyPatternUtil.makeLabel(entry, "shorttitleINI", ',', new BibDatabase()));
assertEquals("GreenSchedulingof", BibtexKeyPatternUtil.makeLabel(entry, "shorttitle",
',', new BibDatabase()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,21 +191,21 @@ public void generateKeyEmptyFieldColonInDefaultText() {
public void generateKeyTitle() {
bibtexKeyPattern.setDefaultValue("[title]");
BibtexKeyPatternUtil.makeAndSetLabel(bibtexKeyPattern, database, entry, preferences);
assertEquals(Optional.of("AnawesomepaperonJabRef"), entry.getCiteKeyOptional());
assertEquals(Optional.of("AnAwesomePaperonJabRef"), entry.getCiteKeyOptional());
}

@Test
public void generateKeyTitleAbbr() {
bibtexKeyPattern.setDefaultValue("[title:abbr]");
BibtexKeyPatternUtil.makeAndSetLabel(bibtexKeyPattern, database, entry, preferences);
assertEquals(Optional.of("AapoJ"), entry.getCiteKeyOptional());
assertEquals(Optional.of("AAPoJ"), entry.getCiteKeyOptional());
}

@Test
public void generateKeyShorttitle() {
bibtexKeyPattern.setDefaultValue("[shorttitle]");
BibtexKeyPatternUtil.makeAndSetLabel(bibtexKeyPattern, database, entry, preferences);
assertEquals(Optional.of("awesomepaperJabRef"), entry.getCiteKeyOptional());
assertEquals(Optional.of("Anawesomepaper"), entry.getCiteKeyOptional());
}

@Test
Expand All @@ -219,7 +219,7 @@ public void generateKeyVeryshorttitle() {
public void generateKeyShorttitleINI() {
bibtexKeyPattern.setDefaultValue("[shorttitleINI]");
BibtexKeyPatternUtil.makeAndSetLabel(bibtexKeyPattern, database, entry, preferences);
assertEquals(Optional.of("apJ"), entry.getCiteKeyOptional());
assertEquals(Optional.of("Aap"), entry.getCiteKeyOptional());
}

@Test
Expand Down