Skip to content

Commit

Permalink
Fixes JabRef#1181 and JabRef#1504: Improved "Normalize to BibTeX name…
Browse files Browse the repository at this point in the history
… format"

Added the jr, sr,... special cases for semicolon partition.
Fixed to avoid the "and", "{", ";" cases.
Added Test for every case.
  • Loading branch information
bruehldev authored and Daniel Brühl committed Aug 15, 2016
1 parent a89f4fc commit a68f862
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 1 deletion.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Fixed NullPointerException when trying to set a special field or mark an entry through the menu without having an open database
- Fixed [#1257](https://github.com/JabRef/jabref/issues/1324): Preferences for the BibTeX key generator set in a version prior to 3.2 are now migrated automatically to the new version
- Fixed [#1716](https://github.com/JabRef/jabref/issues/1716): `@`-Symbols stored in BibTeX fields no longer break the database
- Fixed [#405](https://github.com/JabRef/jabref/issues/405): Added more {} around capital letters in Unicode/HTML to LaTeX conversion to preserve them
- Alleviate multiuser concurrency issue when near simultaneous saves occur to a shared database file
- Fixed [#1476](https://github.com/JabRef/jabref/issues/1476): NPE when importing from SQL DB because of missing DatabaseMode
- Fixed [#1481](https://github.com/JabRef/jabref/issues/1481): Mac OS X binary seems broken for JabRef 3.4 release
- Fixed [#1430](https://github.com/JabRef/jabref/issues/1430): "review changes" did misinterpret changes
- Fixed [#1434](https://github.com/JabRef/jabref/issues/1434): Static groups are no longer displayed as dynamic ones
- Fixed [#1482](https://github.com/JabRef/jabref/issues/1482): Correct number of matched entries is displayed for refining subgroups
- Fixed [#1181](https://github.com/JabRef/jabref/issues/1181) and [#1504](https://github.com/JabRef/jabref/issues/1504): Improved "Normalize to BibTeX name format": Support separated names with commas and colons. Considered name affixes such as "Jr".

### Removed
- It is no longer possible to choose to convert HTML sub- and superscripts to equations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
*/
package net.sf.jabref.logic.formatter.bibtexfields;

import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Objects;
import java.util.StringJoiner;

import net.sf.jabref.logic.formatter.Formatter;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.model.entry.AuthorList;
Expand All @@ -24,6 +30,9 @@
*/
public class NormalizeNamesFormatter implements Formatter {

// Name affixes (e.g. "Jr") and particles (e.g. "von"), all in lower case.
// format() lower-cases each trimmed, comma-separated part of the input and looks it up here;
// a part that equals one of these terms is kept attached to the current name and is not
// counted when the parts are paired up as last name / first name.
private final Collection<String> avoidTermsInLowerCase = Arrays.asList("jr", "sr", "jnr", "snr", "von", "zu", "van", "der");

@Override
public String getName() {
return Localization.lang("Normalize names of persons");
Expand All @@ -36,6 +45,73 @@ public String getKey() {

/**
 * Normalizes a string of person names and returns it in "last name, first name" form with the
 * names joined by "and".
 *
 * Before the value is handed to {@code AuthorList.parse}, inputs that contain neither " and ",
 * "{" nor ";" are inspected for names that are separated by commas only:
 * <ul>
 * <li>If every comma-separated part contains a space (e.g. "Cristina Bosoi, Mariana Oliveira"),
 * each comma is replaced by " and".</li>
 * <li>Otherwise, if the number of parts (not counting affixes such as "Jr") is even, the parts are
 * read as alternating last name / first name (e.g. "Ali Babar, M., Dingsøyr, T.") and the
 * separator after each first name becomes a semicolon.</li>
 * <li>In every other case the value is left untouched.</li>
 * </ul>
 *
 * @param value the names to normalize, must not be null
 * @return the result of {@code AuthorList.parse(value).getAsLastFirstNamesWithAnd(false)} for the
 *         (possibly rewritten) value
 * @throws NullPointerException if {@code value} is null
 */
@Override
public String format(String value) {
    Objects.requireNonNull(value);

    // Values that already use " and ", braces or semicolons go to the parser unchanged
    boolean hasExplicitStructure = value.contains(" and ") || value.contains("{") || value.contains(";");
    if (!hasExplicitStructure) {
        String[] tokens = value.split(",");

        // Trim every token and, in the same pass, find out whether each one contains a space
        // (i.e. looks like "Firstname Lastname"). No tokens at all never counts as a match.
        boolean everyTokenHasSpace = tokens.length > 0;
        for (int pos = 0; pos < tokens.length; pos++) {
            tokens[pos] = tokens[pos].trim();
            everyTokenHasSpace = everyTokenHasSpace && tokens[pos].contains(" ");
        }

        if (everyTokenHasSpace) {
            // Each comma separates two complete names --> turn it into the "and" separator
            value = value.replace(",", " and");
        } else {
            // Remember the positions of name affixes; they must not take part in the pairing
            Collection<Integer> affixPositions = new HashSet<>();
            for (int pos = 0; pos < tokens.length; pos++) {
                String lowered = tokens[pos].toLowerCase();
                if (avoidTermsInLowerCase.contains(lowered)) {
                    affixPositions.add(pos);
                }
            }

            // Only an even number of non-affix tokens can be paired as last name / first name
            int pairableTokens = tokens.length - affixPositions.size();
            if ((pairableTokens % 2) == 0) {
                StringBuilder rebuilt = new StringBuilder();
                // Every affix passed so far shifts the last/first parity by one position
                int affixesSeen = 0;
                for (int pos = 0; pos < tokens.length; pos++) {
                    rebuilt.append(tokens[pos]);
                    if (affixPositions.contains(pos)) {
                        // Name affix: stays inside the current name, so keep the comma
                        affixesSeen++;
                        rebuilt.append(',');
                        continue;
                    }
                    // Even slot: last name followed by its first name --> comma is kept.
                    // Odd slot: end of a full name --> semicolon, which is left for
                    // AuthorList.parse(value) to interpret as the separator between names.
                    boolean lastNameSlot = ((pos + affixesSeen) % 2) == 0;
                    rebuilt.append(lastNameSlot ? ',' : ';');
                }
                value = rebuilt.toString();
            }
        }
    }

    return AuthorList.parse(value).getAsLastFirstNamesWithAnd(false);
}
Expand All @@ -50,4 +126,14 @@ public String getExampleInput() {
return "Albert Einstein and Alan Turing";
}

/**
 * Checks whether any element of {@code array} equals any element of {@code searchTerms},
 * comparing both sides after trimming and lower-casing them.
 *
 * @param array       the strings to look through
 * @param searchTerms the terms to look for
 * @return true as soon as one pair matches, false if no pair matches
 */
private static boolean contains(final String[] array, final String[] searchTerms) {
    return Arrays.stream(array).anyMatch(candidate ->
            Arrays.stream(searchTerms).anyMatch(wanted ->
                    wanted.trim().toLowerCase().equals(candidate.trim().toLowerCase())));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,18 @@ public void lastThenJuniorThenFirst() {
expectCorrect("Name, della, first", "Name, della, first");
}

@Test
public void testConcatenationOfAuthorsWithCommas() {
    // Several "Last, First" pairs chained with commas only
    final String chainedAuthors = "Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.";
    final String chainedExpected = "Ali Babar, M. and Dingsøyr, T. and Lago, P. and van der Vliet, H.";
    expectCorrect(chainedAuthors, chainedExpected);

    // A single "Last, First" pair is expected back unchanged
    final String singleAuthor = "Ali Babar, M.";
    expectCorrect(singleAuthor, singleAuthor);
}

@Test
public void testOddCountOfCommas() {
    // Five comma-separated parts cannot be paired up as last name / first name
    final String oddInput = "Ali Babar, M., Dingsøyr, T., Lago P.";
    final String oddExpected = "Ali Babar, M., Dingsøyr T. Lago P.";
    expectCorrect(oddInput, oddExpected);
}

// Formats the input with the formatter under test and asserts that the result equals the expected string.
private void expectCorrect(String input, String expected) {
    final String actual = formatter.format(input);
    Assert.assertEquals(expected, actual);
}
Expand All @@ -107,4 +119,35 @@ public void formatExample() {
assertEquals("Einstein, Albert and Turing, Alan", formatter.format(formatter.getExampleInput()));
}

}
@Test
public void testNameAffixe() {
    // "jr" belongs to the first author and must not be read as a name of its own
    final String withAffix = "Surname, jr, First, Surname2, First2";
    final String normalized = "Surname, jr, First and Surname2, First2";
    expectCorrect(withAffix, normalized);
}

@Test
public void testAvoidSpecialCharacter() {
    // Input containing "{" and ";" is expected to come back unchanged
    final String withSpecialCharacters = "Surname, {, First; Surname2, First2";
    expectCorrect(withSpecialCharacters, withSpecialCharacters);
}

@Test
public void testAndInName() {
    // The input already contains " and "
    final String withAnd = "Surname, and , First, Surname2, First2";
    final String normalized = "Surname and , First, Surname2 First2";
    expectCorrect(withAnd, normalized);
}

@Test
public void testMultipleNameAffixes() {
    // Each of the two authors carries an affix of their own ("Jr" and "Sr")
    final String withAffixes = "Mair, Jr, Daniel, Brühl, Sr, Daniel";
    final String normalized = "Mair, Jr, Daniel and Brühl, Sr, Daniel";
    expectCorrect(withAffixes, normalized);
}

@Test
public void testCommaSeperatedNames() {
    // Full "First Last" names that are separated by commas only
    final String commaSeparated = "Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez, Mélanie Tremblay, Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur";
    final String normalized = "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal";
    expectCorrect(commaSeparated, normalized);
}

@Test
public void testMultipleSpaces() {
    // Same names as in the comma-separated case, but with extra whitespace around some commas
    final String withExtraSpaces = "Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez , Mélanie Tremblay , Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur";
    final String normalized = "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal";
    expectCorrect(withExtraSpaces, normalized);
}
}
10 changes: 10 additions & 0 deletions src/test/java/net/sf/jabref/model/entry/AuthorListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,15 @@ public void testGetAuthorsLastFirstAnds() {

}

@Test
public void testGetAuthorsLastFirstAndsCaching() {
    // getAsLastFirstNamesWithAnd caches its results, so every variant is requested twice
    // with the same arguments: first the full first name, then the abbreviated one.
    for (int attempt = 0; attempt < 2; attempt++) {
        Assert.assertEquals("Smith, John", AuthorList.parse("John Smith").getAsLastFirstNamesWithAnd(false));
    }
    for (int attempt = 0; attempt < 2; attempt++) {
        Assert.assertEquals("Smith, J.", AuthorList.parse("John Smith").getAsLastFirstNamesWithAnd(true));
    }
}

@Test
public void testGetAuthorsFirstFirst() {

Expand Down Expand Up @@ -611,4 +620,5 @@ public void parseNameWithBraces() throws Exception {
Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null);
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("H{e}lene Fiaux"));
}

}

0 comments on commit a68f862

Please sign in to comment.