Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for #9068: Fix capitalization after en-dash characters in title #9099

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed a bug where spaces are trimmed when highlighting differences in the Entries merge dialog. [koppor#371](https://github.com/koppor/jabref/issues/371)
- We fixed several bugs regarding the manual and the autosave of library files that sometimes lead to exceptions or data loss. [#8448](https://github.com/JabRef/jabref/issues/8484), [#8746](https://github.com/JabRef/jabref/issues/8746), [#6684](https://github.com/JabRef/jabref/issues/6684), [#6644](https://github.com/JabRef/jabref/issues/6644), [#6102](https://github.com/JabRef/jabref/issues/6102), [#6002](https://github.com/JabRef/jabref/issues/6000)
- We fixed an issue where applied save actions on saving the library file would lead to the dialog "The library has been modified by another program" popping up [#4877](https://github.com/JabRef/jabref/issues/4877)
- We fixed an issue where the title case formatter did not capitalize the letter following an en dash or another dash-like character in a title. [#9068](https://github.com/JabRef/jabref/issues/9068)

### Removed

Expand Down
29 changes: 21 additions & 8 deletions src/main/java/org/jabref/logic/formatter/casechanger/Word.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,30 +10,42 @@
/**
* Represents a word in a title of a bibtex entry.
* <p>
* A word can have protected chars (enclosed in '{' '}') and may be a small (a, an, the, ...) word.
* A word can have protected chars (enclosed in '{' '}') and may be a small (a,
* an, the, ...) word.
*/
public final class Word {
/**
* Set containing common lowercase function words
*/
public static final Set<String> SMALLER_WORDS;
public static final Set<Character> DASHES_N_HYPHENS;
private final char[] chars;
private final boolean[] protectedChars;

static {
Set<String> smallerWords = new HashSet<>();
Set<Character> dashesAndHyphens = new HashSet<>();

// Articles
smallerWords.addAll(Arrays.asList("a", "an", "the"));
// Prepositions
smallerWords.addAll(Arrays.asList("above", "about", "across", "against", "along", "among", "around", "at", "before", "behind", "below", "beneath", "beside", "between", "beyond", "by", "down", "during", "except", "for", "from", "in", "inside", "into", "like", "near", "of", "off", "on", "onto", "since", "to", "toward", "through", "under", "until", "up", "upon", "with", "within", "without"));
smallerWords.addAll(Arrays.asList("above", "about", "across", "against", "along", "among", "around", "at",
"before", "behind", "below", "beneath", "beside", "between", "beyond", "by", "down", "during", "except",
"for", "from", "in", "inside", "into", "like", "near", "of", "off", "on", "onto", "since", "to",
"toward", "through", "under", "until", "up", "upon", "with", "within", "without"));
// Conjunctions
smallerWords.addAll(Arrays.asList("and", "but", "for", "nor", "or", "so", "yet"));

// Dashes and Hyphens
dashesAndHyphens.addAll(Arrays.asList('-', '~', '֊', '־', '᐀', '‐', '‑', '‒', '–', '—', '―', '⁓', '⁻', '₋', '−',
'⸗', '⸺', '⸻', '〜', '〰', '゠', '︱', '︲', '﹘', '﹣', '-'));

// unmodifiable for thread safety
SMALLER_WORDS = smallerWords.stream()
.map(word -> word.toLowerCase(Locale.ROOT))
.collect(Collectors.toUnmodifiableSet());
.map(word -> word.toLowerCase(Locale.ROOT))
.collect(Collectors.toUnmodifiableSet());

DASHES_N_HYPHENS = dashesAndHyphens;
}

public Word(char[] chars, boolean[] protectedChars) {
Expand All @@ -46,7 +58,8 @@ public Word(char[] chars, boolean[] protectedChars) {
}

/**
* Case-insensitive check against {@link Word#SMALLER_WORDS}. Checks for common function words.
* Case-insensitive check against {@link Word#SMALLER_WORDS}. Checks for common
* function words.
*/
public static boolean isSmallerWord(String word) {
return SMALLER_WORDS.contains(word.toLowerCase(Locale.ROOT));
Expand Down Expand Up @@ -77,9 +90,9 @@ public void toLowerCase() {
/**
 * Rewrites the unprotected characters of this word in place: characters selected by the
 * conditions below are converted to upper case, every other unprotected character is
 * converted to lower case. Characters whose {@code protectedChars} flag is set are skipped.
 */
public void toUpperFirst() {
for (int i = 0; i < chars.length; i++) {
if (!protectedChars[i]) {
// NOTE(review): the two assignments below look like the removed and the added side of
// the same diff hunk rendered together - confirm against the actual file. As written,
// the second assignment overwrites the result of the first.
// First form: only the character at index 0 is upper-cased.
chars[i] = (i == 0) ?
Character.toUpperCase(chars[i]) :
Character.toLowerCase(chars[i]);
// Second form: the character at index 0, or any character directly preceded by a member
// of DASHES_N_HYPHENS, is upper-cased. The i == 0 test short-circuits, so chars[i - 1]
// is never read for the first character.
chars[i] = (i == 0 || DASHES_N_HYPHENS.contains(chars[i - 1]))
? Character.toUpperCase(chars[i])
: Character.toLowerCase(chars[i]);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import static org.junit.jupiter.api.Assertions.assertEquals;

/**
* Tests in addition to the general tests from {@link org.jabref.logic.formatter.FormatterTest}
* Tests in addition to the general tests from
* {@link org.jabref.logic.formatter.FormatterTest}
*/
public class CapitalizeFormatterTest {

Expand Down Expand Up @@ -41,8 +42,47 @@ public void formatExample() {
"upper each {NOT} first, Upper Each {NOT} First", // multiple words lower case with {}
"Upper {E}ach {NOT} First, Upper {E}ach {NOT} First", // multiple words correct with {}
"UPPER {E}ACH {NOT} FIRST, Upper {E}ach {NOT} First", // multiple words upper case with {}
"upper each first {NOT} {this}, Upper Each First {NOT} {this}", // multiple words in lower and upper case with {}
"upper each first {N}OT {t}his, Upper Each First {N}ot {t}his", // multiple words in lower and upper case with {} part 2
"upper each first {NOT} {this}, Upper Each First {NOT} {this}", // multiple words in lower and upper case
// with {}
"upper each first {N}OT {t}his, Upper Each First {N}ot {t}his", // multiple words in lower and upper case
// with {} part 2
"first-second USING hypheN, First-Second Using Hyphen", // multiple words in lower and upper case using
// hyphen
"breaking Your Next TEST-cASE, Breaking Your Next Test-Case", // multiple words in lower and upper case
// using hyphen
"THIS looks〰LIKE a daSH, This Looks〰Like A Dash", // dash-like character
"one-dash, One-Dash", // testing all dash-like characters
"one~dash, One~Dash",
"one֊dash, One֊Dash",
"one־dash, One־Dash",
"one᐀dash, One᐀Dash",
"one‐dash, One‐Dash",
"one‑dash, One‑Dash",
"one‒dash, One‒Dash",
"one–dash, One–Dash",
"one—dash, One—Dash",
"one―dash, One―Dash",
"one⁓dash, One⁓Dash",
"one⁻dash, One⁻Dash",
"one₋dash, One₋Dash",
"one−dash, One−Dash",
"one⸗dash, One⸗Dash",
"one⸺dash, One⸺Dash",
"one⸻dash, One⸻Dash",
"one〜dash, One〜Dash",
"one〰dash, One〰Dash",
"one゠dash, One゠Dash",
"one︱dash, One︱Dash",
"one︲dash, One︲Dash",
"one﹘dash, One﹘Dash",
"one﹣dash, One﹣Dash",
"one-dash, One-Dash",
"--, --", // testing weird cases
"--this is one sentence, --This Is One Sentence",
"-d-o-i-t right-, -D-O-I-T Right-",
"testing hy---------------phen, Testing Hy---------------Phen",
"its getting cr--a---z-y, Its Getting Cr--A---Z-Y",
"does it wor⸻k with cra゠︱zy dashes?, Does It Wor⸻K With Cra゠︱Zy Dashes?",
})
public void testInputs(String input, String expectedResult) {
String formattedStr = formatter.format(input);
Expand Down
Loading