Skip to content

Commit

Permalink
Update journal abbrev list (#9469)
Browse files Browse the repository at this point in the history
Co-authored-by: calixtus <calixtus@users.noreply.github.com>
  • Loading branch information
github-actions[bot] and calixtus authored Dec 17, 2022
1 parent 5eae7e4 commit 690f837
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public boolean equals(Object obj) {
return true;
}

if (obj == null || getClass() != obj.getClass()) {
if ((obj == null) || (getClass() != obj.getClass())) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ public static JournalAbbreviationRepository loadRepository(JournalAbbreviationPr
}

public static JournalAbbreviationRepository loadBuiltInRepository() {
return loadRepository(new JournalAbbreviationPreferences(Collections.emptyList(), StandardCharsets.UTF_8));
return loadRepository(new JournalAbbreviationPreferences(Collections.emptyList(), StandardCharsets.UTF_8, true));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ public class JournalAbbreviationPreferences {

private final Charset defaultEncoding;
private List<String> externalJournalLists;
private boolean useFJournalField;

public JournalAbbreviationPreferences(List<String> externalJournalLists, Charset defaultEncoding) {
public JournalAbbreviationPreferences(List<String> externalJournalLists, Charset defaultEncoding, boolean useFJournalField) {
this.externalJournalLists = externalJournalLists;
this.defaultEncoding = defaultEncoding;
this.useFJournalField = useFJournalField;
}

public List<String> getExternalJournalLists() {
Expand All @@ -24,4 +26,12 @@ public void setExternalJournalLists(List<String> externalJournalLists) {
public Charset getDefaultEncoding() {
return defaultEncoding;
}

/**
 * Returns the current value of the {@code useFJournalField} flag, i.e. the value passed to the
 * constructor or the one most recently stored through
 * {@link #setUseAMSFJournalFieldForAbbrevAndUnabbrev(boolean)}.
 * <p>
 * NOTE(review): judging by the name, the flag presumably controls whether the AMS "FJournal" field
 * is used when abbreviating and unabbreviating journal names; the code that consumes it is not
 * visible here, so confirm against the callers.
 *
 * @return the stored flag value
 */
public boolean useAMSFJournalFieldForAbbrevAndUnabbrev() {
return useFJournalField;
}

/**
 * Overwrites the {@code useFJournalField} flag with the given value.
 * <p>
 * NOTE(review): presumably toggles use of the AMS "FJournal" field for abbreviation and
 * unabbreviation (inferred from the name only; the consumer of the flag is not visible here).
 *
 * @param useFJournalField the new flag value
 */
public void setUseAMSFJournalFieldForAbbrevAndUnabbrev(boolean useFJournalField) {
this.useFJournalField = useFJournalField;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import org.h2.mvstore.MVMap;
Expand All @@ -17,6 +18,8 @@
* A repository for all journal abbreviations, including add and find methods.
*/
public class JournalAbbreviationRepository {
static final Pattern DOT = Pattern.compile("\\.");
static final Pattern QUESTION_MARK = Pattern.compile("\\?");

private final MVMap<String, String> fullToAbbreviation;
private final MVMap<String, String> abbreviationToFull;
Expand Down Expand Up @@ -49,14 +52,20 @@ private static boolean isMatchedAbbreviated(String name, Abbreviation abbreviati
* Letters) or its abbreviated form (e.g. Phys. Rev. Lett.).
*/
public boolean isKnownName(String journalName) {
// check for at least one "?"
if (QUESTION_MARK.matcher(journalName).find()) {
return false;
}

String journal = journalName.trim().replaceAll(Matcher.quoteReplacement("\\&"), "&");

boolean isKnown = customAbbreviations.stream().anyMatch(abbreviation -> isMatched(journal, abbreviation));
if (isKnown) {
return true;
}

return fullToAbbreviation.containsKey(journal) || abbreviationToFull.containsKey(journal);
return fullToAbbreviation.containsKey(journal) || abbreviationToFull.containsKey(journal)
|| findDottedAbbrFromDotless(journal).length() > 0;
}

/**
Expand All @@ -66,8 +75,40 @@ public boolean isKnownName(String journalName) {
public boolean isAbbreviatedName(String journalName) {
String journal = journalName.trim();

// journal abbreviation must be at least 2 words
boolean isMoreThanTwoWords = journalName.split(" ").length >= 2;

return customAbbreviations.stream().anyMatch(abbreviation -> isMatchedAbbreviated(journal, abbreviation))
|| abbreviationToFull.containsKey(journal);
|| abbreviationToFull.containsKey(journal)
|| (isMoreThanTwoWords && findDottedAbbrFromDotless(journal).length() > 0);
}

/**
 * Searches the known abbreviations for the dotted form of a dot-less abbreviation, e.g. the
 * stored key "Phys. Rev. Lett." for the input "Phys Rev Lett".
 * <p>
 * The input is split on single spaces; every word is matched literally and may be followed by any
 * number of dots and whitespace characters in the stored abbreviation. The match is not anchored,
 * so the words only have to occur consecutively somewhere inside a stored abbreviation.
 *
 * @param journalName the (possibly dot-less) journal abbreviation to look up
 * @return <ul>
 *         <li>{@code "UNKNOWN"} if the name contains a question mark,</li>
 *         <li>the empty string if the name is blank, already contains a dot, or matches no stored
 *         abbreviation,</li>
 *         <li>otherwise the matching stored abbreviation(s), concatenated without a separator.</li>
 *         </ul>
 */
public String findDottedAbbrFromDotless(String journalName) {
    // check for at least one "?"
    if (QUESTION_MARK.matcher(journalName).find()) {
        return "UNKNOWN";
    }

    // A blank name would yield a pattern that matches every stored abbreviation, and a name that
    // already contains a dot is not a dot-less abbreviation, so neither is looked up.
    if (journalName.trim().isEmpty() || DOT.matcher(journalName).find()) {
        return "";
    }

    // Build "<word>[\.\s]*<word>[\.\s]*..." with every word quoted, so that regex metacharacters
    // in the journal name (parentheses, brackets, "+", ...) are matched literally instead of
    // raising a PatternSyntaxException or silently changing the meaning of the pattern.
    StringBuilder regex = new StringBuilder();
    for (String word : journalName.split(" ")) {
        regex.append(Pattern.quote(word)).append("[\\.\\s]*");
    }

    // Compile once up front instead of once per stored abbreviation.
    Pattern dottedForm = Pattern.compile(regex.toString());

    // NOTE(review): if several stored abbreviations match, they are joined into one string, which
    // is not itself a stored abbreviation. Kept as-is for backward compatibility; confirm whether
    // callers would rather receive a single best match.
    return abbreviationToFull.keySet().stream()
            .filter(key -> dottedForm.matcher(key).find())
            .collect(Collectors.joining());
}

/**
Expand All @@ -87,7 +128,17 @@ public Optional<Abbreviation> get(String input) {

return Optional.ofNullable(fullToAbbreviation.get(journal))
.map(abbreviation -> new Abbreviation(journal, abbreviation))
.or(() -> Optional.ofNullable(abbreviationToFull.get(journal)).map(fullName -> new Abbreviation(fullName, journal)));
.or(() -> {
String abbr = "";

// check for dot-less abbr
if (isKnownName(journal) && isAbbreviatedName(journal)) {
abbr = findDottedAbbrFromDotless(journal);
}

return Optional.ofNullable(abbreviationToFull.get(abbr.equals("") ? journal : abbr))
.map(fullName -> new Abbreviation(fullName, journal));
});
}

public void addCustomAbbreviation(Abbreviation abbreviation) {
Expand Down
Binary file modified src/main/resources/journals/journalList.mv
Binary file not shown.

0 comments on commit 690f837

Please sign in to comment.