From 8f7042ed9460d5a2c1bbca0ef7b65feb6bb9b3c8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jun 2022 19:54:08 +0200 Subject: [PATCH 1/6] Bump org.eclipse.jgit from 6.1.0.202203080745-r to 6.2.0.202206071550-r (#8916) Bumps org.eclipse.jgit from 6.1.0.202203080745-r to 6.2.0.202206071550-r. --- updated-dependencies: - dependency-name: org.eclipse.jgit:org.eclipse.jgit dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 102060d8177..55b2e7e4a29 100644 --- a/build.gradle +++ b/build.gradle @@ -142,7 +142,7 @@ dependencies { antlr4 'org.antlr:antlr4:4.9.3' implementation 'org.antlr:antlr4-runtime:4.9.3' - implementation group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '6.1.0.202203080745-r' + implementation group: 'org.eclipse.jgit', name: 'org.eclipse.jgit', version: '6.2.0.202206071550-r' implementation group: 'com.fasterxml.jackson.dataformat', name: 'jackson-dataformat-yaml', version: '2.13.3' implementation group: 'com.fasterxml.jackson.datatype', name: 'jackson-datatype-jsr310', version: '2.13.3' From d034c038869fe15d952cf04819599eb6c66e2d21 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jun 2022 19:54:19 +0200 Subject: [PATCH 2/6] Bump tika-core from 2.4.0 to 2.4.1 (#8914) Bumps [tika-core](https://github.com/apache/tika) from 2.4.0 to 2.4.1. - [Release notes](https://github.com/apache/tika/releases) - [Changelog](https://github.com/apache/tika/blob/main/CHANGES.txt) - [Commits](https://github.com/apache/tika/compare/2.4.0...2.4.1) --- updated-dependencies: - dependency-name: org.apache.tika:tika-core dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 55b2e7e4a29..f8773bbfc1f 100644 --- a/build.gradle +++ b/build.gradle @@ -122,7 +122,7 @@ dependencies { implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' implementation 'com.h2database:h2-mvstore:2.1.212' - implementation group: 'org.apache.tika', name: 'tika-core', version: '2.4.0' + implementation group: 'org.apache.tika', name: 'tika-core', version: '2.4.1' implementation 'com.ibm.icu:icu4j-charset:71.1' // required for reading write-protected PDFs - see https://github.com/JabRef/jabref/pull/942#issuecomment-209252635 From b1df72a8a45ff681ae32a923e710c059bc71f016 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jun 2022 19:54:31 +0200 Subject: [PATCH 3/6] Bump libreoffice from 7.3.3 to 7.3.4 (#8913) Bumps libreoffice from 7.3.3 to 7.3.4. --- updated-dependencies: - dependency-name: org.libreoffice:libreoffice dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index f8773bbfc1f..5bc15ef4a1e 100644 --- a/build.gradle +++ b/build.gradle @@ -130,7 +130,7 @@ dependencies { implementation 'commons-cli:commons-cli:1.5.0' - implementation 'org.libreoffice:libreoffice:7.3.3' + implementation 'org.libreoffice:libreoffice:7.3.4' implementation 'org.libreoffice:unoloader:7.3.3' implementation 'io.github.java-diff-utils:java-diff-utils:4.11' From df0f48c55b387ac42897281c16d705078abf6dbf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jun 2022 19:56:39 +0200 Subject: [PATCH 4/6] Bump unoloader from 7.3.3 to 7.3.4 (#8912) Bumps unoloader from 7.3.3 to 7.3.4. --- updated-dependencies: - dependency-name: org.libreoffice:unoloader dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.gradle b/build.gradle index 5bc15ef4a1e..a7b90c7fe1c 100644 --- a/build.gradle +++ b/build.gradle @@ -131,7 +131,7 @@ dependencies { implementation 'commons-cli:commons-cli:1.5.0' implementation 'org.libreoffice:libreoffice:7.3.4' - implementation 'org.libreoffice:unoloader:7.3.3' + implementation 'org.libreoffice:unoloader:7.3.4' implementation 'io.github.java-diff-utils:java-diff-utils:4.11' implementation 'info.debatty:java-string-similarity:2.0.0' From 3475ec94b4e1eefbacb51accebdb41d714a60333 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Jun 2022 20:05:28 +0200 Subject: [PATCH 5/6] Bump h2-mvstore from 2.1.212 to 2.1.214 in /buildSrc (#8915) * Bump h2-mvstore from 2.1.212 to 2.1.214 in /buildSrc Bumps [h2-mvstore](https://github.com/h2database/h2database) from 2.1.212 to 2.1.214. - [Release notes](https://github.com/h2database/h2database/releases) - [Commits](https://github.com/h2database/h2database/compare/version-2.1.212...version-2.1.214) --- updated-dependencies: - dependency-name: com.h2database:h2-mvstore dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] * also increase version in build.gradle Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Siedlerchr --- build.gradle | 2 +- buildSrc/build.gradle | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/build.gradle b/build.gradle index a7b90c7fe1c..413e9c6f29f 100644 --- a/build.gradle +++ b/build.gradle @@ -120,7 +120,7 @@ dependencies { implementation group: 'org.apache.commons', name: 'commons-csv', version: '1.9.0' implementation group: 'org.apache.commons', name: 'commons-lang3', version: '3.12.0' - implementation 'com.h2database:h2-mvstore:2.1.212' + implementation 'com.h2database:h2-mvstore:2.1.214' implementation group: 'org.apache.tika', name: 'tika-core', version: '2.4.1' implementation 'com.ibm.icu:icu4j-charset:71.1' diff --git a/buildSrc/build.gradle b/buildSrc/build.gradle index 43eb84d3cb2..61d0772e817 100644 --- a/buildSrc/build.gradle +++ b/buildSrc/build.gradle @@ -6,7 +6,7 @@ repositories { } dependencies { - implementation 'com.h2database:h2-mvstore:2.1.212' + implementation 'com.h2database:h2-mvstore:2.1.214' implementation 'org.apache.commons:commons-csv:1.9.0' implementation 'org.slf4j:slf4j-api:2.0.0-alpha7' } From 41edd28a8d374f059e00215d8a4a531cfd68a866 Mon Sep 17 00:00:00 2001 From: Sim Teck Lim <49628911+LIM0000@users.noreply.github.com> Date: Tue, 21 Jun 2022 03:47:55 +0930 Subject: [PATCH 6/6] Rework IACR fetcher (#8904) --- .../importer/fetcher/IacrEprintFetcher.java | 104 +++--------------- src/main/resources/l10n/JabRef_en.properties | 1 - .../fetcher/IacrEprintFetcherTest.java | 21 ++-- 3 files changed, 29 insertions(+), 97 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java index 6eae4cf35c2..42777780515 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/IacrEprintFetcher.java @@ -1,17 +1,8 @@ package org.jabref.logic.importer.fetcher; import java.io.IOException; -import java.time.DateTimeException; -import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeParseException; -import java.time.temporal.TemporalAccessor; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Locale; import java.util.Optional; import java.util.function.Predicate; -import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jabref.logic.importer.FetcherException; @@ -26,25 +17,16 @@ import org.jabref.model.strings.StringUtil; import org.jabref.model.util.DummyFileUpdateMonitor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - public class IacrEprintFetcher implements IdBasedFetcher { public static final String NAME = "IACR eprints"; - private static final Logger LOGGER = LoggerFactory.getLogger(IacrEprintFetcher.class); - private static final Pattern DATE_FROM_WEBSITE_AFTER_2000_PATTERN = Pattern.compile("[a-z ]+(\\d{1,2} [A-Za-z][a-z]{2} \\d{4})"); - private static final Pattern DATE_FROM_WEBSITE_BEFORE_2000_PATTERN = Pattern.compile("[A-Za-z ]+? ([A-Za-z][a-z]{2,10} \\d{1,2}(th|st|nd|rd)?, \\d{4})\\.?"); private static final Pattern WITHOUT_LETTERS_SPACE = Pattern.compile("[^0-9/]"); - private static final DateTimeFormatter DATE_FORMAT_WEBSITE_AFTER_2000 = DateTimeFormatter.ofPattern("d MMM yyyy", Locale.US); - private static final DateTimeFormatter DATE_FORMAT_WEBSITE_BEFORE_2000_LONG_MONTHS = DateTimeFormatter.ofPattern("MMMM d['th']['st']['nd']['rd'] yyyy", Locale.US); - private static final DateTimeFormatter DATE_FORMAT_WEBSITE_BEFORE_2000_SHORT_MONTHS = DateTimeFormatter.ofPattern("MMM d['th']['st']['nd']['rd'] yyyy", Locale.US); - private static final DateTimeFormatter DATE_FORMAT_BIBTEX = DateTimeFormatter.ISO_LOCAL_DATE; private static final Predicate IDENTIFIER_PREDICATE = Pattern.compile("\\d{4}/\\d{3,5}").asPredicate(); - private static final String CITATION_URL_PREFIX = "https://eprint.iacr.org/eprint-bin/cite.pl?entry="; + private static final String CITATION_URL_PREFIX = "https://eprint.iacr.org/"; private static final String DESCRIPTION_URL_PREFIX = "https://eprint.iacr.org/"; + private static final String VERSION_URL_PREFIX = "https://eprint.iacr.org/archive/versions/"; private final ImportFormatPreferences prefs; @@ -74,7 +56,7 @@ private Optional createEntryFromIacrCitation(String validIdentifier) t if (bibtexCitationHtml.contains("No such report found")) { throw new FetcherException(Localization.lang("No results found.")); } - String actualEntry = getRequiredValueBetween("
", "
", bibtexCitationHtml); + String actualEntry = getRequiredValueBetween("
", "
", bibtexCitationHtml); try { return BibtexParser.singleFromString(actualEntry, prefs, new DummyFileUpdateMonitor()); @@ -86,86 +68,36 @@ private Optional createEntryFromIacrCitation(String validIdentifier) t private void setAdditionalFields(BibEntry entry, String identifier) throws FetcherException { String entryUrl = DESCRIPTION_URL_PREFIX + identifier; String descriptiveHtml = getHtml(entryUrl); + entry.setField(StandardField.ABSTRACT, getAbstract(descriptiveHtml)); - String dateStringAsInHtml = getRequiredValueBetween("Date: ", "

", descriptiveHtml); - entry.setField(StandardField.DATE, getLatestDate(dateStringAsInHtml)); + entry.setField(StandardField.DATE, getDate(descriptiveHtml)); + // Version information for entries after year 2000 if (isFromOrAfterYear2000(entry)) { - String version = getVersion(identifier, descriptiveHtml); + String entryVersion = VERSION_URL_PREFIX + identifier; + String versionHtml = getHtml(entryVersion); + String version = getVersion(identifier, versionHtml); entry.setField(StandardField.VERSION, version); entry.setField(StandardField.URL, entryUrl + "/" + version); - } else { - // No version information for entries before year 2000 - entry.setField(StandardField.URL, entryUrl); } } - private String getVersion(String identifier, String descriptiveHtml) throws FetcherException { - String startOfVersionString = "Version: ", versionHtml); return version; } private String getAbstract(String descriptiveHtml) throws FetcherException { - String abstractText = getRequiredValueBetween("Abstract: ", "

", descriptiveHtml); - // for some reason, all spaces are doubled... - abstractText = abstractText.replaceAll("\\s(\\s)", "$1"); + String startOfAbstractString = "

Abstract
\n

"; + String abstractText = getRequiredValueBetween(startOfAbstractString, "

", descriptiveHtml); return abstractText; } - private String getLatestDate(String dateStringAsInHtml) throws FetcherException { - if (dateStringAsInHtml.contains("withdrawn")) { - throw new FetcherException(Localization.lang("This paper has been withdrawn.")); - } - String[] rawDates = dateStringAsInHtml.split(", \\D"); - List formattedDates = new ArrayList<>(); - for (String rawDate : rawDates) { - TemporalAccessor date = parseSingleDateFromWebsite(rawDate); - if (date != null) { - formattedDates.add(DATE_FORMAT_BIBTEX.format(date)); - } - } - - if (formattedDates.isEmpty()) { - throw new FetcherException(Localization.lang("Entry from %0 could not be parsed.", "IACR")); - } - - Collections.sort(formattedDates, Collections.reverseOrder()); - return formattedDates.get(0); - } - - private TemporalAccessor parseSingleDateFromWebsite(String dateStringFromWebsite) { - TemporalAccessor date = null; - // Some entries contain double spaces in the date string (which would break our regexs below) - String dateStringWithoutDoubleSpaces = dateStringFromWebsite.replaceAll("\\s\\s+", " "); - - Matcher dateMatcherAfter2000 = DATE_FROM_WEBSITE_AFTER_2000_PATTERN.matcher(dateStringWithoutDoubleSpaces.trim()); - if (dateMatcherAfter2000.find()) { - try { - date = DATE_FORMAT_WEBSITE_AFTER_2000.parse(dateMatcherAfter2000.group(1)); - } catch (DateTimeParseException e) { - LOGGER.warn("Date from IACR could not be parsed", e); - } - } - - // Entries before year 2000 use a variety of date formats - fortunately, we can match them with only two different - // date formats (each of which differ from the unified format of post-2000 entries). - Matcher dateMatcherBefore2000 = DATE_FROM_WEBSITE_BEFORE_2000_PATTERN.matcher(dateStringWithoutDoubleSpaces.trim()); - if (dateMatcherBefore2000.find()) { - String dateWithoutComma = dateMatcherBefore2000.group(1).replace(",", ""); - try { - date = DATE_FORMAT_WEBSITE_BEFORE_2000_LONG_MONTHS.parse(dateWithoutComma); - } catch (DateTimeParseException e) { - try { - date = DATE_FORMAT_WEBSITE_BEFORE_2000_SHORT_MONTHS.parse(dateWithoutComma); - } catch (DateTimeException e1) { - LOGGER.warn("Date from IACR could not be parsed", e); - LOGGER.warn("Date from IACR could not be parsed", e1); - } - } - } - - return date; + private String getDate(String descriptiveHtml) throws FetcherException { + String startOfHistoryString = "
History
\n \n \n
"; + String dateStringAsInHtml = getRequiredValueBetween(startOfHistoryString, ":", descriptiveHtml); + return dateStringAsInHtml; } private String getHtml(String url) throws FetcherException { diff --git a/src/main/resources/l10n/JabRef_en.properties b/src/main/resources/l10n/JabRef_en.properties index 1cc2c330268..368b722e913 100644 --- a/src/main/resources/l10n/JabRef_en.properties +++ b/src/main/resources/l10n/JabRef_en.properties @@ -1868,7 +1868,6 @@ Removes\ all\ hyphenated\ line\ breaks\ in\ the\ field\ content.=Removes all hyp Could\ not\ retrieve\ entry\ data\ from\ '%0'.=Could not retrieve entry data from '%0'. Entry\ from\ %0\ could\ not\ be\ parsed.=Entry from %0 could not be parsed. Invalid\ identifier\:\ '%0'.=Invalid identifier: '%0'. -This\ paper\ has\ been\ withdrawn.=This paper has been withdrawn. empty\ citation\ key=empty citation key Aux\ file=Aux file Group\ containing\ entries\ cited\ in\ a\ given\ TeX\ file=Group containing entries cited in a given TeX file diff --git a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java index 43ca28eaaa7..bf5bf9eef3a 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/IacrEprintFetcherTest.java @@ -44,36 +44,37 @@ public void setUp() { fetcher = new IacrEprintFetcher(mock(ImportFormatPreferences.class, Answers.RETURNS_DEEP_STUBS)); abram2017 = new BibEntry(StandardEntryType.Misc) - .withCitationKey("cryptoeprint:2017:1118") + .withCitationKey("cryptoeprint:2017/1118") .withField(StandardField.ABSTRACT, "dummy") .withField(StandardField.AUTHOR, "Ittai Abraham and Dahlia Malkhi and Kartik Nayak and Ling Ren and Alexander Spiegelman") - .withField(StandardField.DATE, "2017-11-18") - .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Report 2017/1118") - .withField(StandardField.NOTE, "\\url{https://ia.cr/2017/1118}") + .withField(StandardField.DATE, "2017-11-24") + .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Paper 2017/1118") + .withField(StandardField.NOTE, "\\url{https://eprint.iacr.org/2017/1118}") .withField(StandardField.TITLE, "Solida: A Blockchain Protocol Based on Reconfigurable Byzantine Consensus") .withField(StandardField.URL, "https://eprint.iacr.org/2017/1118/20171124:064527") .withField(StandardField.VERSION, "20171124:064527") .withField(StandardField.YEAR, "2017"); beierle2016 = new BibEntry(StandardEntryType.Misc) - .withCitationKey("cryptoeprint:2016:119") + .withCitationKey("cryptoeprint:2016/119") .withField(StandardField.ABSTRACT, "dummy") .withField(StandardField.AUTHOR, "Christof Beierle and Thorsten Kranz and Gregor Leander") .withField(StandardField.DATE, "2017-02-17") - .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Report 2016/119") - .withField(StandardField.NOTE, "\\url{https://ia.cr/2016/119}") + .withField(StandardField.DOI, "10.1007/978-3-662-53018-4_23") + .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Paper 2016/119") + .withField(StandardField.NOTE, "\\url{https://eprint.iacr.org/2016/119}") .withField(StandardField.TITLE, "Lightweight Multiplication in GF(2^n) with Applications to MDS Matrices") .withField(StandardField.URL, "https://eprint.iacr.org/2016/119/20170217:150415") .withField(StandardField.VERSION, "20170217:150415") .withField(StandardField.YEAR, "2016"); delgado2017 = new BibEntry(StandardEntryType.Misc) - .withCitationKey("cryptoeprint:2017:1095") + .withCitationKey("cryptoeprint:2017/1095") .withField(StandardField.ABSTRACT, "dummy") .withField(StandardField.AUTHOR, "Sergi Delgado-Segura and Cristina Pérez-Solà and Guillermo Navarro-Arribas and Jordi Herrera-Joancomartí") .withField(StandardField.DATE, "2018-01-19") - .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Report 2017/1095") - .withField(StandardField.NOTE, "\\url{https://ia.cr/2017/1095}") + .withField(StandardField.HOWPUBLISHED, "Cryptology ePrint Archive, Paper 2017/1095") + .withField(StandardField.NOTE, "\\url{https://eprint.iacr.org/2017/1095}") .withField(StandardField.TITLE, "Analysis of the Bitcoin UTXO set") .withField(StandardField.URL, "https://eprint.iacr.org/2017/1095/20180119:113352") .withField(StandardField.VERSION, "20180119:113352")