Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/enable paginated fetchers #7082

Merged
merged 9 commits into from
Nov 15, 2020
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/cli/ArgumentProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ private Optional<ParserResult> fetch(String fetchCommand) {
System.out.println(Localization.lang("Running query '%0' with fetcher '%1'.", query, engine));
System.out.print(Localization.lang("Please wait..."));
try {
List<BibEntry> matches = selectedFetcher.get().performSearch(query);
List<BibEntry> matches = selectedFetcher.get().performComplexSearch(query);
if (matches.isEmpty()) {
System.out.println("\r" + Localization.lang("No results found."));
return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public StringProperty inputTextProperty() {
}

public void startParsing() {
BackgroundTask.wrap(() -> currentCitationfetcher.performSearch(inputTextProperty.getValue()))
BackgroundTask.wrap(() -> currentCitationfetcher.performComplexSearch(inputTextProperty.getValue()))
.onRunning(() -> dialogService.notify(Localization.lang("Your text is being parsed...")))
.onFailure((e) -> {
if (e instanceof FetcherException) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.gui.importer.fetcher;

import java.util.Optional;
import java.util.SortedSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -23,10 +22,8 @@
import org.jabref.gui.util.BackgroundTask;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.QueryParser;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.WebFetchers;
import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.strings.StringUtil;
import org.jabref.preferences.JabRefPreferences;
Expand Down Expand Up @@ -109,15 +106,8 @@ public void search() {
SearchBasedFetcher activeFetcher = getSelectedFetcher();

BackgroundTask<ParserResult> task;
QueryParser queryParser = new QueryParser();
Optional<ComplexSearchQuery> generatedQuery = queryParser.parseQueryStringIntoComplexQuery(getQuery());
if (generatedQuery.isPresent()) {
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performComplexSearch(generatedQuery.get())))
.withInitialMessage(Localization.lang("Processing %0", getQuery()));
} else {
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performSearch(getQuery().trim())))
.withInitialMessage(Localization.lang("Processing %0", getQuery()));
}
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performComplexSearch(getQuery().trim())))
.withInitialMessage(Localization.lang("Processing %0", getQuery().trim()));
task.onFailure(dialogService::showErrorDialogAndWait);

ImportEntriesDialog dialog = new ImportEntriesDialog(frame.getCurrentLibraryTab().getBibDatabaseContext(), task);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,52 @@
package org.jabref.logic.importer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
* @param query search query sent to endpoint
* @param pageNumber requested site number
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery query, int pageNumber) throws FetcherException;

/**
* @param complexSearchQuery search query sent to endpoint
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException;
default Page<BibEntry> performComplexSearchPaged(String complexSearchQuery, int pageNumber) throws FetcherException {
    // Blank input: answer with an empty page without delegating to the fetcher implementation.
    if (complexSearchQuery.isBlank()) {
        return new Page<>(complexSearchQuery, pageNumber, Collections.emptyList());
    }
    Optional<ComplexSearchQuery> generatedQuery = new QueryParser().parseQueryStringIntoComplexQuery(complexSearchQuery);
    // When the parser yields no query, use the raw string as a default-field phrase.
    // orElseGet defers building that fallback until it is actually needed.
    ComplexSearchQuery queryToSend = generatedQuery.orElseGet(
            () -> ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build());
    return this.performComplexSearchPaged(queryToSend, pageNumber);
}

/**
* Size of a result page requested by this fetcher.
* Implementations may override this method to use a different size.
*
* @return the default page size, {@code 20}
*/
default int getPageSize() {
return 20;
}

@Override
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
    // The unpaged search is answered with a fresh list holding the content of the first page (index 0).
    Page<BibEntry> firstPage = performComplexSearchPaged(complexSearchQuery, 0);
    return new ArrayList<>(firstPage.getContent());
}

@Override
default List<BibEntry> performComplexSearch(String complexSearchQuery) throws FetcherException {
    // The unpaged search is answered with a fresh list holding the content of the first page (index 0).
    Page<BibEntry> firstPage = performComplexSearchPaged(complexSearchQuery, 0);
    return new ArrayList<>(firstPage.getContent());
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,72 @@
package org.jabref.logic.importer;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {

@Override
default Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
    // ADR-0014
    final URL searchUrl;
    try {
        searchUrl = getComplexQueryURL(complexSearchQuery, pageNumber);
    } catch (URISyntaxException | MalformedURLException e) {
        throw new FetcherException("Search URI crafted from complex search query is malformed", e);
    }
    // The page is labelled with the string form of the query it was produced for.
    String queryLabel = complexSearchQuery.toString();
    List<BibEntry> pageContent = getBibEntries(searchUrl);
    return new Page<>(queryLabel, pageNumber, pageContent);
}

/**
 * Downloads the given URL, parses the response with {@code getParser()} and applies
 * {@code doPostCleanup} to every parsed entry.
 *
 * @param urlForQuery the URL to download the search result from
 * @return the parsed and cleaned-up entries
 * @throws FetcherException if downloading fails with an {@link IOException} or the parser reports a {@code ParseException}
 */
private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
    try (InputStream response = getUrlDownload(urlForQuery).asInputStream()) {
        List<BibEntry> parsedEntries = getParser().parseEntries(response);
        for (BibEntry parsedEntry : parsedEntries) {
            doPostCleanup(parsedEntry);
        }
        return parsedEntries;
    } catch (IOException e) {
        throw new FetcherException("A network error occurred while fetching from " + urlForQuery, e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, e);
    }
}

/**
* Constructs a URL based on the query, size and page number.
* @param query the search query
* @param size the size of the page
* @param pageNumber the number of the page
* */
URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException;
*
* @param query the search query
* @param pageNumber the number of the page indexed from 0
*/
URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException;

/**
 * Constructs a URL based on the complex query and the page number.
 * By default the query is converted with {@code toString()} and passed on to
 * {@link #getURLForQuery(String, int)}.
 *
 * @param complexSearchQuery the search query
 * @param pageNumber the number of the page indexed from 0
 */
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
    String plainQuery = complexSearchQuery.toString();
    return getURLForQuery(plainQuery, pageNumber);
}

@Override
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
// PagedSearchBasedFetcher also provides a default for this method, so the implementation to use
// has to be chosen explicitly; the SearchBasedParserFetcher one is selected here.
return SearchBasedParserFetcher.super.performComplexSearch(complexSearchQuery);
}

@Override
default URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
    // The unpaged variant resolves to the URL of the first page (pages are indexed from 0).
    final int firstPage = 0;
    return getURLForQuery(query, firstPage);
}

@Override
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException {
    // The unpaged variant resolves to the URL of the first page (pages are indexed from 0).
    final int firstPage = 0;
    return getComplexQueryURL(complexSearchQuery, firstPage);
}
}
2 changes: 0 additions & 2 deletions src/main/java/org/jabref/logic/importer/QueryParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ public class QueryParser {
*/
public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String queryString) {
try {
ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder();

StandardQueryParser parser = new StandardQueryParser();
Query luceneQuery = parser.parse(queryString, "default");
Set<Term> terms = new HashSet<>();
Expand Down
23 changes: 15 additions & 8 deletions src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.jabref.logic.importer;

import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
Expand All @@ -12,21 +14,26 @@
public interface SearchBasedFetcher extends WebFetcher {

/**
* Looks for hits which are matched by the given free-text query.
* This method is used to send complex queries using fielded search.
*
* @param query search string
* @param complexSearchQuery the search query defining all fielded search parameters
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
List<BibEntry> performSearch(String query) throws FetcherException;
List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException;

/**
* This method is used to send complex queries using fielded search.
* Looks for hits which are matched by the given free-text query.
*
* @param complexSearchQuery the search query defining all fielded search parameters
* @param query search string
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
// Default implementation behaves as perform search on all fields concatenated as query
return performSearch(complexSearchQuery.toString());
default List<BibEntry> performComplexSearch(String query) throws FetcherException {
    // Blank input: return no entries without delegating to the fetcher implementation.
    if (query.isBlank()) {
        return Collections.emptyList();
    }
    Optional<ComplexSearchQuery> generatedQuery = new QueryParser().parseQueryStringIntoComplexQuery(query);
    // When the parser yields no query, use the raw string as a default-field phrase.
    // orElseGet defers building that fallback until it is actually needed.
    ComplexSearchQuery queryToSend = generatedQuery.orElseGet(
            () -> ComplexSearchQuery.builder().defaultFieldPhrase(query).build());
    return this.performComplexSearch(queryToSend);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,11 @@
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

/**
* Provides a convenient interface for search-based fetchers, which follow the usual three-step procedure:
Expand All @@ -35,22 +33,6 @@ public interface SearchBasedParserFetcher extends SearchBasedFetcher {
*/
Parser getParser();

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(query);
} catch (URISyntaxException | MalformedURLException | FetcherException e) {
throw new FetcherException(String.format("Search URI crafted from query %s is malformed", query), e);
}
return getBibEntries(urlForQuery);
}

/**
* This method is used to send queries with advanced URL parameters.
* This method is necessary as the performSearch method does not support certain URL parameters that are used for
Expand Down Expand Up @@ -83,9 +65,8 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
}

default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException {
// Default implementation behaves as getURLForQuery using the default field phrases as query
List<String> defaultPhrases = complexSearchQuery.getDefaultFieldPhrases();
return this.getURLForQuery(String.join(" ", defaultPhrases));
// Default implementation behaves as getURLForQuery treating complex query as plain string query
return this.getURLForQuery(complexSearchQuery.toString());
}

/**
Expand Down
26 changes: 12 additions & 14 deletions src/main/java/org/jabref/logic/importer/fetcher/ArXiv.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import org.jabref.logic.importer.IdBasedFetcher;
import org.jabref.logic.importer.IdFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.PagedSearchBasedFetcher;
import org.jabref.logic.util.io.XMLUtil;
import org.jabref.logic.util.strings.StringSimilarity;
import org.jabref.model.entry.BibEntry;
Expand All @@ -31,6 +31,7 @@
import org.jabref.model.entry.identifier.ArXivIdentifier;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.paging.Page;
import org.jabref.model.strings.StringUtil;
import org.jabref.model.util.OptionalUtil;

Expand All @@ -52,7 +53,7 @@
* <a href="https://github.com/nathangrigg/arxiv2bib">arxiv2bib</a> which is <a href="https://arxiv2bibtex.org/">live</a>
* <a href="https://gitlab.c3sl.ufpr.br/portalmec/dspace-portalmec/blob/aa209d15082a9870f9daac42c78a35490ce77b52/dspace-api/src/main/java/org/dspace/submit/lookup/ArXivService.java">dspace-portalmec</a>
*/
public class ArXiv implements FulltextFetcher, SearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {
public class ArXiv implements FulltextFetcher, PagedSearchBasedFetcher, IdBasedFetcher, IdFetcher<ArXivIdentifier> {

private static final Logger LOGGER = LoggerFactory.getLogger(ArXiv.class);

Expand Down Expand Up @@ -153,8 +154,8 @@ private List<ArXivEntry> searchForEntries(BibEntry entry) throws FetcherExceptio
return Collections.emptyList();
}

private List<ArXivEntry> searchForEntries(String searchQuery) throws FetcherException {
return queryApi(searchQuery, Collections.emptyList(), 0, 10);
private List<ArXivEntry> searchForEntries(String searchQuery, int pageNumber) throws FetcherException {
return queryApi(searchQuery, Collections.emptyList(), getPageSize() * pageNumber, getPageSize());
}

private List<ArXivEntry> queryApi(String searchQuery, List<ArXivIdentifier> ids, int start, int maxResults)
Expand Down Expand Up @@ -248,31 +249,28 @@ public Optional<HelpFile> getHelpPage() {
return Optional.of(HelpFile.FETCHER_OAI2_ARXIV);
}

@Override
public List<BibEntry> performSearch(String query) throws FetcherException {
return searchForEntries(query).stream().map(
(arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
.collect(Collectors.toList());
}

/**
* Constructs a complex query string using the field prefixes specified at https://arxiv.org/help/api/user-manual
*
* @param complexSearchQuery the search query defining all fielded search parameters
* @return A list of entries matching the complex query
*/
@Override
public List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
public Page<BibEntry> performComplexSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
List<String> searchTerms = new ArrayList<>();
complexSearchQuery.getAuthors().forEach(author -> searchTerms.add("au:" + author));
complexSearchQuery.getTitlePhrases().forEach(title -> searchTerms.add("ti:" + title));
complexSearchQuery.getTitlePhrases().forEach(abstr -> searchTerms.add("abs:" + abstr));
complexSearchQuery.getAbstractPhrases().forEach(abstr -> searchTerms.add("abs:" + abstr));
complexSearchQuery.getJournal().ifPresent(journal -> searchTerms.add("jr:" + journal));
// Since ArXiv API does not support fielded year search, the end year is added as a plain (unfielded) search term
complexSearchQuery.getToYear().ifPresent(year -> searchTerms.add(year.toString()));
searchTerms.addAll(complexSearchQuery.getDefaultFieldPhrases());
String complexQueryString = String.join(" AND ", searchTerms);
return performSearch(complexQueryString);

List<BibEntry> searchResult = searchForEntries(complexQueryString, pageNumber).stream()
.map((arXivEntry) -> arXivEntry.toBibEntry(importFormatPreferences.getKeywordSeparator()))
.collect(Collectors.toList());
return new Page<>(complexQueryString, pageNumber, searchResult);
}

@Override
Expand Down
Loading