Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/enable paginated fetchers #7082

Merged
merged 9 commits into from
Nov 15, 2020
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.jabref.gui.importer.fetcher;

import java.util.Optional;
import java.util.SortedSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -23,10 +22,8 @@
import org.jabref.gui.util.BackgroundTask;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.importer.QueryParser;
import org.jabref.logic.importer.SearchBasedFetcher;
import org.jabref.logic.importer.WebFetchers;
import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.logic.l10n.Localization;
import org.jabref.model.strings.StringUtil;
import org.jabref.preferences.JabRefPreferences;
Expand Down Expand Up @@ -109,15 +106,8 @@ public void search() {
SearchBasedFetcher activeFetcher = getSelectedFetcher();

BackgroundTask<ParserResult> task;
QueryParser queryParser = new QueryParser();
Optional<ComplexSearchQuery> generatedQuery = queryParser.parseQueryStringIntoComplexQuery(getQuery());
if (generatedQuery.isPresent()) {
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performComplexSearch(generatedQuery.get())))
.withInitialMessage(Localization.lang("Processing %0", getQuery()));
} else {
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performSearch(getQuery().trim())))
.withInitialMessage(Localization.lang("Processing %0", getQuery()));
}
task = BackgroundTask.wrap(() -> new ParserResult(activeFetcher.performSearch(getQuery().trim())))
.withInitialMessage(Localization.lang("Processing %0", getQuery().trim()));
task.onFailure(dialogService::showErrorDialogAndWait);

ImportEntriesDialog dialog = new ImportEntriesDialog(frame.getCurrentLibraryTab().getBibDatabaseContext(), task);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,52 @@
package org.jabref.logic.importer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedFetcher extends SearchBasedFetcher {

/**
* @param query search query send to endpoint
* @param pageNumber requested site number
* @param complexSearchQuery the complex query defining all fielded search parameters
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException;

/**
* @param complexSearchQuery query string that can be parsed into a complex search query
* @param pageNumber requested page number, indexed from 0
* @return Page with search results
*/
Page<BibEntry> performSearchPaged(String query, int pageNumber) throws FetcherException;
default Page<BibEntry> performSearchPaged(String complexSearchQuery, int pageNumber) throws FetcherException {
    // A blank query short-circuits to an empty page; no parsing or paged search is attempted
    if (complexSearchQuery.isBlank()) {
        return new Page<>(complexSearchQuery, pageNumber, Collections.emptyList());
    }
    ComplexSearchQuery parsedQuery = new QueryParser()
            .parseQueryStringIntoComplexQuery(complexSearchQuery)
            // Parsing failed: fall back to the raw string as a single default-field phrase.
            // The fallback is built lazily so that no throw-away query is created when parsing succeeded.
            .orElseGet(() -> ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build());
    return this.performSearchPaged(parsedQuery, pageNumber);
}

/**
* Returns the page size of this fetcher.
* This is a default method, so implementing fetchers can override it to report a different size.
*
* @return the default page size, which is 20 unless overridden
*/
default int getPageSize() {
return 20;
}

/**
 * Performs a non-paged search by fetching only the first page (page 0) for the given complex query.
 *
 * @param complexSearchQuery the complex query defining all fielded search parameters
 * @return a new list holding the content of the first result page
 */
@Override
default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
    Page<BibEntry> firstPage = performSearchPaged(complexSearchQuery, 0);
    // Copy the page content so the caller receives an independent list
    return new ArrayList<>(firstPage.getContent());
}

/**
 * Performs a non-paged search by fetching only the first page (page 0) for the given query string.
 *
 * @param complexSearchQuery query string that can be parsed into a complex search query
 * @return a new list holding the content of the first result page
 */
@Override
default List<BibEntry> performSearch(String complexSearchQuery) throws FetcherException {
    Page<BibEntry> firstPage = performSearchPaged(complexSearchQuery, 0);
    // Copy the page content so the caller receives an independent list
    return new ArrayList<>(firstPage.getContent());
}
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,72 @@
package org.jabref.logic.importer;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.paging.Page;

public interface PagedSearchBasedParserFetcher extends SearchBasedParserFetcher, PagedSearchBasedFetcher {

/**
 * Builds the URL for the given complex query and page, downloads and parses the response,
 * and wraps the parsed entries into a {@link Page}.
 *
 * @param complexSearchQuery the complex query defining all fielded search parameters
 * @param pageNumber         the number of the page, indexed from 0
 * @return Page with the parsed search results
 * @throws FetcherException if the URL cannot be constructed or fetching/parsing fails
 */
@Override
default Page<BibEntry> performSearchPaged(ComplexSearchQuery complexSearchQuery, int pageNumber) throws FetcherException {
    // ADR-0014
    final URL pageUrl;
    try {
        pageUrl = getComplexQueryURL(complexSearchQuery, pageNumber);
    } catch (URISyntaxException | MalformedURLException e) {
        throw new FetcherException("Search URI crafted from complex search query is malformed", e);
    }
    // The string form of the query is stored in the page alongside its entries
    String queryAsString = complexSearchQuery.toString();
    List<BibEntry> pageEntries = getBibEntries(pageUrl);
    return new Page<>(queryAsString, pageNumber, pageEntries);
}

/**
 * Downloads the given URL, parses the response with {@link #getParser()} and runs
 * {@code doPostCleanup} on every parsed entry.
 *
 * @param urlForQuery the URL to download the search result from
 * @return the parsed and cleaned-up entries
 * @throws FetcherException if downloading fails with an I/O error or the response cannot be parsed
 */
private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
    // try-with-resources closes the download stream on every exit path
    try (InputStream stream = getUrlDownload(urlForQuery).asInputStream()) {
        List<BibEntry> parsedEntries = getParser().parseEntries(stream);
        for (BibEntry entry : parsedEntries) {
            doPostCleanup(entry);
        }
        return parsedEntries;
    } catch (IOException e) {
        throw new FetcherException("A network error occurred while fetching from " + urlForQuery, e);
    } catch (ParseException e) {
        throw new FetcherException("An internal parser error occurred while fetching from " + urlForQuery, e);
    }
}

/**
* Constructs a URL based on the query and page number.
* @param query the search query
* @param size the size of the page
* @param pageNumber the number of the page
* */
URL getURLForQuery(String query, int size, int pageNumber) throws URISyntaxException, MalformedURLException, FetcherException;
*
* @param query the search query
* @param pageNumber the number of the page indexed from 0
*/
URL getURLForQuery(String query, int pageNumber) throws URISyntaxException, MalformedURLException;

/**
 * Constructs a URL based on the complex query and page number.
 * By default the complex query is converted with {@code toString()} and handled like a plain string query.
 *
 * @param complexSearchQuery the search query
 * @param pageNumber         the number of the page, indexed from 0
 * @return the URL to request for the given query and page
 */
default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery, int pageNumber) throws URISyntaxException, MalformedURLException {
    String plainQuery = complexSearchQuery.toString();
    return getURLForQuery(plainQuery, pageNumber);
}

/**
* Performs a non-paged search for the given complex query.
* This interface extends both SearchBasedParserFetcher and PagedSearchBasedFetcher, so the implementation
* to use is named explicitly here: the one from SearchBasedParserFetcher.
*
* @param complexSearchQuery the complex query defining all fielded search parameters
* @return the entries returned by the SearchBasedParserFetcher implementation
*/
@Override
default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
return SearchBasedParserFetcher.super.performSearch(complexSearchQuery);
}

/**
* Constructs the URL for a non-paged string query by requesting page 0 of the paged variant.
*
* @param query the search query
* @return the URL produced by {@code getURLForQuery(query, 0)}
*/
@Override
default URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
// Page numbers are indexed from 0, so 0 selects the first page
return getURLForQuery(query, 0);
}

/**
* Constructs the URL for a non-paged complex query by requesting page 0 of the paged variant.
*
* @param query the complex search query
* @return the URL produced by {@code getComplexQueryURL(query, 0)}
*/
@Override
default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException {
// Page numbers are indexed from 0, so 0 selects the first page
return getComplexQueryURL(query, 0);
}
}
17 changes: 7 additions & 10 deletions src/main/java/org/jabref/logic/importer/QueryParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,23 @@
import org.apache.lucene.search.QueryVisitor;

/**
* This class converts a query string written in lucene syntax into a complex search query.
* This class converts a query string written in lucene syntax into a complex query.
*
* For simplicity this is limited to fielded data and the boolean AND operator.
* For simplicity this is currently limited to fielded data and the boolean AND operator.
*/
public class QueryParser {

/**
* Parses the given query string into a complex query using lucene.
* Note: For unique fields, the alphabetically first instance in the query string is used in the complex query.
* Note: For unique fields, the alphabetically and numerically first instance in the query string is used in the complex query.
*
* @param queryString The given query string
* @param query The given query string
* @return A complex query containing all fields of the query string
* @throws QueryNodeException Error during parsing
*/
public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String queryString) {
public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String query) {
try {
ComplexSearchQuery.ComplexSearchQueryBuilder builder = ComplexSearchQuery.builder();

StandardQueryParser parser = new StandardQueryParser();
Query luceneQuery = parser.parse(queryString, "default");
Query luceneQuery = parser.parse(query, "default");
Set<Term> terms = new HashSet<>();
// This implementation collects all terms from the leaves of the query tree independent of the internal boolean structure
// If further capabilities are required in the future the visitor and ComplexSearchQuery has to be adapted accordingly.
Expand All @@ -44,7 +41,7 @@ public Optional<ComplexSearchQuery> parseQueryStringIntoComplexQuery(String quer

List<Term> sortedTerms = new ArrayList<>(terms);
sortedTerms.sort(Comparator.comparing(Term::text).reversed());
return Optional.of(ComplexSearchQuery.fromTerms(terms));
return Optional.of(ComplexSearchQuery.fromTerms(sortedTerms));
} catch (QueryNodeException | IllegalStateException | IllegalArgumentException ex) {
return Optional.empty();
}
Expand Down
23 changes: 15 additions & 8 deletions src/main/java/org/jabref/logic/importer/SearchBasedFetcher.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.jabref.logic.importer;

import java.util.Collections;
import java.util.List;
import java.util.Optional;

import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
Expand All @@ -12,21 +14,26 @@
public interface SearchBasedFetcher extends WebFetcher {

/**
* Looks for hits which are matched by the given free-text query.
* This method is used to send complex queries using fielded search.
*
* @param query search string
* @param complexSearchQuery the complex search query defining all fielded search parameters
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
List<BibEntry> performSearch(String query) throws FetcherException;
List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException;

/**
* This method is used to send complex queries using fielded search.
* Looks for hits which are matched by the given free-text query.
*
* @param complexSearchQuery the search query defining all fielded search parameters
* @param complexSearchQuery query string that can be parsed into a complex search query
* @return a list of {@link BibEntry}, which are matched by the query (may be empty)
*/
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
// Default implementation behaves as perform search on all fields concatenated as query
return performSearch(complexSearchQuery.toString());
default List<BibEntry> performSearch(String complexSearchQuery) throws FetcherException {
    // A blank query short-circuits to an empty result; no parsing or search is attempted
    if (complexSearchQuery.isBlank()) {
        return Collections.emptyList();
    }
    ComplexSearchQuery parsedQuery = new QueryParser()
            .parseQueryStringIntoComplexQuery(complexSearchQuery)
            // Parsing failed: fall back to the raw string as a single default-field phrase.
            // The fallback is built lazily so that no throw-away query is created when parsing succeeded.
            .orElseGet(() -> ComplexSearchQuery.builder().defaultFieldPhrase(complexSearchQuery).build());
    return this.performSearch(parsedQuery);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,11 @@
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.Collections;
import java.util.List;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.importer.fetcher.ComplexSearchQuery;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.strings.StringUtil;

/**
* Provides a convenient interface for search-based fetcher, which follow the usual three-step procedure:
Expand All @@ -23,34 +21,6 @@
*/
public interface SearchBasedParserFetcher extends SearchBasedFetcher {

/**
* Constructs a URL based on the query.
*
* @param query the search query
*/
URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Returns the parser used to convert the response to a list of {@link BibEntry}.
*/
Parser getParser();

@Override
default List<BibEntry> performSearch(String query) throws FetcherException {
if (StringUtil.isBlank(query)) {
return Collections.emptyList();
}

// ADR-0014
URL urlForQuery;
try {
urlForQuery = getURLForQuery(query);
} catch (URISyntaxException | MalformedURLException | FetcherException e) {
throw new FetcherException(String.format("Search URI crafted from query %s is malformed", query), e);
}
return getBibEntries(urlForQuery);
}

/**
* This method is used to send queries with advanced URL parameters.
* This method is necessary as the performSearch method does not support certain URL parameters that are used for
Expand All @@ -59,11 +29,11 @@ default List<BibEntry> performSearch(String query) throws FetcherException {
* @param complexSearchQuery the search query defining all fielded search parameters
*/
@Override
default List<BibEntry> performComplexSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
default List<BibEntry> performSearch(ComplexSearchQuery complexSearchQuery) throws FetcherException {
// ADR-0014
URL urlForQuery;
try {
urlForQuery = getComplexQueryURL(complexSearchQuery);
urlForQuery = getURLForQuery(complexSearchQuery);
} catch (URISyntaxException | MalformedURLException | FetcherException e) {
throw new FetcherException("Search URI crafted from complex search query is malformed", e);
}
Expand All @@ -82,12 +52,23 @@ private List<BibEntry> getBibEntries(URL urlForQuery) throws FetcherException {
}
}

default URL getComplexQueryURL(ComplexSearchQuery complexSearchQuery) throws URISyntaxException, MalformedURLException, FetcherException {
// Default implementation behaves as getURLForQuery using the default field phrases as query
List<String> defaultPhrases = complexSearchQuery.getDefaultFieldPhrases();
return this.getURLForQuery(String.join(" ", defaultPhrases));
/**
* Constructs a URL based on the complex query.
*
* @param query the complex search query; its {@code toString()} form is passed to the string-based overload
* @return the URL returned by {@code getURLForQuery(String)} for that string
*/
default URL getURLForQuery(ComplexSearchQuery query) throws URISyntaxException, MalformedURLException, FetcherException {
// Default implementation behaves as getURLForQuery treating complex query as plain string query
return this.getURLForQuery(query.toString());
}

/**
* Returns the parser used to convert the response to a list of {@link BibEntry}.
*/
Parser getParser();

/**
* Constructs a URL based on the query.
*
* @param query the search query
*/
URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException;

/**
* Performs a cleanup of the fetched entry.
* <p>
Expand Down
Loading