diff --git a/easy_entrez/api.py b/easy_entrez/api.py
index 4151cb3..a6de818 100644
--- a/easy_entrez/api.py
+++ b/easy_entrez/api.py
@@ -142,13 +142,17 @@ def _request(self, query: EntrezQuery, custom_payload=None) -> EntrezResponse:
     @uses_query(SearchQuery)
     def search(
         self, term: Union[str, dict], max_results: int,
-        database: EntrezDatabase = 'pubmed', min_date=None, max_date=None
+        database: EntrezDatabase = 'pubmed', min_date=None, max_date=None,
+        ignore_max_results_limit: bool = False
     ):
         if isinstance(term, dict):
             term = _match_all(**term)
 
         assert not min_date and not max_date  # TODO
-        query = SearchQuery(term=term, max_results=max_results, database=database)
+        query = SearchQuery(
+            term=term, max_results=max_results, database=database,
+            ignore_max_results_limit=ignore_max_results_limit
+        )
         return self._request(query=query)
 
     def in_batches_of(self, size: int = 100, sleep_interval: int = 3):
@@ -161,20 +165,27 @@ def in_batches_of(self, size: int = 100, sleep_interval: int = 3):
     @uses_query(SummaryQuery)
     def summarize(
         self, ids: List[str], max_results: int,
-        database: EntrezDatabase = 'pubmed'
+        database: EntrezDatabase = 'pubmed', ignore_max_results_limit: bool = False
     ):
         self._ensure_list_like(ids)
-        query = SummaryQuery(ids=ids, max_results=max_results, database=database)
+        query = SummaryQuery(
+            ids=ids, max_results=max_results, database=database,
+            ignore_max_results_limit=ignore_max_results_limit
+        )
         return self._request(query=query)
 
     @supports_batches
     @uses_query(FetchQuery)
     def fetch(
         self, ids: List[str], max_results: int,
-        database: EntrezDatabase = 'pubmed', return_type: ReturnType = 'xml'
+        database: EntrezDatabase = 'pubmed', return_type: ReturnType = 'xml',
+        ignore_max_results_limit: bool = False
     ):
         self._ensure_list_like(ids)
-        query = FetchQuery(ids=ids, max_results=max_results, database=database, return_type=return_type)
+        query = FetchQuery(
+            ids=ids, max_results=max_results, database=database,
+            return_type=return_type, ignore_max_results_limit=ignore_max_results_limit
+        )
         return self._request(query=query)
 
     @supports_batches
diff --git a/easy_entrez/queries.py b/easy_entrez/queries.py
index 0a3a269..b2cadba 100644
--- a/easy_entrez/queries.py
+++ b/easy_entrez/queries.py
@@ -82,16 +82,22 @@ class SearchQuery(EntrezQuery):
         database: Database to search. Value must be a valid E-utility database name
             (default = :py:obj:`'pubmed'`).
         term: Entrez text query
-        max_results: maximal number of results to return
+        max_results: Maximal number of results to return. Limited to 10'000, following
+            the eUtils documentation.
+        ignore_max_results_limit: Ignore the upper limit placed on max_results.
+            Experimentation has shown that some databases allow for higher limits, but
+            as this is not documented, setting higher limits needs to be explicitly
+            enabled here. Use at your own risk of hard to predict errors.
     """
     endpoint = 'esearch'
     term: str
     max_results: int
+    ignore_max_results_limit: bool = False
 
     def validate(self):
         super().validate()
-        if self.max_results > 100_000:
-            raise ValueError('Fetching more than 100,000 results is not implemented')
+        if self.max_results > 10_000 and not self.ignore_max_results_limit:
+            raise ValueError('Fetching more than 10,000 results is not implemented')
 
     def to_params(self) -> Dict[str, str]:
         params = super().to_params()
@@ -124,16 +130,22 @@ class SummaryQuery(EntrezQuery):
             There is no set maximum for the number of UIDs that can be passed to ESummary.
             To comply with the recommendation of using HTTP POST method if lists of UIDs for ESummary is long,
             the method is by default set to `post`.
-        max_results: maximal number of results to return
+        max_results: Maximal number of results to return. Limited to 10'000, following
+            the eUtils documentation.
+        ignore_max_results_limit: Ignore the upper limit placed on max_results.
+            Experimentation has shown that some databases allow for higher limits, but
+            as this is not documented, setting higher limits needs to be explicitly
+            enabled here. Use at your own risk of hard to predict errors.
     """
     endpoint = 'esummary'
     method = 'post'
     ids: List[Identifier]
     max_results: int
+    ignore_max_results_limit: bool = False
 
     def validate(self):
         super().validate()
-        if self.max_results > 10_000:
+        if self.max_results > 10_000 and not self.ignore_max_results_limit:
             raise ValueError('Fetching more than 10,000 results is not implemented')
 
     def to_params(self) -> Dict[str, str]: