Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add built-in pager to simplify paging #2

Merged
merged 2 commits into from
Dec 8, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 10 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -193,57 +193,23 @@ PyAlex, although cursor paging seems to be easier to implement and less error-pr

##### Basic paging

See limitations of [basic paging](https://docs.openalex.org/api#basic-paging) in the OpenAlex documentation.

```python
from pyalex import Authors

# example query
query = Authors().search_filter(display_name="einstein")

# set the page
page = 1

# store the results
results = []

# loop till page is None
while page is not None:

# get the results
r, m = query.get(return_meta=True, per_page=200, page=page)

# results
results.append(r)

page = m["page"] + 1 if page is not None else None
```
See limitations of [basic paging](https://docs.openalex.org/api#basic-paging)
in the OpenAlex documentation. It's relatively easy to implement basic paging
with PyAlex; however, it is advised to use the built-in pager based on cursor
paging.

##### Cursor paging

Use `paginate()` for paging results. By default, `paginate`'s argument `n_max`
is set to 10000. Use `None` to retrieve all results.

```python
from pyalex import Authors

# example query
query = Authors().search_filter(display_name="einstein")

# set the next_cursor (to *)
next_cursor = "*"

# store the results
results = []

# loop till next_cursor is None
while next_cursor is not None:

# get the results
r, m = query.get(return_meta=True, per_page=200, cursor=next_cursor)

# results
results.extend(r)
pager = Authors().search_filter(display_name="einstein").paginate(per_page=200)

# set the next cursor
next_cursor = m["next_cursor"]
for page in pager:
print(len(page))
```

### Get N-grams
Expand Down
36 changes: 36 additions & 0 deletions pyalex/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,38 @@ class Concept(OpenAlexEntity):
pass


class CursorPaginator(object):
    """Iterator that yields the pages of a query using cursor paging.

    Each step calls ``alex_class.get(return_meta=True, per_page=...,
    cursor=...)`` and returns the results of that call. The ``next_cursor``
    value found in the returned metadata is used for the following request.

    Parameters
    ----------
    alex_class:
        Object exposing a ``get(return_meta, per_page, cursor)`` method that
        returns a ``(results, meta)`` pair, where ``meta["next_cursor"]`` is
        the cursor of the next page or ``None`` when there is none.
    per_page:
        Passed through unchanged to ``alex_class.get``.
    cursor:
        Cursor of the first page to request (``"*"`` by default).
    n_max:
        Stop requesting new pages once at least this many results have been
        returned. The check happens before each request, so the last page may
        overshoot the limit. A falsy value (``None`` or ``0``) disables the
        limit.
    """

    def __init__(self, alex_class=None, per_page=None, cursor="*", n_max=None):

        self.alex_class = alex_class
        self.per_page = per_page
        self.cursor = cursor
        self.n_max = n_max

        # Initialised here (and not only in ``__iter__``) so that calling
        # ``next(pager)`` without a prior ``iter(pager)`` does not fail.
        self.n = 0

        # Set once the metadata reports that no further page exists.
        self._exhausted = False

    def __iter__(self):
        """Reset the result counter and return the paginator itself."""

        self.n = 0

        return self

    def __next__(self):
        """Fetch and return the next page of results.

        Raises
        ------
        StopIteration
            When ``n_max`` has been reached or no further page is available.
        """

        if self._exhausted:
            raise StopIteration

        if self.n_max and self.n >= self.n_max:
            raise StopIteration

        r, m = self.alex_class.get(
            return_meta=True, per_page=self.per_page, cursor=self.cursor
        )

        next_cursor = m["next_cursor"]

        if next_cursor is None:
            # No page follows this one. Still hand out any results that came
            # with this response instead of silently dropping them.
            self._exhausted = True
            if not r:
                raise StopIteration
        else:
            self.cursor = next_cursor

        self.n = self.n + len(r)

        return r


class BaseOpenAlex(object):

"""Base class for OpenAlex objects."""
Expand Down Expand Up @@ -153,6 +185,10 @@ def get(self, return_meta=False, page=None, per_page=None, cursor=None):
else:
return results

def paginate(self, per_page=None, cursor="*", n_max=10000):
    """Return a ``CursorPaginator`` bound to this object.

    ``per_page``, ``cursor`` and ``n_max`` are forwarded unchanged to the
    paginator. ``n_max`` defaults to 10000 here.
    """
    pager_options = {"per_page": per_page, "cursor": cursor, "n_max": n_max}
    return CursorPaginator(self, **pager_options)

def random(self):
    """Return whatever ``self.__getitem__`` yields for the key ``"random"``."""
    key = "random"
    return self.__getitem__(key)
Expand Down
70 changes: 67 additions & 3 deletions tests/test_pyalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def test_search_filter():
assert r["meta"]["count"] == m["count"]


def test_cursor_paging():
def test_cursor_by_hand():

# example query
query = Authors().search_filter(display_name="einstein")
Expand All @@ -170,8 +170,6 @@ def test_cursor_paging():
# loop till next_cursor is None
while next_cursor is not None:

print(next_cursor)

# get the results
r, m = query.get(return_meta=True, per_page=200, cursor=next_cursor)

Expand All @@ -184,6 +182,72 @@ def test_cursor_paging():
assert len(results) > 200


def test_basic_paging():
    """Page through a query by hand using the ``page`` parameter."""
    query = Authors().search_filter(display_name="einstein")

    collected = []
    page_number = 1

    # Keep requesting pages until one comes back empty.
    while page_number is not None:
        records, meta = query.get(return_meta=True, per_page=200, page=page_number)
        collected.extend(records)

        if len(records) == 0:
            page_number = None
        else:
            page_number = meta["page"] + 1

    assert len(collected) > 200


def test_cursor_paging():
    """Every page from the built-in pager holds between 1 and 200 results."""
    query = Authors().search_filter(display_name="einstein")

    for page in query.paginate(per_page=200):
        assert 1 <= len(page) <= 200


def test_cursor_paging_n_max():
    """With ``n_max=400`` and 200 results per page, 400 results are returned."""
    query = Authors().search_filter(display_name="einstein")
    pager = query.paginate(per_page=200, n_max=400)

    total = sum(len(page) for page in pager)

    assert total == 400


def test_cursor_paging_n_max_none():
    """With ``n_max=None`` the pager is not limited by a result count."""

    # example query
    pager = (
        Authors()
        .search_filter(display_name="einstein")
        .paginate(per_page=200, n_max=None)
    )

    n = 0
    for page in pager:

        n = n + len(page)

    # Without an assertion this test could only fail on an exception. The
    # other paging tests in this file expect more than 200 results for the
    # same query, so require the same here.
    assert n > 200


def test_referenced_works():

# the work to extract the referenced works of
Expand Down