Skip to content

Commit

Permalink
Add built-in pager to simplify paging (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
J535D165 authored Dec 8, 2022
1 parent 1b32de0 commit e508f2d
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 47 deletions.
54 changes: 10 additions & 44 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,57 +195,23 @@ PyAlex, although cursor paging seems to be easier to implement and less error-pr

##### Basic paging

See limitations of [basic paging](https://docs.openalex.org/api#basic-paging) in the OpenAlex documentation.

```python
from pyalex import Authors

# example query
query = Authors().search_filter(display_name="einstein")

# set the page
page = 1

# store the results
results = []

# loop till page is None
while page is not None:

    # get the results
    r, m = query.get(return_meta=True, per_page=200, page=page)

    # results: extend (not append) so `results` stays a flat list of records
    results.extend(r)

    # an empty page means nothing is left; otherwise move on to the next page
    # (without this check `page` never becomes None and the loop never ends)
    page = None if len(r) == 0 else m["page"] + 1
```
See limitations of [basic paging](https://docs.openalex.org/api#basic-paging) in the OpenAlex documentation.
It's relatively easy to implement basic paging with PyAlex; however, it is
advised to use the built-in pager based on cursor paging.

##### Cursor paging

Use `paginate()` for paging results. By default, `paginate`'s argument `n_max`
is set to 10000. Use `None` to retrieve all results.

```python
from pyalex import Authors

# example query
query = Authors().search_filter(display_name="einstein")

# set the next_cursor (to *)
next_cursor = "*"

# store the results
results = []

# loop till next_cursor is None
while next_cursor is not None:

# get the results
r, m = query.get(return_meta=True, per_page=200, cursor=next_cursor)

# results
results.extend(r)
pager = Authors().search_filter(display_name="einstein").paginate(per_page=200)

# set the next cursor
next_cursor = m["next_cursor"]
for page in pager:
print(len(page))
```

### Get N-grams
Expand Down
36 changes: 36 additions & 0 deletions pyalex/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,38 @@ class Concept(OpenAlexEntity):
pass


class CursorPaginator(object):
    """Iterator that walks a query page by page using cursor paging.

    Every ``next()`` call issues one ``alex_class.get(...)`` request with the
    current cursor and returns the list of results of that page.

    Parameters
    ----------
    alex_class : object
        Query object exposing
        ``get(return_meta=True, per_page=..., cursor=...)`` which returns a
        ``(results, meta)`` tuple; ``meta["next_cursor"]`` holds the cursor
        of the following page, or ``None`` when there is none.
    per_page : int, optional
        Forwarded unchanged to ``get``.
    cursor : str
        Cursor to start from (``"*"`` by default).
    n_max : int, optional
        Stop once at least this many results have been returned. The limit
        is checked before each request, so whole pages are always returned
        and the total may exceed ``n_max``. A falsy value (``None`` or ``0``)
        disables the limit.
    """

    def __init__(self, alex_class=None, per_page=None, cursor="*", n_max=None):

        self.alex_class = alex_class
        self.per_page = per_page
        self.cursor = cursor
        self.n_max = n_max

        # Number of results returned so far. Initialised here (and not only
        # in __iter__) so that next(pager) works without a prior iter(pager).
        self.n = 0

        # Set once the API reported that there is no next cursor; keeps
        # later next() calls from issuing further requests.
        self._exhausted = False

    def __iter__(self):
        """Reset the result counter and return the paginator itself.

        Only the counter is reset; the cursor position is kept.
        """
        self.n = 0

        return self

    def __next__(self):
        """Fetch and return the next page of results.

        Raises
        ------
        StopIteration
            When ``n_max`` results have been returned, or when the previous
            response carried no next cursor.
        """
        if self._exhausted:
            raise StopIteration

        if self.n_max and self.n >= self.n_max:
            raise StopIteration

        results, meta = self.alex_class.get(
            return_meta=True, per_page=self.per_page, cursor=self.cursor
        )
        next_cursor = meta["next_cursor"]

        if next_cursor is None:
            self._exhausted = True
            # An empty final response ends the iteration right away; a final
            # response that still carries results is handed out first so
            # that no records are lost.
            if not results:
                raise StopIteration
        else:
            self.cursor = next_cursor

        self.n = self.n + len(results)

        return results


class BaseOpenAlex(object):

"""Base class for OpenAlex objects."""
Expand Down Expand Up @@ -153,6 +185,10 @@ def get(self, return_meta=False, page=None, per_page=None, cursor=None):
else:
return results

def paginate(self, per_page=None, cursor="*", n_max=10000):
    """Return a ``CursorPaginator`` that pages through this query.

    ``per_page``, ``cursor`` and ``n_max`` are handed to the paginator
    unchanged; this object itself is the query the paginator calls
    ``get`` on.
    """
    pager = CursorPaginator(
        self,
        per_page=per_page,
        cursor=cursor,
        n_max=n_max,
    )
    return pager

def random(self):
    """Return whatever ``__getitem__`` yields for the key ``"random"``.

    NOTE(review): presumably this fetches a random entity from the API;
    confirm against ``__getitem__``, which is not visible here.
    """
    key = "random"
    return self.__getitem__(key)
Expand Down
70 changes: 67 additions & 3 deletions tests/test_pyalex.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def test_search_filter():
assert r["meta"]["count"] == m["count"]


def test_cursor_paging():
def test_cursor_by_hand():

# example query
query = Authors().search_filter(display_name="einstein")
Expand All @@ -170,8 +170,6 @@ def test_cursor_paging():
# loop till next_cursor is None
while next_cursor is not None:

print(next_cursor)

# get the results
r, m = query.get(return_meta=True, per_page=200, cursor=next_cursor)

Expand All @@ -184,6 +182,72 @@ def test_cursor_paging():
assert len(results) > 200


def test_basic_paging():
    """Collect all pages via basic (numbered) paging; expect > 200 results."""
    authors = Authors().search_filter(display_name="einstein")

    collected = []
    page_number = 1

    # page_number becomes None once a request comes back empty
    while page_number is not None:
        batch, meta = authors.get(return_meta=True, per_page=200, page=page_number)
        collected.extend(batch)

        if len(batch) == 0:
            page_number = None
        else:
            page_number = meta["page"] + 1

    assert len(collected) > 200


def test_cursor_paging():
    """Every page from the built-in pager holds between 1 and 200 results."""
    query = Authors().search_filter(display_name="einstein")
    pager = query.paginate(per_page=200)

    for page in pager:
        page_size = len(page)
        assert 1 <= page_size <= 200


def test_cursor_paging_n_max():
    """With per_page=200 and n_max=400 the pager returns exactly 400 results."""
    query = Authors().search_filter(display_name="einstein")
    pager = query.paginate(per_page=200, n_max=400)

    total = sum(len(page) for page in pager)

    assert total == 400


def test_cursor_paging_n_max_none():
    """With n_max=None the pager must run to the end without a result limit."""

    # example query
    pager = (
        Authors()
        .search_filter(display_name="einstein")
        .paginate(per_page=200, n_max=None)
    )

    n = 0
    for page in pager:

        n = n + len(page)

    # The count was previously computed but never checked. The same query
    # yields more than 200 results in the other paging tests, so an
    # unlimited pager must return more than one full page as well.
    assert n > 200


def test_referenced_works():

# the work to extract the referenced works of
Expand Down

0 comments on commit e508f2d

Please sign in to comment.