Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pagination #279

Merged
merged 32 commits into from
Sep 1, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
75de903
Add workaround for `req_body_json()` and content type
mgirlich Aug 15, 2023
24997de
Add `req_paginate()`
mgirlich Aug 15, 2023
8d44aec
Fix workaround
mgirlich Aug 15, 2023
f327c33
WIP
mgirlich Aug 17, 2023
43f350a
Make `req_paginate()` more lower level
mgirlich Aug 18, 2023
66fd240
Quick documentation
mgirlich Aug 18, 2023
1fe5359
Export `in_query()`, `in_header()`, and `in_body()`
mgirlich Aug 18, 2023
134b27e
Refactor
mgirlich Aug 18, 2023
a940b0b
Change interface to anonymous functions
mgirlich Aug 31, 2023
daf53c6
Fix documentation
mgirlich Aug 31, 2023
76b08b1
Actually check arguments in `check_function2()`
mgirlich Aug 31, 2023
600c9da
Add standalone cli
mgirlich Aug 31, 2023
277a4dc
No need for standalone cli
mgirlich Aug 31, 2023
c2d708d
Add some basic tests
mgirlich Aug 31, 2023
24de724
Remove `calculate_n_pages()`
mgirlich Sep 1, 2023
92b38dd
Improve documentation for `req_paginate()`
mgirlich Sep 1, 2023
ec09637
Rename to `paginate_req_perform()`
mgirlich Sep 1, 2023
61a33ae
Link to `*_req_perform()` from `req_perform()`
mgirlich Sep 1, 2023
aa2d77f
Export `paginate_next_request()`
mgirlich Sep 1, 2023
ba3c937
Check for pagination policy in `paginate_req_perform()`
mgirlich Sep 1, 2023
f0063ed
Simplify `req_paginate_offset()`
mgirlich Sep 1, 2023
01a509e
Store offset in request
mgirlich Sep 1, 2023
39252aa
Fix example for `paginate_req_perform()`
mgirlich Sep 1, 2023
a35ee5e
Rename to `req_paginate_token()`
mgirlich Sep 1, 2023
e5209cf
Kind of support an infinite amount of pages
mgirlich Sep 1, 2023
b3aef39
Add more tests
mgirlich Sep 1, 2023
e54a904
Fix test
mgirlich Sep 1, 2023
1427759
Avoid modern R syntax
mgirlich Sep 1, 2023
ddef0a6
More documentation tweaks
mgirlich Sep 1, 2023
f4b1766
Add pagination to pkgdown yaml
mgirlich Sep 1, 2023
cf1f435
Remove workaround
mgirlich Sep 1, 2023
9d87912
Fix pkgdown
mgirlich Sep 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ export("%>%")
export(curl_help)
export(curl_translate)
export(example_url)
export(in_body)
export(in_header)
export(in_query)
export(jwt_claim)
export(jwt_encode_hmac)
export(jwt_encode_sig)
Expand All @@ -41,6 +44,7 @@ export(oauth_flow_refresh)
export(oauth_token)
export(obfuscate)
export(obfuscated)
export(paginate_perform)
export(req_auth_basic)
export(req_auth_bearer_token)
export(req_body_file)
Expand All @@ -61,6 +65,10 @@ export(req_oauth_device)
export(req_oauth_password)
export(req_oauth_refresh)
export(req_options)
export(req_paginate)
export(req_paginate_next_token)
export(req_paginate_next_url)
export(req_paginate_offset)
export(req_perform)
export(req_progress)
export(req_proxy)
Expand Down
336 changes: 336 additions & 0 deletions R/paginate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,336 @@
#' Pagination
#'
mgirlich marked this conversation as resolved.
Show resolved Hide resolved
#' @inheritParams req_perform
#' @param next_request A callback function that takes two arguments (the
mgirlich marked this conversation as resolved.
Show resolved Hide resolved
#' original request and the response) and returns:
#'
#' * a new [request] to request the next page or
#' * `NULL` if there is no next page.
#' @param page_size A parameter object that specifies how the page size is added
#' to the request.
#' @param total A character that specifies the path where in the body the field
#' with the total number of elements is stored.
#' @param next_url A character that specifies the path where in the body the field
#' with the next url of the next page is stored.
#' @param offset A parameter object that specifies how the offset is added to
#' the request.
#' @param token_field A parameter object that specifies how the next token is
#' added to the request.
#' @param next_token_field A character that specifies the path where in the body
#' the field with the token of the next page is stored.
#'
#' @return A modified HTTP [request].
#' @export
#'
#' @examples
#' request("https://pokeapi.co/api/v2/pokemon") %>%
#' req_paginate_next_url(
#' "next",
#' page_size = in_query("limit", 150L),
#' total = "count"
#' )
req_paginate <- function(req,
                         next_request,
                         page_size = NULL,
                         total = NULL) {
  # Attaches a "paginate" policy to `req`: the callback that builds the next
  # request, the page size value, and the body path of the total count.
  #
  # NOTE(review): `total` is treated as a number of elements; an API that
  # reports the number of pages instead is not covered by this interface.
  check_request(req)
  check_function(next_request)
  check_param(page_size, value = TRUE, allow_null = TRUE)
  # `total` is optional, so its `NULL` default has to be accepted.
  check_character(total, allow_null = TRUE)

  # Only modify the request when a page size was supplied; otherwise the
  # request must be passed through untouched.
  if (!is.null(page_size)) {
    req <- req_set_param(req, page_size)
  }

  req_policies(
    req,
    paginate = list(
      next_request = next_request,
      # `NULL$value` is `NULL`, so a missing page size is stored as `NULL`.
      page_size = page_size$value,
      total = total
    )
  )
}

#' Perform a paginated request
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we document this with req_paginate()?

#'
#' @inheritParams req_perform
#' @param resp An HTTP [response].
#' @param max_pages The maximum number of pages to request.
#' @param progress Display a progress bar?
#'
#' @return A list of responses.
#' @export
#'
#' @examples
#' req_pokemon <- request("https://pokeapi.co/api/v2/pokemon") %>%
#' req_paginate_next_url(
#' "next",
#' page_size = in_query("limit", 150L),
#' total = "count"
#' )
#'
#' responses <- paginate_perform(req_pokemon)
paginate_perform <- function(req,
                             max_pages = 20L,
                             progress = TRUE) {
  # Performs `req`, then keeps requesting follow-up pages via the request's
  # pagination policy until `max_pages` is reached or no next page exists.
  # Returns the list of responses in page order.
  #
  # TODO(review): add a callback that is applied to each response and returns
  # the data to store (deferred to a follow-up change).
  check_request(req)
  check_bool(progress)

  # Fail before doing any network work if there is nothing to paginate with.
  if (!req_policy_exists(req, "paginate")) {
    cli::cli_abort(c(
      "{.arg req} doesn't have a pagination policy",
      i = "You can add pagination via `req_paginate()`."
    ))
  }

  resp <- req_perform(req)

  # The first page has already been fetched, so there is always at least one.
  n_pages <- max(paginate_n_pages(resp, req, max_pages = max_pages), 1L)

  out <- vector("list", length = n_pages)
  out[[1]] <- resp
  # Number of pages actually stored; tracked explicitly so the result can be
  # trimmed even when the loop below never runs.
  n_done <- 1L

  if (progress) {
    cli::cli_progress_bar(
      "Paginate",
      total = n_pages,
      format = "{cli::pb_spin} Page {cli::pb_current}/{cli::pb_total} | ETA: {cli::pb_eta}",
      current = 1L
    )
  }

  for (page in seq2(2, n_pages)) {
    req <- paginate_next_request(resp, req)
    # A `NULL` request means the policy found no further page.
    if (is.null(req)) {
      break
    }

    resp <- req_perform(req)
    out[[page]] <- resp
    n_done <- page

    if (progress) {
      cli::cli_progress_update()
    }
  }
  if (progress) {
    cli::cli_progress_done()
  }

  # Drop the unused tail of `out` when pagination stopped early.
  out[seq_len(n_done)]
}

#' @rdname paginate_perform
paginate_next_request <- function(resp, req) {
  # Builds the request for the page after `resp` by delegating to the
  # `next_request` callback stored in the request's pagination policy.
  check_response(resp)
  check_request(req)

  has_policy <- req_policy_exists(req, "paginate")
  if (has_policy) {
    callback <- req$policies$paginate$next_request
    return(callback(resp = resp, req = req))
  }

  # Without a policy there is no way to derive the next request.
  cli_abort(c(
    "{.arg req} doesn't have a pagination policy",
    i = "You can add pagination via `req_paginate()`."
  ))
}

#' @rdname paginate_perform
paginate_n_pages <- function(resp, req, max_pages) {
  # Estimates how many pages to request: the total element count read from
  # the response body divided by the page size, capped at `max_pages`.
  # Falls back to `max_pages` whenever the count cannot be determined.
  check_response(resp)
  check_request(req)

  policy <- req$policies$paginate
  page_size <- policy$page_size
  total_path <- policy$total

  if (is.null(total_path) || is.null(page_size)) {
    return(max_pages)
  }

  n_total <- purrr::pluck(resp_body_json(resp), total_path)
  if (is.null(n_total)) {
    return(max_pages)
  }
  n_pages <- ceiling(n_total / page_size)

  # `resp` is itself a fetched page, so never report fewer than one page
  # (a total of 0 would otherwise yield 0).
  max(1, min(n_pages, max_pages))
}

#' @rdname req_paginate
#' @export
req_paginate_next_url <- function(req,
                                  next_url,
                                  ...,
                                  page_size = NULL,
                                  total = NULL) {
  check_character(next_url)

  # Callback: look up the next page's URL in the JSON body at `next_url` and
  # point the request at it; `NULL` signals that there is no further page.
  # TODO(review): cache the parsed body; other pagination code may parse the
  # same response again.
  next_request <- function(req, resp) {
    url <- purrr::pluck(resp_body_json(resp), next_url)

    if (is.null(url)) {
      NULL
    } else {
      req_url(req, url)
    }
  }

  req_paginate(req, next_request, page_size = page_size, total = total)
}

#' @rdname req_paginate
#' @export
req_paginate_offset <- function(req,
                                offset,
                                page_size,
                                total = NULL) {
  check_param(offset, value = FALSE, allow_null_value = TRUE)
  # `page_size` is mandatory here: the offset advances by it on every page.
  check_param(page_size, value = TRUE)

  # Callback: advance the offset by one page and write it into the request.
  # The running offset is stored in the request's own pagination policy
  # rather than in this closure, so every fresh run on the original request
  # starts again from 0.
  next_request <- function(req, resp) {
    cur_offset <- req$policies$paginate[["offset"]] %||% 0L
    new_offset <- cur_offset + page_size$value

    next_req <- req_set_param(req, offset, new_offset)
    # Propagate "no request" unchanged instead of building on top of `NULL`.
    if (is.null(next_req)) {
      return(NULL)
    }

    next_req$policies$paginate[["offset"]] <- new_offset
    next_req
  }

  req_paginate(
    req,
    next_request,
    page_size = page_size,
    total = total
  )
}

#' @rdname req_paginate
#' @export
req_paginate_next_token <- function(req,
                                    token_field,
                                    next_token_field,
                                    page_size = NULL,
                                    total = NULL) {
  check_param(token_field, value = FALSE, allow_null_value = TRUE)
  check_character(next_token_field)

  # Callback: read the next-page token from the JSON body at
  # `next_token_field` and send it back via `token_field`; `NULL` signals
  # that there is no further page.
  next_request <- function(req, resp) {
    token <- purrr::pluck(resp_body_json(resp), next_token_field)

    if (is.null(token)) {
      NULL
    } else {
      req_set_param(req, token_field, token)
    }
  }

  req_paginate(req, next_request, page_size = page_size, total = total)
}

#' Specify a request parameter
#'
#' @param name Name of the query or header parameter.
#' @param path Path of the parameter in the body.
#' @param value The value of the parameter.
#' @param error_call The call environment reported in error messages raised
#'   while checking the arguments.
#'
#' @return A parameter object.
#' @export
#'
#' @examples
#' in_query("start", 20)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we need these helpers. It's not too awful to supply an anonymous function if there's an existing response helper:

page_size = \(resp) resp_url_query(resp, "limit", 150L),
next_page = \(resp) resp_link_url(resp, "next")

Then we'd just need something extra in resp_body_json() that let you drill down to a specific component.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(That makes me wonder if resp_link_url() should actually be resp_header_link())

in_query <- function(name,
                     value = NULL,
                     error_call = caller_env()) {
  check_string(name, call = error_call)

  # A query parameter is a plain list tagged with the query-specific class
  # and the shared parameter base class.
  structure(
    list(value = value, name = name),
    class = c("httr2_query_param", "httr2_param")
  )
}

#' @rdname in_query
#' @export
in_header <- function(name,
                      value = NULL,
                      error_call = caller_env()) {
  check_string(name, call = error_call)

  # A header parameter is a plain list tagged with the header-specific class
  # and the shared parameter base class.
  structure(
    list(value = value, name = name),
    class = c("httr2_header_param", "httr2_param")
  )
}

#' @rdname in_query
#' @export
in_body <- function(path,
                    value = NULL,
                    error_call = caller_env()) {
  # TODO check path
  # A body parameter stores a `path` instead of a `name`, and is tagged with
  # the body-specific class and the shared parameter base class.
  structure(
    list(value = value, path = path),
    class = c("httr2_body_param", "httr2_param")
  )
}

# Validates that `x` is an httr2 parameter object.
#
# * `value = TRUE` requires the parameter to carry a value.
# * `value = FALSE` requires the parameter's value to be `NULL`.
# * `allow_null = TRUE` additionally accepts `x = NULL`.
#
# Returns `invisible(NULL)` on success and raises an error otherwise.
# Callers in this file also pass `allow_null_value`; it is absorbed by `...`
# and has no effect here.
check_param <- function(x,
                        value,
                        ...,
                        allow_null = FALSE,
                        arg = caller_arg(x),
                        call = caller_env()) {
  if (!missing(x)) {
    if (is_param(x)) {
      has_value <- !is.null(x$value)
      if (value && !has_value) {
        cli::cli_abort(
          "The {.field value} of {.arg {arg}} must not be `NULL`.",
          call = call
        )
      }
      if (!value && has_value) {
        cli::cli_abort(
          "The {.field value} of {.arg {arg}} must be `NULL`.",
          call = call
        )
      }
      return(invisible(NULL))
    }
    if (allow_null && is_null(x)) {
      return(invisible(NULL))
    }
  }

  # Forward `allow_null` so the message lists `NULL` as an accepted input
  # whenever it actually is one.
  stop_input_type(
    x,
    "an httr2 parameter object",
    allow_null = allow_null,
    arg = arg,
    call = call
  )
}

# TRUE when `x` carries the shared parameter base class.
is_param <- function(x) inherits(x, what = "httr2_param")

# TRUE when `x` is tagged as a query parameter.
is_query_param <- function(x) inherits(x, what = "httr2_query_param")

# TRUE when `x` is tagged as a body parameter.
is_body_param <- function(x) inherits(x, what = "httr2_body_param")

# TRUE when `x` is tagged as a header parameter.
is_header_param <- function(x) inherits(x, what = "httr2_header_param")

# Writes a parameter into a request and returns the modified request.
#
# `x` is a parameter object; `value` overrides the value stored in `x` when
# supplied. A `NULL` parameter means "nothing to set" and returns `req`
# unchanged. Query parameters go into the URL query, header parameters into
# the request headers, and body parameters into the JSON body at `x$path`.
# Any other input is an error rather than a silently dropped request.
req_set_param <- function(req, x, value = NULL) {
  if (is.null(x)) {
    return(req)
  }

  value <- value %||% x$value

  if (is_query_param(x)) {
    req_url_query(req, "{x$name}" := value)
  } else if (is_header_param(x)) {
    req_headers(req, "{x$name}" := value)
  } else if (is_body_param(x)) {
    # Start from the existing body data, or an empty named list so the
    # result is serialised as a JSON object.
    data <- req$body$data %||% set_names(list())
    data <- purrr::assign_in(data, x$path, value)
    req_body_json(req, data)
  } else {
    cli::cli_abort(
      "{.arg x} must be created by `in_query()`, `in_header()`, or `in_body()`."
    )
  }
}
3 changes: 2 additions & 1 deletion R/req-body.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,8 @@ req_body_apply <- function(req) {
} else if (type == "raw") {
req <- req_body_apply_raw(req, data)
} else if (type == "json") {
content_type <- "application/json"
# FIXME temporary workaround just for testing purposes. Remove before merging!
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to fix now?

content_type <- content_type %||% "application/json"
json <- exec(jsonlite::toJSON, data, !!!req$body$params)
req <- req_body_apply_raw(req, json)
} else if (type == "multipart") {
Expand Down
Loading
Loading