diff --git a/DESCRIPTION b/DESCRIPTION index 71264216..b2968141 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: remotes Title: R Package Installation from Remote Repositories, Including 'GitHub' -Version: 2.4.0.9001 +Version: 2.4.0.9002 Authors@R: c( person("Jim", "Hester", , "jim.hester@rstudio.com", role = c("aut", "cre")), person("Gábor", "Csárdi", , "csardi.gabor@gmail.com", role = c("aut")), diff --git a/NAMESPACE b/NAMESPACE index 7e0930a0..d3ed1bc5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -98,6 +98,7 @@ export(system_requirements) export(update_packages) importFrom(stats,update) importFrom(tools,file_ext) +importFrom(utils,URLencode) importFrom(utils,available.packages) importFrom(utils,compareVersion) importFrom(utils,contrib.url) diff --git a/NEWS.md b/NEWS.md index cd26d21e..6cd0ece7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,11 @@ # remotes (development version) +* Using `install_gitlab` will now revert to using `install_git` when API access to the GitLab server is unavailable using the provided credentials. This is especially useful within a GitLab CI job where the created `CI_JOB_TOKEN` environment variable does not provide necessary API access, but is sufficient for cloning the repository using `git` (#608, @dgkf) * pkgbuild is no longer accidentally loaded even in standalone mode (#548) * The internal GitHub token used to increase rate limits has been regenerated. * Using `remote_package_name.git2r_remote` now passes credentials when looking up the package `DESCRIPTION` (#633, @rnorberg) * Using `remote_package_name.git2r_remote` and `remote_package_name.xgit_remote`, http responses returning an invalid `DESCRIPTION` or that redirect to another page will now fallback to return `NA` instead of throwing an error when trying to parse the unexpected content (#628, @dgkf). + * Fix regex that breaks git protocol in `git_remote` (@niheaven #630). * Clarify `github_pull()` documentation (@ms609 #640). diff --git a/R/git.R b/R/git.R index 153ad5d1..7b3f7d9e 100644 --- a/R/git.R +++ b/R/git.R @@ -26,10 +26,12 @@ git_extract_sha1_tar <- function(bundle) { } } -git <- function(args, quiet = TRUE, path = ".") { +git <- function(args, quiet = TRUE, path = ".", display_args = args) { full <- paste0(shQuote(check_git_path()), " ", paste(args, collapse = "")) + display_full <- paste0(shQuote(check_git_path()), " ", paste(display_args, collapse = "")) + if (!quiet) { - message(full) + message(display_full) } result <- in_dir(path, system(full, intern = TRUE, ignore.stderr = quiet)) diff --git a/R/install-git.R b/R/install-git.R index fb5d2de8..894c345f 100644 --- a/R/install-git.R +++ b/R/install-git.R @@ -79,12 +79,55 @@ git_remote <- function(url, subdir = NULL, ref = NULL, credentials = git_credent stop("`credentials` can only be used with `git = \"git2r\"`", call. = FALSE) } - meta <- re_match(url, "(?(?:git@)?[^@]*)(?:@(?.*))?") + meta <- parse_git_url(url) + url <- paste0(meta$prot, meta$auth, meta$url) ref <- ref %||% (if (meta$ref == "") NULL else meta$ref) - list(git2r = git_remote_git2r, external = git_remote_xgit)[[git]](meta$url, subdir, ref, credentials) + list(git2r = git_remote_git2r, external = git_remote_xgit)[[git]](url, subdir, ref, credentials) } +#' Extract URL parts from a git-style url +#' +#' Although not a full url parser, this expression captures and separates url +#' protocol (`prot`), full authentication prefix (`auth`, containing `username` +#' and `password`), the host and path (`url`) and git reference (`ref`). +#' +#' @param url A `character` vector of urls to parse +#' +parse_git_url <- function(url) { + re_match(url, paste0( + "(?.*://)?(?(?[^:@/]*)(?::(?[^@/]*)?)?@)?", + "(?[^@]*)", + "(?:@(?.*))?" + )) +} + +#' Anonymize a git-style url +#' +#' Strip a url of user-specific username and password if embedded as part of a +#' url string. +#' +#' @inheritParams parse_git_url +#' +git_anon_url <- function(url) { + meta <- parse_git_url(url) + paste0(meta$prot, meta$url) +} + +#' Censor user password in a git-style url +#' +#' If a password is provided as part of a url string, censor the url string, +#' replacing the password with a series of asterisks. +#' +#' @inheritParams parse_git_url +#' +git_censored_url <- function(url) { + meta <- parse_git_url(url) + auth <- meta$username + auth <- ifelse(nzchar(meta$password), paste0(auth, ":", strrep("*", 8L)), auth) + auth <- ifelse(nzchar(auth), paste0(auth, "@"), auth) + paste0(meta$prot, auth, meta$url) +} git_remote_git2r <- function(url, subdir = NULL, ref = NULL, credentials = git_credentials()) { remote("git2r", @@ -107,7 +150,7 @@ git_remote_xgit <- function(url, subdir = NULL, ref = NULL, credentials = git_cr #' @export remote_download.git2r_remote <- function(x, quiet = FALSE) { if (!quiet) { - message("Downloading git repo ", x$url) + message("Downloading git repo ", git_anon_url(x$url)) } bundle <- tempfile() @@ -132,7 +175,7 @@ remote_metadata.git2r_remote <- function(x, bundle = NULL, source = NULL, sha = list( RemoteType = "git2r", - RemoteUrl = x$url, + RemoteUrl = git_anon_url(x$url), RemoteSubdir = x$subdir, RemoteRef = x$ref, RemoteSha = sha @@ -255,14 +298,18 @@ format.git2r_remote <- function(x, ...) { #' @export remote_download.xgit_remote <- function(x, quiet = FALSE) { if (!quiet) { - message("Downloading git repo ", x$url) + message("Downloading git repo ", git_anon_url(x$url)) } bundle <- tempfile() - args <- c("clone", "--depth", "1", "--no-hardlinks") - args <- c(args, x$args, x$url, bundle) - git(paste0(args, collapse = " "), quiet = quiet) + args <- c("clone", "--depth", "1", "--no-hardlinks", x$args) + display_args <- c(args, git_censored_url(x$url), bundle) + display_args <- paste0(display_args, collapse = " ") + args <- c(args, x$url, bundle) + args <- paste0(args, collapse = " ") + + git(args, quiet = quiet, display_args = display_args) if (!is.null(x$ref)) { git(paste0(c("fetch", "origin", x$ref), collapse = " "), quiet = quiet, path = bundle) @@ -280,7 +327,7 @@ remote_metadata.xgit_remote <- function(x, bundle = NULL, source = NULL, sha = N list( RemoteType = "xgit", - RemoteUrl = x$url, + RemoteUrl = git_anon_url(x$url), RemoteSubdir = x$subdir, RemoteRef = x$ref, RemoteSha = sha, diff --git a/R/install-gitlab.R b/R/install-gitlab.R index 4ed97820..a7617571 100644 --- a/R/install-gitlab.R +++ b/R/install-gitlab.R @@ -18,7 +18,14 @@ #' supply to this argument. This is safer than using a password because you #' can easily delete a PAT without affecting any others. Defaults to the #' GITLAB_PAT environment variable. -#' @inheritParams install_github +#' @param git_fallback A `logical` value indicating whether to defer to using +#' a `git` remote if the GitLab api is inaccessible. This can be a helpful +#' mitigating measure when an access token does not have the necessary scopes +#' for accessing the GitLab api, but still provides access for git +#' authentication. Defaults to the value of option +#' `"remotes.gitlab_git_fallback"`, or `TRUE` if the option is not set. +#' @inheritParams install_git +#' #' @export #' @family package installation #' @examples @@ -37,9 +44,19 @@ install_gitlab <- function(repo, build_manual = FALSE, build_vignettes = FALSE, repos = getOption("repos"), type = getOption("pkgType"), - ...) { + ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = git_credentials()) { - remotes <- lapply(repo, gitlab_remote, subdir = subdir, auth_token = auth_token, host = host) + remotes <- lapply( + repo, + gitlab_remote, + subdir = subdir, + auth_token = auth_token, + host = host, + git_fallback = git_fallback, + credentials = credentials + ) install_remotes(remotes, auth_token = auth_token, host = host, dependencies = dependencies, @@ -56,20 +73,103 @@ install_gitlab <- function(repo, } gitlab_remote <- function(repo, subdir = NULL, - auth_token = gitlab_pat(), sha = NULL, - host = "gitlab.com", ...) { + auth_token = gitlab_pat(quiet), sha = NULL, + host = "gitlab.com", ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + quiet = FALSE) { meta <- parse_git_repo(repo) meta$ref <- meta$ref %||% "HEAD" - remote("gitlab", - host = host, - repo = paste(c(meta$repo, meta$subdir), collapse = "/"), + # use project id api request as a canary for api access using auth_token. + repo <- paste0(c(meta$repo, meta$subdir), collapse = "/") + project_id <- try(silent = TRUE, { + gitlab_project_id(meta$username, repo, meta$ref, host, auth_token) + }) + + has_access_token <- !is.null(auth_token) && nchar(auth_token) > 0L + if (inherits(project_id, "try-error") && isTRUE(git_fallback)) { + if (has_access_token && !quiet) { + message(wrap(exdent = 2L, paste0("auth_token does not have scopes ", + "'read-repository' and 'api' for host '", host, "' required to ", + "install using gitlab_remote."))) + } else if (!quiet) { + message(wrap(exdent = 2L, paste0("Unable to establish api access for ", + "host '", host, "' required to install using gitlab_remote."))) + } + + gitlab_to_git_remote( + repo = paste0(c(meta$username, repo), collapse = "/"), + subdir = subdir, + auth_token = auth_token, + ref = sha %||% meta$ref, + host = host, + quiet = quiet, + ... + ) + } else { + remote("gitlab", + host = host, + repo = repo, + subdir = subdir, + username = meta$username, + ref = meta$ref, + sha = sha, + auth_token = auth_token + ) + } +} + +#' @importFrom utils URLencode +gitlab_to_git_remote <- function(repo, subdir = NULL, + auth_token = gitlab_pat(quiet), ref = NULL, + host = "gitlab.com", ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = NULL, + quiet = FALSE) { + + # for basic http auth, required names are largely undocumented: + # - in GitLab CI using job account, username must be "gitlab-ci-token" + # - for Project Access Tokens, username must be "" + # - for Personal Access Tokens, username is ignored + # + # choose to use "gitlab-ci-token" for most general default behavior + # https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html + + url <- paste0(build_url(host, repo), ".git") + url_has_embedded_token <- grepl("^(.*://)?[^@/]+@", url) + has_access_token <- !is.null(auth_token) && nchar(auth_token) > 0L + has_credentials <- !is.null(credentials) + use_git2r <- !is_standalone() && pkg_installed("git2r") + + if (url_has_embedded_token || has_credentials) { + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote"))) + } else if (has_access_token && !has_credentials && use_git2r) { + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote using ", + "credentials: username='gitlab-ci-token', password="))) + + credentials <- getExportedValue("git2r", "cred_user_pass")( + username = "gitlab-ci-token", + password = auth_token + ) + } else if (has_access_token && !has_credentials && !use_git2r) { + url_protocol <- gsub("((.*)://)?.*", "\\1", url) + url_path <- gsub("((.*)://)?", "", url) + url <- paste0(url_protocol, "gitlab-ci-token:", utils::URLencode(auth_token), "@", url_path) + + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote using ", + sprintf("url=%sgitlab-ci-token:@%s", url_protocol, url_path)))) + } + + git_remote( + url = url, subdir = subdir, - username = meta$username, - ref = meta$ref, - sha = sha, - auth_token = auth_token + ref = ref, + credentials = credentials, + ... ) } diff --git a/R/utils.R b/R/utils.R index a4efa1d3..e866fd44 100644 --- a/R/utils.R +++ b/R/utils.R @@ -517,3 +517,8 @@ raw_to_char_utf8 <- function(x) { Encoding(res) <- "UTF-8" res } + +wrap <- function(x, ..., simplify = FALSE) { + lines <- unlist(strwrap(unlist(strsplit(x, "\n")), ..., simplify = simplify)) + paste(lines, collapse = "\n") +} diff --git a/inst/install-github.R b/inst/install-github.R index 56d986a2..7fa1187f 100644 --- a/inst/install-github.R +++ b/inst/install-github.R @@ -1627,10 +1627,12 @@ function(...) { } } - git <- function(args, quiet = TRUE, path = ".") { + git <- function(args, quiet = TRUE, path = ".", display_args = args) { full <- paste0(shQuote(check_git_path()), " ", paste(args, collapse = "")) + display_full <- paste0(shQuote(check_git_path()), " ", paste(display_args, collapse = "")) + if (!quiet) { - message(full) + message(display_full) } result <- in_dir(path, system(full, intern = TRUE, ignore.stderr = quiet)) @@ -2664,12 +2666,55 @@ function(...) { stop("`credentials` can only be used with `git = \"git2r\"`", call. = FALSE) } - meta <- re_match(url, "(?(?:git@)?[^@]*)(?:@(?.*))?") + meta <- parse_git_url(url) + url <- paste0(meta$prot, meta$auth, meta$url) ref <- ref %||% (if (meta$ref == "") NULL else meta$ref) - list(git2r = git_remote_git2r, external = git_remote_xgit)[[git]](meta$url, subdir, ref, credentials) + list(git2r = git_remote_git2r, external = git_remote_xgit)[[git]](url, subdir, ref, credentials) } + #' Extract URL parts from a git-style url + #' + #' Although not a full url parser, this expression captures and separates url + #' protocol (`prot`), full authentication prefix (`auth`, containing `username` + #' and `password`), the host and path (`url`) and git reference (`ref`). + #' + #' @param url A `character` vector of urls to parse + #' + parse_git_url <- function(url) { + re_match(url, paste0( + "(?.*://)?(?(?[^:@/]*)(?::(?[^@/]*)?)?@)?", + "(?[^@]*)", + "(?:@(?.*))?" + )) + } + + #' Anonymize a git-style url + #' + #' Strip a url of user-specific username and password if embedded as part of a + #' url string. + #' + #' @inheritParams parse_git_url + #' + git_anon_url <- function(url) { + meta <- parse_git_url(url) + paste0(meta$prot, meta$url) + } + + #' Censor user password in a git-style url + #' + #' If a password is provided as part of a url string, censor the url string, + #' replacing the password with a series of asterisks. + #' + #' @inheritParams parse_git_url + #' + git_censored_url <- function(url) { + meta <- parse_git_url(url) + auth <- meta$username + auth <- ifelse(nzchar(meta$password), paste0(auth, ":", strrep("*", 8L)), auth) + auth <- ifelse(nzchar(auth), paste0(auth, "@"), auth) + paste0(meta$prot, auth, meta$url) + } git_remote_git2r <- function(url, subdir = NULL, ref = NULL, credentials = git_credentials()) { remote("git2r", @@ -2692,7 +2737,7 @@ function(...) { #' @export remote_download.git2r_remote <- function(x, quiet = FALSE) { if (!quiet) { - message("Downloading git repo ", x$url) + message("Downloading git repo ", git_anon_url(x$url)) } bundle <- tempfile() @@ -2717,7 +2762,7 @@ function(...) { list( RemoteType = "git2r", - RemoteUrl = x$url, + RemoteUrl = git_anon_url(x$url), RemoteSubdir = x$subdir, RemoteRef = x$ref, RemoteSha = sha @@ -2840,14 +2885,18 @@ function(...) { #' @export remote_download.xgit_remote <- function(x, quiet = FALSE) { if (!quiet) { - message("Downloading git repo ", x$url) + message("Downloading git repo ", git_anon_url(x$url)) } bundle <- tempfile() - args <- c("clone", "--depth", "1", "--no-hardlinks") - args <- c(args, x$args, x$url, bundle) - git(paste0(args, collapse = " "), quiet = quiet) + args <- c("clone", "--depth", "1", "--no-hardlinks", x$args) + display_args <- c(args, git_censored_url(x$url), bundle) + display_args <- paste0(display_args, collapse = " ") + args <- c(args, x$url, bundle) + args <- paste0(args, collapse = " ") + + git(args, quiet = quiet, display_args = display_args) if (!is.null(x$ref)) { git(paste0(c("fetch", "origin", x$ref), collapse = " "), quiet = quiet, path = bundle) @@ -2865,7 +2914,7 @@ function(...) { list( RemoteType = "xgit", - RemoteUrl = x$url, + RemoteUrl = git_anon_url(x$url), RemoteSubdir = x$subdir, RemoteRef = x$ref, RemoteSha = sha, @@ -3203,7 +3252,14 @@ function(...) { #' supply to this argument. This is safer than using a password because you #' can easily delete a PAT without affecting any others. Defaults to the #' GITLAB_PAT environment variable. - #' @inheritParams install_github + #' @param git_fallback A `logical` value indicating whether to defer to using + #' a `git` remote if the GitLab api is inaccessible. This can be a helpful + #' mitigating measure when an access token does not have the necessary scopes + #' for accessing the GitLab api, but still provides access for git + #' authentication. Defaults to the value of option + #' `"remotes.gitlab_git_fallback"`, or `TRUE` if the option is not set. + #' @inheritParams install_git + #' #' @export #' @family package installation #' @examples @@ -3222,9 +3278,19 @@ function(...) { build_manual = FALSE, build_vignettes = FALSE, repos = getOption("repos"), type = getOption("pkgType"), - ...) { + ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = git_credentials()) { - remotes <- lapply(repo, gitlab_remote, subdir = subdir, auth_token = auth_token, host = host) + remotes <- lapply( + repo, + gitlab_remote, + subdir = subdir, + auth_token = auth_token, + host = host, + git_fallback = git_fallback, + credentials = credentials + ) install_remotes(remotes, auth_token = auth_token, host = host, dependencies = dependencies, @@ -3241,20 +3307,103 @@ function(...) { } gitlab_remote <- function(repo, subdir = NULL, - auth_token = gitlab_pat(), sha = NULL, - host = "gitlab.com", ...) { + auth_token = gitlab_pat(quiet), sha = NULL, + host = "gitlab.com", ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + quiet = FALSE) { meta <- parse_git_repo(repo) meta$ref <- meta$ref %||% "HEAD" - remote("gitlab", - host = host, - repo = paste(c(meta$repo, meta$subdir), collapse = "/"), + # use project id api request as a canary for api access using auth_token. + repo <- paste0(c(meta$repo, meta$subdir), collapse = "/") + project_id <- try(silent = TRUE, { + gitlab_project_id(meta$username, repo, meta$ref, host, auth_token) + }) + + has_access_token <- !is.null(auth_token) && nchar(auth_token) > 0L + if (inherits(project_id, "try-error") && isTRUE(git_fallback)) { + if (has_access_token && !quiet) { + message(wrap(exdent = 2L, paste0("auth_token does not have scopes ", + "'read-repository' and 'api' for host '", host, "' required to ", + "install using gitlab_remote."))) + } else if (!quiet) { + message(wrap(exdent = 2L, paste0("Unable to establish api access for ", + "host '", host, "' required to install using gitlab_remote."))) + } + + gitlab_to_git_remote( + repo = paste0(c(meta$username, repo), collapse = "/"), + subdir = subdir, + auth_token = auth_token, + ref = sha %||% meta$ref, + host = host, + quiet = quiet, + ... + ) + } else { + remote("gitlab", + host = host, + repo = repo, + subdir = subdir, + username = meta$username, + ref = meta$ref, + sha = sha, + auth_token = auth_token + ) + } + } + + #' @importFrom utils URLencode + gitlab_to_git_remote <- function(repo, subdir = NULL, + auth_token = gitlab_pat(quiet), ref = NULL, + host = "gitlab.com", ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = NULL, + quiet = FALSE) { + + # for basic http auth, required names are largely undocumented: + # - in GitLab CI using job account, username must be "gitlab-ci-token" + # - for Project Access Tokens, username must be "" + # - for Personal Access Tokens, username is ignored + # + # choose to use "gitlab-ci-token" for most general default behavior + # https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html + + url <- paste0(build_url(host, repo), ".git") + url_has_embedded_token <- grepl("^(.*://)?[^@/]+@", url) + has_access_token <- !is.null(auth_token) && nchar(auth_token) > 0L + has_credentials <- !is.null(credentials) + use_git2r <- !is_standalone() && pkg_installed("git2r") + + if (url_has_embedded_token || has_credentials) { + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote"))) + } else if (has_access_token && !has_credentials && use_git2r) { + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote using ", + "credentials: username='gitlab-ci-token', password="))) + + credentials <- getExportedValue("git2r", "cred_user_pass")( + username = "gitlab-ci-token", + password = auth_token + ) + } else if (has_access_token && !has_credentials && !use_git2r) { + url_protocol <- gsub("((.*)://)?.*", "\\1", url) + url_path <- gsub("((.*)://)?", "", url) + url <- paste0(url_protocol, "gitlab-ci-token:", utils::URLencode(auth_token), "@", url_path) + + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote using ", + sprintf("url=%sgitlab-ci-token:@%s", url_protocol, url_path)))) + } + + git_remote( + url = url, subdir = subdir, - username = meta$username, - ref = meta$ref, - sha = sha, - auth_token = auth_token + ref = ref, + credentials = credentials, + ... ) } @@ -5674,6 +5823,11 @@ function(...) { Encoding(res) <- "UTF-8" res } + + wrap <- function(x, ..., simplify = FALSE) { + lines <- unlist(strwrap(unlist(strsplit(x, "\n")), ..., simplify = simplify)) + paste(lines, collapse = "\n") + } ## Standalone mode, make sure that we restore the env var on exit diff --git a/install-github.R b/install-github.R index 56d986a2..7fa1187f 100644 --- a/install-github.R +++ b/install-github.R @@ -1627,10 +1627,12 @@ function(...) { } } - git <- function(args, quiet = TRUE, path = ".") { + git <- function(args, quiet = TRUE, path = ".", display_args = args) { full <- paste0(shQuote(check_git_path()), " ", paste(args, collapse = "")) + display_full <- paste0(shQuote(check_git_path()), " ", paste(display_args, collapse = "")) + if (!quiet) { - message(full) + message(display_full) } result <- in_dir(path, system(full, intern = TRUE, ignore.stderr = quiet)) @@ -2664,12 +2666,55 @@ function(...) { stop("`credentials` can only be used with `git = \"git2r\"`", call. = FALSE) } - meta <- re_match(url, "(?(?:git@)?[^@]*)(?:@(?.*))?") + meta <- parse_git_url(url) + url <- paste0(meta$prot, meta$auth, meta$url) ref <- ref %||% (if (meta$ref == "") NULL else meta$ref) - list(git2r = git_remote_git2r, external = git_remote_xgit)[[git]](meta$url, subdir, ref, credentials) + list(git2r = git_remote_git2r, external = git_remote_xgit)[[git]](url, subdir, ref, credentials) } + #' Extract URL parts from a git-style url + #' + #' Although not a full url parser, this expression captures and separates url + #' protocol (`prot`), full authentication prefix (`auth`, containing `username` + #' and `password`), the host and path (`url`) and git reference (`ref`). + #' + #' @param url A `character` vector of urls to parse + #' + parse_git_url <- function(url) { + re_match(url, paste0( + "(?.*://)?(?(?[^:@/]*)(?::(?[^@/]*)?)?@)?", + "(?[^@]*)", + "(?:@(?.*))?" + )) + } + + #' Anonymize a git-style url + #' + #' Strip a url of user-specific username and password if embedded as part of a + #' url string. + #' + #' @inheritParams parse_git_url + #' + git_anon_url <- function(url) { + meta <- parse_git_url(url) + paste0(meta$prot, meta$url) + } + + #' Censor user password in a git-style url + #' + #' If a password is provided as part of a url string, censor the url string, + #' replacing the password with a series of asterisks. + #' + #' @inheritParams parse_git_url + #' + git_censored_url <- function(url) { + meta <- parse_git_url(url) + auth <- meta$username + auth <- ifelse(nzchar(meta$password), paste0(auth, ":", strrep("*", 8L)), auth) + auth <- ifelse(nzchar(auth), paste0(auth, "@"), auth) + paste0(meta$prot, auth, meta$url) + } git_remote_git2r <- function(url, subdir = NULL, ref = NULL, credentials = git_credentials()) { remote("git2r", @@ -2692,7 +2737,7 @@ function(...) { #' @export remote_download.git2r_remote <- function(x, quiet = FALSE) { if (!quiet) { - message("Downloading git repo ", x$url) + message("Downloading git repo ", git_anon_url(x$url)) } bundle <- tempfile() @@ -2717,7 +2762,7 @@ function(...) { list( RemoteType = "git2r", - RemoteUrl = x$url, + RemoteUrl = git_anon_url(x$url), RemoteSubdir = x$subdir, RemoteRef = x$ref, RemoteSha = sha @@ -2840,14 +2885,18 @@ function(...) { #' @export remote_download.xgit_remote <- function(x, quiet = FALSE) { if (!quiet) { - message("Downloading git repo ", x$url) + message("Downloading git repo ", git_anon_url(x$url)) } bundle <- tempfile() - args <- c("clone", "--depth", "1", "--no-hardlinks") - args <- c(args, x$args, x$url, bundle) - git(paste0(args, collapse = " "), quiet = quiet) + args <- c("clone", "--depth", "1", "--no-hardlinks", x$args) + display_args <- c(args, git_censored_url(x$url), bundle) + display_args <- paste0(display_args, collapse = " ") + args <- c(args, x$url, bundle) + args <- paste0(args, collapse = " ") + + git(args, quiet = quiet, display_args = display_args) if (!is.null(x$ref)) { git(paste0(c("fetch", "origin", x$ref), collapse = " "), quiet = quiet, path = bundle) @@ -2865,7 +2914,7 @@ function(...) { list( RemoteType = "xgit", - RemoteUrl = x$url, + RemoteUrl = git_anon_url(x$url), RemoteSubdir = x$subdir, RemoteRef = x$ref, RemoteSha = sha, @@ -3203,7 +3252,14 @@ function(...) { #' supply to this argument. This is safer than using a password because you #' can easily delete a PAT without affecting any others. Defaults to the #' GITLAB_PAT environment variable. - #' @inheritParams install_github + #' @param git_fallback A `logical` value indicating whether to defer to using + #' a `git` remote if the GitLab api is inaccessible. This can be a helpful + #' mitigating measure when an access token does not have the necessary scopes + #' for accessing the GitLab api, but still provides access for git + #' authentication. Defaults to the value of option + #' `"remotes.gitlab_git_fallback"`, or `TRUE` if the option is not set. + #' @inheritParams install_git + #' #' @export #' @family package installation #' @examples @@ -3222,9 +3278,19 @@ function(...) { build_manual = FALSE, build_vignettes = FALSE, repos = getOption("repos"), type = getOption("pkgType"), - ...) { + ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = git_credentials()) { - remotes <- lapply(repo, gitlab_remote, subdir = subdir, auth_token = auth_token, host = host) + remotes <- lapply( + repo, + gitlab_remote, + subdir = subdir, + auth_token = auth_token, + host = host, + git_fallback = git_fallback, + credentials = credentials + ) install_remotes(remotes, auth_token = auth_token, host = host, dependencies = dependencies, @@ -3241,20 +3307,103 @@ function(...) { } gitlab_remote <- function(repo, subdir = NULL, - auth_token = gitlab_pat(), sha = NULL, - host = "gitlab.com", ...) { + auth_token = gitlab_pat(quiet), sha = NULL, + host = "gitlab.com", ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + quiet = FALSE) { meta <- parse_git_repo(repo) meta$ref <- meta$ref %||% "HEAD" - remote("gitlab", - host = host, - repo = paste(c(meta$repo, meta$subdir), collapse = "/"), + # use project id api request as a canary for api access using auth_token. + repo <- paste0(c(meta$repo, meta$subdir), collapse = "/") + project_id <- try(silent = TRUE, { + gitlab_project_id(meta$username, repo, meta$ref, host, auth_token) + }) + + has_access_token <- !is.null(auth_token) && nchar(auth_token) > 0L + if (inherits(project_id, "try-error") && isTRUE(git_fallback)) { + if (has_access_token && !quiet) { + message(wrap(exdent = 2L, paste0("auth_token does not have scopes ", + "'read-repository' and 'api' for host '", host, "' required to ", + "install using gitlab_remote."))) + } else if (!quiet) { + message(wrap(exdent = 2L, paste0("Unable to establish api access for ", + "host '", host, "' required to install using gitlab_remote."))) + } + + gitlab_to_git_remote( + repo = paste0(c(meta$username, repo), collapse = "/"), + subdir = subdir, + auth_token = auth_token, + ref = sha %||% meta$ref, + host = host, + quiet = quiet, + ... + ) + } else { + remote("gitlab", + host = host, + repo = repo, + subdir = subdir, + username = meta$username, + ref = meta$ref, + sha = sha, + auth_token = auth_token + ) + } + } + + #' @importFrom utils URLencode + gitlab_to_git_remote <- function(repo, subdir = NULL, + auth_token = gitlab_pat(quiet), ref = NULL, + host = "gitlab.com", ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = NULL, + quiet = FALSE) { + + # for basic http auth, required names are largely undocumented: + # - in GitLab CI using job account, username must be "gitlab-ci-token" + # - for Project Access Tokens, username must be "" + # - for Personal Access Tokens, username is ignored + # + # choose to use "gitlab-ci-token" for most general default behavior + # https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html + + url <- paste0(build_url(host, repo), ".git") + url_has_embedded_token <- grepl("^(.*://)?[^@/]+@", url) + has_access_token <- !is.null(auth_token) && nchar(auth_token) > 0L + has_credentials <- !is.null(credentials) + use_git2r <- !is_standalone() && pkg_installed("git2r") + + if (url_has_embedded_token || has_credentials) { + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote"))) + } else if (has_access_token && !has_credentials && use_git2r) { + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote using ", + "credentials: username='gitlab-ci-token', password="))) + + credentials <- getExportedValue("git2r", "cred_user_pass")( + username = "gitlab-ci-token", + password = auth_token + ) + } else if (has_access_token && !has_credentials && !use_git2r) { + url_protocol <- gsub("((.*)://)?.*", "\\1", url) + url_path <- gsub("((.*)://)?", "", url) + url <- paste0(url_protocol, "gitlab-ci-token:", utils::URLencode(auth_token), "@", url_path) + + if (!quiet) + message(wrap(exdent = 2L, paste0("Attempting git_remote using ", + sprintf("url=%sgitlab-ci-token:@%s", url_protocol, url_path)))) + } + + git_remote( + url = url, subdir = subdir, - username = meta$username, - ref = meta$ref, - sha = sha, - auth_token = auth_token + ref = ref, + credentials = credentials, + ... ) } @@ -5674,6 +5823,11 @@ function(...) { Encoding(res) <- "UTF-8" res } + + wrap <- function(x, ..., simplify = FALSE) { + lines <- unlist(strwrap(unlist(strsplit(x, "\n")), ..., simplify = simplify)) + paste(lines, collapse = "\n") + } ## Standalone mode, make sure that we restore the env var on exit diff --git a/man/git_anon_url.Rd b/man/git_anon_url.Rd new file mode 100644 index 00000000..c71d8d40 --- /dev/null +++ b/man/git_anon_url.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/install-git.R +\name{git_anon_url} +\alias{git_anon_url} +\title{Anonymize a git-style url} +\usage{ +git_anon_url(url) +} +\arguments{ +\item{url}{A \code{character} vector of urls to parse} +} +\description{ +Strip a url of user-specific username and password if embedded as part of a +url string. +} diff --git a/man/git_censored_url.Rd b/man/git_censored_url.Rd new file mode 100644 index 00000000..50a42b78 --- /dev/null +++ b/man/git_censored_url.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/install-git.R +\name{git_censored_url} +\alias{git_censored_url} +\title{Censor user password in a git-style url} +\usage{ +git_censored_url(url) +} +\arguments{ +\item{url}{A \code{character} vector of urls to parse} +} +\description{ +If a password is provided as part of a url string, censor the url string, +replacing the password with a series of asterisks. +} diff --git a/man/install_gitlab.Rd b/man/install_gitlab.Rd index 2f463f9e..4d762824 100644 --- a/man/install_gitlab.Rd +++ b/man/install_gitlab.Rd @@ -19,7 +19,9 @@ install_gitlab( build_vignettes = FALSE, repos = getOption("repos"), type = getOption("pkgType"), - ... + ..., + git_fallback = getOption("remotes.gitlab_git_fallback", TRUE), + credentials = git_credentials() ) } \arguments{ @@ -83,6 +85,16 @@ since the previous install.} \item{type}{Type of package to \code{update}.} \item{...}{Other arguments passed on to \code{\link[utils:install.packages]{utils::install.packages()}}.} + +\item{git_fallback}{A \code{logical} value indicating whether to defer to using +a \code{git} remote if the GitLab api is inaccessible. This can be a helpful +mitigating measure when an access token does not have the necessary scopes +for accessing the GitLab api, but still provides access for git +authentication. Defaults to the value of option +\code{"remotes.gitlab_git_fallback"}, or \code{TRUE} if the option is not set.} + +\item{credentials}{A git2r credentials object passed through to clone. +Supplying this argument implies using \code{git2r} with \code{git}.} } \description{ This function is vectorised on \code{repo} so you can install multiple diff --git a/man/parse_git_url.Rd b/man/parse_git_url.Rd new file mode 100644 index 00000000..1b388cb5 --- /dev/null +++ b/man/parse_git_url.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/install-git.R +\name{parse_git_url} +\alias{parse_git_url} +\title{Extract URL parts from a git-style url} +\usage{ +parse_git_url(url) +} +\arguments{ +\item{url}{A \code{character} vector of urls to parse} +} +\description{ +Although not a full url parser, this expression captures and separates url +protocol (\code{prot}), full authentication prefix (\code{auth}, containing \code{username} +and \code{password}), the host and path (\code{url}) and git reference (\code{ref}). +} diff --git a/tests/testthat/test-git.R b/tests/testthat/test-git.R index e7c35ce7..92925177 100644 --- a/tests/testthat/test-git.R +++ b/tests/testthat/test-git.R @@ -69,3 +69,104 @@ test_that("check_git_path", { "Git does not seem to be installed on your system" ) }) + + +test_that("git urls are properly parsed, anonymized and censored", { + prot <- "http://" + username <- "janedoe" + password <- "12345" + asterisks <- strrep("*", 8L) + url <- "www.gitzone.com/namespace/repo.git" + ref <- "HEAD" + + df <- expand.grid( + prot = c("", prot), + username = c("", username), + password = c("", password), + url = url, + ref = c("", ref), + stringsAsFactors = FALSE + ) + + # filter invalid urls with password but no username + df <- df[!(!nchar(df$username) & nchar(df$password)),] + + # format url components and build permuted urls + df$auth <- with(df, paste0( + username, + ifelse(nzchar(password), paste0(":", password), ""), + ifelse(nzchar(username), "@", "") + )) + df$ref_str <- with(df, ifelse(nzchar(ref), paste0("@", ref), "")) + df$full_url <- with(df, paste0(prot, auth, url, ref_str)) + + for (i in seq_len(nrow(df))) { + meta <- parse_git_url(df[i,"full_url"]) + expect_equal(meta$prot, df$prot[i]) + expect_equal(meta$auth, df$auth[i]) + expect_equal(meta$username, df$username[i]) + expect_equal(meta$password, df$password[i]) + expect_equal(meta$url, df$url[i]) + expect_equal(meta$ref, df$ref[i]) + } + + expect_true(!any(grepl(password, git_anon_url(df$full_url)))) + expect_true(!any(grepl(paste0(username, "|", password), git_anon_url(df$full_url)))) + + expect_equal(git_anon_url(url), url) + expect_equal(git_anon_url(i <- paste0(prot, url)), i) + expect_equal(git_anon_url(paste0(prot, url, "@", ref)), paste0(prot, url)) + expect_equal(git_anon_url(paste0(url, "@", ref)), url) + expect_equal(git_anon_url(paste0(username, "@", url, "@", ref)), url) + expect_equal(git_anon_url(paste0(prot, username, "@", url, "@", ref)), paste0(prot, url)) + expect_equal(git_anon_url(paste0(username, ":", password, "@", url)), url) + expect_equal(git_anon_url(paste0(username, ":", password, "@", url, "@", ref)), url) + expect_equal(git_anon_url(paste0(prot, username, ":", password, "@", url, "@", ref)), paste0(prot, url)) + + expect_true(!any(grepl(password, git_censored_url(df$full_url)))) + expect_equal(git_censored_url(df$full_url), gsub(password, asterisks, paste0(df$prot, df$auth, df$url))) + + expect_equal(git_censored_url(url), url) + expect_equal(git_censored_url(i <- paste0(prot, url)), i) + expect_equal(git_censored_url(paste0(prot, url, "@", ref)), paste0(prot, url)) + expect_equal(git_censored_url(paste0(url, "@", ref)), url) + expect_equal( + git_censored_url(paste0(username, "@", url, "@", ref)), + paste0(username, "@", url)) + expect_equal( + git_censored_url(paste0(prot, username, "@", url, "@", ref)), + paste0(prot, username, "@", url)) + expect_equal( + git_censored_url(paste0(username, ":", password, "@", url)), + paste0(username, ":", asterisks, "@", url)) + expect_equal( + git_censored_url(paste0(username, ":", password, "@", url, "@", ref)), + paste0(username, ":", asterisks, "@", url)) + expect_equal( + git_censored_url(paste0(prot, username, ":", password, "@", url, "@", ref)), + paste0(prot, username, ":", asterisks, "@", url)) +}) + + +test_that("parse_git_url handles ssh-style repo urls", { + username <- "git" + url <- "gitzone.com:namespace/repo.git" + git_url <- paste0(username, "@", url) + ref <- "HEAD" + + meta <- parse_git_url(git_url) + expect_equal(meta$prot, "") + expect_equal(meta$auth, "git@") + expect_equal(meta$username, "git") + expect_equal(meta$password, "") + expect_equal(meta$url, url) + expect_equal(meta$ref, "") + + meta <- parse_git_url(paste0(git_url, "@", ref)) + expect_equal(meta$prot, "") + expect_equal(meta$auth, paste0(username, "@")) + expect_equal(meta$username, username) + expect_equal(meta$password, "") + expect_equal(meta$url, url) + expect_equal(meta$ref, ref) +}) diff --git a/tests/testthat/test-install-gitlab.R b/tests/testthat/test-install-gitlab.R index 661cd821..3d85d40f 100644 --- a/tests/testthat/test-install-gitlab.R +++ b/tests/testthat/test-install-gitlab.R @@ -1,9 +1,9 @@ context("Install from GitLab") test_that("install_gitlab", { - skip_on_cran() skip_if_offline() + withr::local_envvar(c(GITLAB_PAT="")) Sys.unsetenv("R_TESTS") @@ -31,9 +31,9 @@ test_that("install_gitlab", { }) test_that("install_gitlab with subgroups and special characters", { - skip_on_cran() skip_if_offline() + withr::local_envvar(c(GITLAB_PAT="")) Sys.unsetenv("R_TESTS") @@ -67,9 +67,9 @@ test_that("install_gitlab with subgroups and special characters", { }) test_that("error if not username, warning if given as argument", { - skip_on_cran() skip_if_offline() + withr::local_envvar(c(GITLAB_PAT="")) Sys.unsetenv("R_TESTS") @@ -85,6 +85,7 @@ test_that("error if not username, warning if given as argument", { test_that("remote_download.gitlab_remote messages", { skip_on_cran() skip_if_offline() + withr::local_envvar(c(GITLAB_PAT="")) mockery::stub(remote_download.gitlab_remote, "download", TRUE) expect_message( @@ -101,9 +102,9 @@ test_that("remote_download.gitlab_remote messages", { }) test_that("remote_sha.gitlab_remote", { - skip_on_cran() skip_if_offline() + withr::local_envvar(c(GITLAB_PAT="")) expect_equal( remote_sha( @@ -132,9 +133,9 @@ test_that("remote_sha.gitlab_remote", { }) test_that("gitlab_project_id", { - skip_on_cran() skip_if_offline() + withr::local_envvar(c(GITLAB_PAT="")) expect_equal( gitlab_project_id( @@ -148,3 +149,95 @@ test_that("gitlab_project_id", { }) +test_that("gitlab_remote reverts to git2r_remote when git_fallback with git2r", { + skip_if_not_installed("git2r") # needed for credential creation + withr::local_envvar(c(GITLAB_PAT="badcafe")) + + # assume git2r available + stubbed_gitlab_to_git_remote <- gitlab_to_git_remote + stubbed_git_remote <- git_remote + mockery::stub(stubbed_gitlab_to_git_remote, "pkg_installed", TRUE) + mockery::stub(stubbed_git_remote, "pkg_installed", TRUE) + mockery::stub(stubbed_gitlab_to_git_remote, "git_remote", stubbed_git_remote) + mockery::stub(gitlab_remote, "gitlab_to_git_remote", stubbed_gitlab_to_git_remote) + + expect_s3_class( + expect_message( + gitlab_remote("fakenamespace/namespace/repo", git_fallback = FALSE), + "GITLAB_PAT" + ), + "gitlab_remote" + ) + + expect_message({ + r <- gitlab_remote( + "fakenamespace/namespace/repo", + git_fallback = TRUE + ) + }, "auth_token does not") + expect_s3_class(r, "git2r_remote") + expect_equal(r$credentials$username, "gitlab-ci-token") + expect_equal(r$credentials$password, "badcafe") + + withr::local_envvar(c(GITLAB_PAT="")) + expect_message({ + r <- gitlab_remote( + "fakenamespace/namespace/repo", + git_fallback = TRUE + ) + }, "Unable to establish api access") + expect_s3_class(r, "git2r_remote") + expect_equal(r$credentials, NULL) + + r <- gitlab_remote("fakenamespace/namespace/repo", git_fallback = FALSE) + expect_s3_class(r, "gitlab_remote") +}) + +test_that("gitlab_remote reverts to xgit_remote when git_fallback and no git2r", { + withr::local_envvar(c(GITLAB_PAT="")) + + # assume git2r unavailable + stubbed_gitlab_to_git_remote <- gitlab_to_git_remote + stubbed_git_remote <- git_remote + mockery::stub(stubbed_gitlab_to_git_remote, "pkg_installed", FALSE) + mockery::stub(stubbed_git_remote, "pkg_installed", FALSE) + mockery::stub(stubbed_gitlab_to_git_remote, "git_remote", stubbed_git_remote) + mockery::stub(gitlab_remote, "gitlab_to_git_remote", stubbed_gitlab_to_git_remote) + + expect_s3_class( + expect_silent(gitlab_remote("fakenamespace/namespace/repo", git_fallback = FALSE)), + "gitlab_remote" + ) + + expect_message({ + r <- gitlab_remote( + "fakenamespace/namespace/repo", + git_fallback = TRUE + ) + }, "Unable to establish api access") + expect_s3_class(r, "xgit_remote") + expect_equal(r$url, "https://gitlab.com/fakenamespace/namespace/repo.git") + + withr::local_envvar(c(GITLAB_PAT="badcafe")) + + expect_message({ + r <- gitlab_remote( + "fakenamespace/namespace/repo", + git_fallback = TRUE + ) + }, "auth_token does not") + expect_s3_class(r, "xgit_remote") + expect_equal(r$url, "https://gitlab-ci-token:badcafe@gitlab.com/fakenamespace/namespace/repo.git") + + expect_message({ + r <- gitlab_remote( + "fakenamespace/namespace/repo", + auth_token = "goodcafe", + host = "github.com", + git_fallback = TRUE + ) + }, "Attempting git_remote.*@") + expect_s3_class(r, "xgit_remote") + expect_equal(r$url, "https://gitlab-ci-token:goodcafe@github.com/fakenamespace/namespace/repo.git") +}) +