Skip to content

Commit

Permalink
Merge pull request #34 from chainsawriot/ref
Browse files Browse the repository at this point in the history
fix #33
  • Loading branch information
chainsawriot authored Feb 12, 2023
2 parents 417c7ae + 3605d74 commit cf240cf
Show file tree
Hide file tree
Showing 25 changed files with 507 additions and 257 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: gran
Title: Reconstructing Reproducible R Computational Environments With Ease
Version: 0.0.4
Version: 0.0.5
Authors@R:
c(person("Chung-hong", "Chan", , "chainsawtiney@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0000-0002-6232-7530")),
Expand Down
139 changes: 105 additions & 34 deletions R/installation.R
Original file line number Diff line number Diff line change
@@ -1,49 +1,78 @@
##granlist <- readRDS("tests/testdata/graph.RDS")

.safe_get_uid <- function(pkgref, uid) {
if (is.null(uid$get(pkgref))) {
return(NA_character_)
} else {
return(uid$get(pkgref))
}
}

.determine_installation_order <- function(granlist) {
dep <- fastmap::fastmap()
version <- fastmap::fastmap()
uid <- fastmap::fastmap()
## package name as per DESCRIPTION, aka. x
## can't use x here because it is too generic
pkgname <- fastmap::fastmap()
for (gran in granlist$grans) {
current_pkg <- unique(gran$original$x)
current_pkgref <- unique(gran$original$x_pkgref)
current_ver <- unique(gran$original$x_version)
current_dep <- .keep_queryable_dependencies(dep_df = gran$original, no_enhances = gran$no_enhances, no_suggests = gran$no_suggests)
dep$set(current_pkg, current_dep)
version$set(current_pkg, current_ver)
current_pkgname <- unique(gran$original$x)
dep$set(current_pkgref, current_dep)
version$set(current_pkgref, current_ver)
pkgname$set(current_pkgref, current_pkgname)
if ("x_uid" %in% colnames(gran$original)) {
uid$set(current_pkgref, unique(gran$original$x_uid))
}
for (dep_df in gran$deps) {
current_pkg <- unique(dep_df$x)
current_pkgref <- unique(dep_df$x_pkgref)
current_ver <- unique(dep_df$x_version)
current_dep <- .keep_queryable_dependencies(dep_df = dep_df, no_enhances = gran$no_enhances, no_suggests = gran$no_suggests)
dep$set(current_pkg, current_dep)
version$set(current_pkg, current_ver)
current_pkgname <- unique(dep_df$x)
dep$set(current_pkgref, current_dep)
version$set(current_pkgref, current_ver)
pkgname$set(current_pkgref, current_pkgname)
if ("x_uid" %in% colnames(dep_df)) { ## Not supported, but no harm to add it now
uid$set(current_pkgref, unique(dep_df$x_uid))
}
}
}
## installation simulation
installed_packages <- c()
noncran_packages <- c()
needed_packages <- dep$keys()
installed_pkgrefs <- c()
noncran_pkgrefs <- c()
needed_pkgrefs <- dep$keys()
## install all terminal nodes
for (package in needed_packages) {
if(.is_github(package)){
noncran_packages <- c(noncran_packages,package)
for (pkgref in needed_pkgrefs) {
if(.is_github(pkgref)){
noncran_pkgrefs <- c(noncran_pkgrefs, pkgref)
next()
}
if (is.null(dep$get(package))) {
installed_packages <- c(installed_packages, package)
if (is.null(dep$get(pkgref))) {
installed_pkgrefs <- c(installed_pkgrefs, pkgref)
}
}
while(length(setdiff(needed_packages, c(installed_packages,noncran_packages))) != 0) {
for (package in needed_packages) {
while(length(setdiff(needed_pkgrefs, c(installed_pkgrefs, noncran_pkgrefs))) != 0) {
for (pkgref in needed_pkgrefs) {
##print(package)
if (!package %in% installed_packages & !package%in% noncran_packages) {
if (!pkgref %in% installed_pkgrefs & !pkgref %in% noncran_pkgrefs) {
## check requirement
requirement_fulfilled <- length(setdiff(dep$get(package), installed_packages)) == 0
requirement_fulfilled <- length(setdiff(dep$get(pkgref), installed_pkgrefs)) == 0
if (requirement_fulfilled) {
installed_packages <- c(installed_packages, package)
installed_pkgrefs <- c(installed_pkgrefs, pkgref)
}
}
}
}
vapply(c(installed_packages,noncran_packages), function(x) version$get(x), character(1))
ordered_pkgrefs <- c(installed_pkgrefs, noncran_pkgrefs)
ordered_x <- vapply(ordered_pkgrefs, function(x) pkgname$get(x), character(1), USE.NAMES = FALSE)
ordered_version <- vapply(ordered_pkgrefs, function(x) version$get(x), character(1), USE.NAMES = FALSE)
ordered_source <- vapply(ordered_pkgrefs, function(x) .parse_pkgref(x, return_handle = FALSE), character(1), USE.NAMES = FALSE)
ordered_handle <- vapply(ordered_pkgrefs, function(x) .parse_pkgref(x, return_handle = TRUE), character(1), USE.NAMES = FALSE)
ordered_uid <- vapply(ordered_pkgrefs, .safe_get_uid, character(1), uid = uid, USE.NAMES = FALSE)
data.frame(x = ordered_x, version = ordered_version, source = ordered_source, handle = ordered_handle,
uid = ordered_uid)
}

## .install_from_cran <- function(x, lib, path = tempdir()) {
Expand Down Expand Up @@ -80,7 +109,7 @@
debs[debs == "libcurl4-openssl-dev"] <- "libcurl4-gnutls-dev"
}
}
cmd <- paste("apt-get install -y", paste(debs, collapse = " "))
cmd <- paste("apt-get install -y", paste(sort(debs), collapse = " "))
if ("default-jdk" %in% debs) {
cmd <- paste(cmd, "liblzma-dev libpcre3-dev libbz2-dev && R CMD javareconf")
}
Expand All @@ -96,9 +125,12 @@
prefix <- ""
cmd <- .group_apt_cmds(cmds, fix_libgit2 = TRUE)
} else {
update_index <- which("apt-get update" == granlist$deps_sysreqs)
cmds <- granlist$deps_sysreqs[(update_index + 1):length(granlist$deps_sysreqs)]
prefix <- paste0(paste(granlist$deps_sysreqs[1:update_index], collapse = " && "), " && ")
cmds <- setdiff(granlist$deps_sysreqs, c("apt-get install -y software-properties-common", "apt-get update"))
ppa_lines <- c("apt-get install -y software-properties-common",
grep("^add-apt-repository", granlist$deps_sysreqs, value = TRUE),
"apt-get update")
cmds <- setdiff(granlist$deps_sysreqs, ppa_lines)
prefix <- paste0(paste0(ppa_lines, collapse = " && "), " && ")
cmd <- .group_apt_cmds(cmds, fix_libgit2 = FALSE)
}
paste0("apt-get update -qq && ", prefix, cmd)
Expand Down Expand Up @@ -152,30 +184,69 @@
}
}

.cache_r_package <- function(x, cache_dir, cran_mirror, verbose) {
url <- paste(cran_mirror, "src/contrib/Archive/", names(x), "/", names(x), "_", x, ".tar.gz", sep = "")
tarball_path <- file.path(cache_dir, paste(names(x), "_", x, ".tar.gz", sep = ""))
.cache_cran <- function(x, version, cache_dir, cran_mirror, verbose) {
url <- paste(cran_mirror, "src/contrib/Archive/", x, "/", x, "_", version, ".tar.gz", sep = "")
tarball_path <- file.path(cache_dir, paste(x, "_", version, ".tar.gz", sep = ""))
tryCatch({
suppressWarnings(utils::download.file(url, destfile = tarball_path, quiet = !verbose))
}, error = function(e) {
## is the current latest
url <- paste(cran_mirror, "src/contrib/", names(x), "_", x, ".tar.gz", sep = "")
url <- paste(cran_mirror, "src/contrib/", x, "_", version, ".tar.gz", sep = "")
utils::download.file(url, destfile = tarball_path, quiet = !verbose)
})
if (!file.exists(tarball_path)) {
warning(names(x), "(", x,") can't be cache.")
}
}

.cache_cran <- function(granlist, output_dir, cran_mirror, verbose) {
.gen_temp_dir <- function() {
file.path(tempdir(), paste(sample(c(LETTERS, letters), 20, replace = TRUE), collapse = ""))
}

.cache_github <- function(x, version, handle, source, uid, cache_dir, verbose) {
sha <- uid
short_sha <- substr(sha, 1, 7)
dest_zip <- tempfile(fileext = ".zip")
tmp_dir <- .gen_temp_dir()
## unlike inside the container, we use zip here because it is less buggy
utils::download.file(paste("https://api.github.com/repos/", handle, "/zipball/", sha, sep = ""), destfile = dest_zip,
quiet = !verbose)
utils::unzip(dest_zip, exdir = tmp_dir)
dlist <- list.dirs(path = tmp_dir, recursive = FALSE)
pkg_dir <- dlist[grepl(short_sha, dlist)]
if (length(pkg_dir) != 1) {
stop(paste0("couldn't uniquely locate the unzipped package source in ", tmp_dir))
}
res <- system(command = paste("R", "CMD", "build", pkg_dir), intern = TRUE)
expected_tarball_path <- paste(x, "_", version, ".tar.gz", sep = "")
if (!file.exists(expected_tarball_path)) {
stop("Cannot locate the built tarball.")
}
file.rename(from = expected_tarball_path, to = file.path(cache_dir, expected_tarball_path))
unlink(expected_tarball_path)
}

.cache_pkgs <- function(granlist, output_dir, cran_mirror, verbose) {
install_order <- .determine_installation_order(granlist)
cache_dir <- file.path(output_dir, "cache")
if (!dir.exists(cache_dir)) {
dir.create(cache_dir)
}
for (i in seq_along(install_order)) {
.cache_r_package(x = install_order[i], cache_dir = cache_dir,
cran_mirror = cran_mirror, verbose = verbose)
for (i in seq(from = 1, to = nrow(install_order), by = 1)) {
x <- install_order$x[i]
source <- install_order$source[i]
version <- install_order$version[i]
handle <- install_order$handle[i]
uid <- install_order$uid[i]
if (source == "cran") {
.cache_cran(x = x, version = version, cache_dir = cache_dir,
cran_mirror = cran_mirror, verbose = verbose)
}
if (source == "github") {
.cache_github(x = x, version = version, handle = handle,
source = source, uid = uid,
cache_dir = cache_dir, verbose = verbose)
}
}
## For #14, cache R source in the future here
invisible(output_dir)
Expand Down Expand Up @@ -287,7 +358,7 @@ export_granlist <- function(granlist, path, granlist_as_comment = TRUE, verbose
#' @param materials_dir character, path to the directiry containing dditional resources (e.g. analysis scripts) to be copied into `output_dir` and in turn into the Docker container
#' @param image character, which versioned Rocker image to use. Can only be "r-ver", "rstudio", "tidyverse", "verse", "geospatial"
#' This applies only to R version <= 3.1
#' @param cache logical, whether to cache the content from CRAN now. Please note that the system requirements are not cached
#' @param cache logical, whether to cache the packages now. Please note that the system requirements are not cached. For query with non-CRAN packages, this option is strongly recommended.
#' @param ... arguments to be passed to `dockerize`
#' @return `output_dir`, invisibly
#' @inheritParams export_granlist
Expand Down Expand Up @@ -330,7 +401,7 @@ dockerize <- function(granlist, output_dir, materials_dir = NULL, image = c("r-v
verbose = verbose, lib = lib, cran_mirror = cran_mirror,
check_cran_mirror = check_cran_mirror)
if (isTRUE(cache)) {
.cache_cran(granlist, output_dir, cran_mirror, verbose)
.cache_pkgs(granlist, output_dir, cran_mirror, verbose)
}
if (utils::compareVersion(granlist$r_version, "3.1") == -1) {
file.copy(system.file("compile_r.sh", package = "gran"), file.path(output_dir, "compile_r.sh"),
Expand Down
103 changes: 103 additions & 0 deletions R/pkgref.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
## we follow:
## https://r-lib.github.io/pkgdepends/reference/pkg_refs.html

## syntax of a ref: source::handle
## source can be: "cran", "github" (as of now)
## a `handle` indicates how the `package` is sourced from the `source`:
## if source == "cran", handle <- package name as per DESCRIPTION, e.g. "rtoot"
## if source == "github", handle <- username/reponame, e.g. "schochastics/rtoot"

## Similar to pak::pak()
## `pkgs` parameter of resolve() can either be shorthands (e.g. "rtoot", "schochastics/rtoot")
## or pkgrefs (e.g. "cran::rtoot", "github::schochastics/rtoot")

## For `dep_df`
## compulsory columns
## `x`, `x_version`, `x_pubdate` are the information as per DESCRIPTION
## `x_pkgref` is x in pkgref, for internal storage, we don't use @ to indicate version / uid

## optional columns
## 1. if there are dependecies: `y`, `y_raw_version`, `type`, `y_pkgref`: as per DESCRIPTION; `y_raw_version` is not useful

## 2. for "github" (and possible "local" in the future)
## `x_uid` and `y_uid` are extra unique identifier for pinning down the package, if `?_version` isn't sufficient for this purpose
## if `source` == "github", `?_uid` <- "sha"

## `installation_order` should be an ordered data.frame
## not using snake case in column names for backward compatibility in containers, and not needed
## columns: x, version, source, handle, uid

.clean_suffixes <- function(pkgref) {
## remove all @, ?, or # suffixes, we don't support them
gsub("[@#\\?].+", "", pkgref)
}

.parse_pkgref <- function(pkgref, return_handle = TRUE) {
if (isFALSE(.is_pkgref(pkgref))) {
stop(pkgref, "is not a valid `pkgref`", call. = FALSE)
}
## remove all @, ?, or # suffixes, we don't support them
pkgref <- .clean_suffixes(pkgref)
res <- strsplit(pkgref, "::")[[1]]
if (length(res) == 1) {
source <- "cran"
handle <- res[1]
} else {
source <- res[1]
handle <- res[2]
}
if (isTRUE(return_handle)) {
return(handle)
}
return(source)
}

.is_github <- function(pkg){
## For now, this is sufficient.
## If "local" and "url" are supported, this is not
grepl("/", pkg)
}

.is_pkgref <- function(pkg) {
grepl("::", pkg)
}

## TBI: .is_valid_pkgref
## pkgref is only valid if: exactly one "::", source %in% c("cran", "github"), if "github", .is_github is TRUE

.extract_github_handle <- function(url) {
url <- gsub("^github::", "", url)
if (isTRUE(grepl("@github\\.com", url))) {
## remote string
info <- strsplit(url, ":")[[1]]
return(gsub("\\.git$", "", info[2]))
}
info <- strsplit(url, "github\\.com")[[1]]
path <- gsub("^/", "", info[length(info)])
path_components <- strsplit(path, "/")[[1]]
return(paste0(path_components[1], "/", path_components[2]))
}

## to normalize a pkg to pkgref
.normalize_pkg <- function(pkg) {
if (pkg == "" || is.na(pkg)) {
stop("Invalid `pkg`.", call. = FALSE)
}
if (isTRUE(.is_github(pkg))) {
if (isTRUE(grepl("github\\.com", pkg))) {
pkg <- .extract_github_handle(pkg)
}
}
if (isTRUE(.is_pkgref(pkg))) {
return(.clean_suffixes(pkg))
}
if (isTRUE(.is_github(pkg))) {
return(paste0("github::", .clean_suffixes(pkg)))
}
return(paste0("cran::", .clean_suffixes(pkg)))
}

## vectorize
.normalize_pkgs <- function(pkgs) {
vapply(X = pkgs, FUN = .normalize_pkg, FUN.VALUE = character(1), USE.NAMES = FALSE)
}
Loading

0 comments on commit cf240cf

Please sign in to comment.