From 78bbdede0df5390a169491d5296955be8130b2c3 Mon Sep 17 00:00:00 2001
From: Joshua Nelson <jyn514@gmail.com>
Date: Fri, 30 Oct 2020 12:01:46 -0400
Subject: [PATCH] Redeploy 'configurable index location' (#1148)

* Revert "Revert "Support custom indexes""

This reverts commit 5085d57363ca8e725743e625732f6901eb02841f.

* Revert "Revert "Update Cargo.toml""

This reverts commit 02d3cfd71e1d4b214682f0680799f6da1d889074.

* Revert "Revert "Use registry from config, instead of cli argument""

This reverts commit 1edaace07900e7cec510c277ea2a74cbd21a1026.

* Revert "Revert "Use 0.11 rustwide""

This reverts commit 0128d2dddffcc53d0265746be37dd06403ff899c.

* Revert "Revert "Favor registry for queued crates""

This reverts commit 7d81b24d86586dc42b43fd9274a61e8c70408345.

* Revert "Revert "Don`t check for deleted CRATE_VERSION""

This reverts commit c2765227d037e30eeba6d899a4ffe620fede5646.
---
 Cargo.lock                         | 25 +++++++++---------
 Cargo.toml                         |  4 +--
 src/bin/cratesfyi.rs               | 29 ++++++++++++++++-----
 src/build_queue.rs                 | 34 +++++++++++++++---------
 src/config.rs                      |  2 ++
 src/db/migrate.rs                  | 14 ++++++++++
 src/docbuilder/mod.rs              |  2 +-
 src/docbuilder/queue.rs            | 20 +++++++++-----
 src/docbuilder/rustwide_builder.rs | 25 +++++++++++++-----
 src/index/mod.rs                   | 42 +++++++++++++++++++++++++-----
 src/lib.rs                         |  1 +
 src/test/mod.rs                    |  5 +++-
 src/web/releases.rs                |  6 ++---
 13 files changed, 152 insertions(+), 57 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 63c058655..fd65ffba9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -402,9 +402,9 @@ dependencies = [
 
 [[package]]
 name = "crates-index-diff"
-version = "7.0.1"
+version = "7.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e6bb290b5bb11353fbb46ca4c68ad2e8f54ab6674e4ee6a94c102054fdaf00f"
+checksum = "64af39a9a6805d715f8b72307d70815ed1ee38ef84e9de250fcdd56fe75a0e19"
 dependencies = [
  "git2",
  "serde",
@@ -1077,9 +1077,9 @@ checksum = "bcc8e0c9bce37868955864dbecd2b1ab2bdf967e6f28066d65aaac620444b65c"
 
 [[package]]
 name = "git2"
-version = "0.13.6"
+version = "0.13.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11e4b2082980e751c4bf4273e9cbb4a02c655729c8ee8a79f66cad03c8f4d31e"
+checksum = "ca6f1a0238d7f8f8fd5ee642f4ebac4dbc03e03d1f78fbe7a3ede35dcf7e2224"
 dependencies = [
  "bitflags",
  "libc",
@@ -1509,9 +1509,9 @@ checksum = "3286f09f7d4926fc486334f28d8d2e6ebe4f7f9994494b6dab27ddfad2c9b11b"
 
 [[package]]
 name = "libgit2-sys"
-version = "0.12.6+1.0.0"
+version = "0.12.14+1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf81b43f9b45ab07897a780c9b7b26b1504497e469c7a78162fc29e3b8b1c1b3"
+checksum = "8f25af58e6495f7caf2919d08f212de550cfa3ed2f5e744988938ea292b9f549"
 dependencies = [
  "cc",
  "libc",
@@ -1523,9 +1523,9 @@ dependencies = [
 
 [[package]]
 name = "libssh2-sys"
-version = "0.2.18"
+version = "0.2.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eafa907407504b0e683786d4aba47acf250f114d37357d56608333fd167dd0fc"
+checksum = "ca46220853ba1c512fc82826d0834d87b06bcd3c2a42241b7de72f3d2fe17056"
 dependencies = [
  "cc",
  "libc",
@@ -1537,9 +1537,9 @@ dependencies = [
 
 [[package]]
 name = "libz-sys"
-version = "1.0.25"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2eb5e43362e38e2bca2fd5f5134c4d4564a23a5c28e9b95411652021a8675ebe"
+checksum = "602113192b08db8f38796c4e85c39e960c145965140e918018bcde1952429655"
 dependencies = [
  "cc",
  "libc",
@@ -2871,9 +2871,9 @@ dependencies = [
 
 [[package]]
 name = "rustwide"
-version = "0.10.0"
+version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "417d578ebc7fa963bcd06f365f7987c091abeba70eac22dba94b7fd922a95c09"
+checksum = "17a9c66cf835ece6742443f3a2c2874df15db3dfc060ced53feeb210a463fd93"
 dependencies = [
  "base64 0.12.1",
  "failure",
@@ -2881,6 +2881,7 @@ dependencies = [
  "fs2",
  "futures-util",
  "getrandom",
+ "git2",
  "lazy_static",
  "log 0.4.8",
  "nix",
diff --git a/Cargo.toml b/Cargo.toml
index 460b48d4f..c867f190d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,7 +20,7 @@ log = "0.4"
 regex = "1"
 structopt = "0.3"
 crates-index = "0.15.1"
-crates-index-diff = "7"
+crates-index-diff = "7.1.1"
 reqwest = { version = "0.10.6", features = ["blocking", "json"] } # TODO: Remove blocking when async is ready
 semver = { version = "0.9", features = ["serde"] }
 slug = "=0.1.1"
@@ -40,7 +40,7 @@ schemamama = "0.3"
 schemamama_postgres = "0.3"
 systemstat = "0.1.4"
 prometheus = { version = "0.10.0", default-features = false }
-rustwide = "0.10.0"
+rustwide = "0.11"
 mime_guess = "2"
 dotenv = "0.15"
 zstd = "0.5"
diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index 542d5692b..3f9c55de7 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -6,7 +6,8 @@ use std::sync::Arc;
 use docs_rs::db::{self, add_path_into_database, Pool, PoolClient};
 use docs_rs::utils::{remove_crate_priority, set_crate_priority};
 use docs_rs::{
-    BuildQueue, Config, Context, DocBuilder, Index, Metrics, RustwideBuilder, Server, Storage,
+    BuildQueue, Config, Context, DocBuilder, Index, Metrics, PackageKind, RustwideBuilder, Server,
+    Storage,
 };
 use failure::{err_msg, Error, ResultExt};
 use once_cell::sync::OnceCell;
@@ -169,9 +170,12 @@ impl QueueSubcommand {
                 crate_name,
                 crate_version,
                 build_priority,
-            } => ctx
-                .build_queue()?
-                .add_crate(&crate_name, &crate_version, build_priority)?,
+            } => ctx.build_queue()?.add_crate(
+                &crate_name,
+                &crate_version,
+                build_priority,
+                ctx.config()?.registry_url.as_deref(),
+            )?,
 
             Self::DefaultPriority { subcommand } => subcommand.handle_args(ctx)?,
         }
@@ -308,12 +312,16 @@ impl BuildSubcommand {
                         .build_local_package(&path)
                         .context("Building documentation failed")?;
                 } else {
+                    let registry_url = ctx.config()?.registry_url.clone();
                     builder
                         .build_package(
                             &crate_name.ok_or_else(|| err_msg("must specify name if not local"))?,
                             &crate_version
                                 .ok_or_else(|| err_msg("must specify version if not local"))?,
-                            None,
+                            registry_url
+                                .as_ref()
+                                .map(|s| PackageKind::Registry(s.as_str()))
+                                .unwrap_or(PackageKind::CratesIo),
                         )
                         .context("Building documentation failed")?;
                 }
@@ -593,7 +601,16 @@ impl Context for BinContext {
     fn index(&self) -> Result<Arc<Index>, Error> {
         Ok(self
             .index
-            .get_or_try_init::<_, Error>(|| Ok(Arc::new(Index::new(&*self.config()?)?)))?
+            .get_or_try_init::<_, Error>(|| {
+                let config = self.config()?;
+                Ok(Arc::new(
+                    if let Some(registry_url) = config.registry_url.clone() {
+                        Index::from_url(config.registry_index_path.clone(), registry_url)
+                    } else {
+                        Index::new(config.registry_index_path.clone())
+                    }?,
+                ))
+            })?
             .clone())
     }
 }
diff --git a/src/build_queue.rs b/src/build_queue.rs
index 869aa9b38..e2b8d4708 100644
--- a/src/build_queue.rs
+++ b/src/build_queue.rs
@@ -11,6 +11,7 @@ pub(crate) struct QueuedCrate {
     pub(crate) name: String,
     pub(crate) version: String,
     pub(crate) priority: i32,
+    pub(crate) registry: Option<String>,
 }
 
 #[derive(Debug)]
@@ -29,10 +30,16 @@ impl BuildQueue {
         }
     }
 
-    pub fn add_crate(&self, name: &str, version: &str, priority: i32) -> Result<()> {
+    pub fn add_crate(
+        &self,
+        name: &str,
+        version: &str,
+        priority: i32,
+        registry: Option<&str>,
+    ) -> Result<()> {
         self.db.get()?.execute(
-            "INSERT INTO queue (name, version, priority) VALUES ($1, $2, $3);",
-            &[&name, &version, &priority],
+            "INSERT INTO queue (name, version, priority, registry) VALUES ($1, $2, $3, $4);",
+            &[&name, &version, &priority, &registry],
         )?;
         Ok(())
     }
@@ -63,7 +70,7 @@ impl BuildQueue {
 
     pub(crate) fn queued_crates(&self) -> Result<Vec<QueuedCrate>> {
         let query = self.db.get()?.query(
-            "SELECT id, name, version, priority
+            "SELECT id, name, version, priority, registry
              FROM queue
              WHERE attempt < $1
              ORDER BY priority ASC, attempt ASC, id ASC",
@@ -77,6 +84,7 @@ impl BuildQueue {
                 name: row.get("name"),
                 version: row.get("version"),
                 priority: row.get("priority"),
+                registry: row.get("registry"),
             })
             .collect())
     }
@@ -149,7 +157,7 @@ mod tests {
                 ("high-priority-baz", "1.0.0", -1000),
             ];
             for krate in &test_crates {
-                queue.add_crate(krate.0, krate.1, krate.2)?;
+                queue.add_crate(krate.0, krate.1, krate.2, None)?;
             }
 
             let assert_next = |name| -> Result<()> {
@@ -214,9 +222,9 @@ mod tests {
             let queue = env.build_queue();
 
             assert_eq!(queue.pending_count()?, 0);
-            queue.add_crate("foo", "1.0.0", 0)?;
+            queue.add_crate("foo", "1.0.0", 0, None)?;
             assert_eq!(queue.pending_count()?, 1);
-            queue.add_crate("bar", "1.0.0", 0)?;
+            queue.add_crate("bar", "1.0.0", 0, None)?;
             assert_eq!(queue.pending_count()?, 2);
 
             queue.process_next_crate(|krate| {
@@ -235,11 +243,11 @@ mod tests {
             let queue = env.build_queue();
 
             assert_eq!(queue.prioritized_count()?, 0);
-            queue.add_crate("foo", "1.0.0", 0)?;
+            queue.add_crate("foo", "1.0.0", 0, None)?;
             assert_eq!(queue.prioritized_count()?, 1);
-            queue.add_crate("bar", "1.0.0", -100)?;
+            queue.add_crate("bar", "1.0.0", -100, None)?;
             assert_eq!(queue.prioritized_count()?, 2);
-            queue.add_crate("baz", "1.0.0", 100)?;
+            queue.add_crate("baz", "1.0.0", 100, None)?;
             assert_eq!(queue.prioritized_count()?, 2);
 
             queue.process_next_crate(|krate| {
@@ -262,9 +270,9 @@ mod tests {
             let queue = env.build_queue();
 
             assert_eq!(queue.failed_count()?, 0);
-            queue.add_crate("foo", "1.0.0", -100)?;
+            queue.add_crate("foo", "1.0.0", -100, None)?;
             assert_eq!(queue.failed_count()?, 0);
-            queue.add_crate("bar", "1.0.0", 0)?;
+            queue.add_crate("bar", "1.0.0", 0, None)?;
 
             for _ in 0..MAX_ATTEMPTS {
                 assert_eq!(queue.failed_count()?, 0);
@@ -296,7 +304,7 @@ mod tests {
                 ("baz", "1.0.0", 10),
             ];
             for krate in &test_crates {
-                queue.add_crate(krate.0, krate.1, krate.2)?;
+                queue.add_crate(krate.0, krate.1, krate.2, None)?;
             }
 
             assert_eq!(
diff --git a/src/config.rs b/src/config.rs
index ce57ab219..adf75b967 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -12,6 +12,7 @@ pub struct Config {
 
     pub prefix: PathBuf,
     pub registry_index_path: PathBuf,
+    pub registry_url: Option<String>,
 
     // Database connection params
     pub(crate) database_url: String,
@@ -56,6 +57,7 @@ impl Config {
 
             prefix: prefix.clone(),
             registry_index_path: env("REGISTRY_INDEX_PATH", prefix.join("crates.io-index"))?,
+            registry_url: maybe_env("REGISTRY_URL")?,
 
             database_url: require_env("CRATESFYI_DATABASE_URL")?,
             max_pool_size: env("DOCSRS_MAX_POOL_SIZE", 90)?,
diff --git a/src/db/migrate.rs b/src/db/migrate.rs
index c3a450ed0..36d4feb7d 100644
--- a/src/db/migrate.rs
+++ b/src/db/migrate.rs
@@ -475,6 +475,20 @@ pub fn migrate(version: Option<Version>, conn: &mut Client) -> CratesfyiResult<(
                 DROP TYPE feature;                         
             "
         ),
+        migration!(
+            context,
+            20,
+            // description
+            "Support alternative registries",
+            // upgrade query
+            "
+                ALTER TABLE queue ADD COLUMN registry TEXT DEFAULT NULL;
+            ",
+            // downgrade query
+            "
+                ALTER TABLE queue DROP COLUMN registry;
+            "
+        )
     ];
 
     for migration in migrations {
diff --git a/src/docbuilder/mod.rs b/src/docbuilder/mod.rs
index 568571255..c5244fa18 100644
--- a/src/docbuilder/mod.rs
+++ b/src/docbuilder/mod.rs
@@ -4,8 +4,8 @@ mod queue;
 mod rustwide_builder;
 
 pub(crate) use self::limits::Limits;
-pub use self::rustwide_builder::RustwideBuilder;
 pub(crate) use self::rustwide_builder::{BuildResult, DocCoverage};
+pub use self::rustwide_builder::{PackageKind, RustwideBuilder};
 
 use crate::db::Pool;
 use crate::error::Result;
diff --git a/src/docbuilder/queue.rs b/src/docbuilder/queue.rs
index 5dfb674b0..66600dd02 100644
--- a/src/docbuilder/queue.rs
+++ b/src/docbuilder/queue.rs
@@ -1,6 +1,6 @@
 //! Updates registry index and builds new packages
 
-use super::{DocBuilder, RustwideBuilder};
+use super::{DocBuilder, PackageKind, RustwideBuilder};
 use crate::error::Result;
 use crate::utils::get_crate_priority;
 use crate::Index;
@@ -45,10 +45,12 @@ impl DocBuilder {
                 ChangeKind::Added => {
                     let priority = get_crate_priority(&mut conn, &krate.name)?;
 
-                    match self
-                        .build_queue
-                        .add_crate(&krate.name, &krate.version, priority)
-                    {
+                    match self.build_queue.add_crate(
+                        &krate.name,
+                        &krate.version,
+                        priority,
+                        index.repository_url(),
+                    ) {
                         Ok(()) => {
                             debug!("{}-{} added into build queue", krate.name, krate.version);
                             crates_added += 1;
@@ -79,7 +81,13 @@ impl DocBuilder {
         queue.process_next_crate(|krate| {
             processed = true;
 
-            builder.build_package(&krate.name, &krate.version, None)?;
+            let kind = krate
+                .registry
+                .as_ref()
+                .map(|r| PackageKind::Registry(r.as_str()))
+                .unwrap_or(PackageKind::CratesIo);
+
+            builder.build_package(&krate.name, &krate.version, kind)?;
             Ok(())
         })?;
 
diff --git a/src/docbuilder/rustwide_builder.rs b/src/docbuilder/rustwide_builder.rs
index 7d6c1929d..ce53f771e 100644
--- a/src/docbuilder/rustwide_builder.rs
+++ b/src/docbuilder/rustwide_builder.rs
@@ -56,6 +56,12 @@ const ESSENTIAL_FILES_UNVERSIONED: &[&str] = &[
 const DUMMY_CRATE_NAME: &str = "empty-library";
 const DUMMY_CRATE_VERSION: &str = "1.0.0";
 
+pub enum PackageKind<'a> {
+    Local(&'a Path),
+    CratesIo,
+    Registry(&'a str),
+}
+
 pub struct RustwideBuilder {
     workspace: Workspace,
     toolchain: Toolchain,
@@ -259,7 +265,12 @@ impl RustwideBuilder {
         crates_from_path(
             &self.config.registry_index_path.clone(),
             &mut |name, version| {
-                if let Err(err) = self.build_package(name, version, None) {
+                let registry_url = self.config.registry_url.clone();
+                let package_kind = registry_url
+                    .as_ref()
+                    .map(|r| PackageKind::Registry(r.as_str()))
+                    .unwrap_or(PackageKind::CratesIo);
+                if let Err(err) = self.build_package(name, version, package_kind) {
                     warn!("failed to build package {} {}: {}", name, version, err);
                 }
             },
@@ -273,14 +284,14 @@ impl RustwideBuilder {
                 err.context(format!("failed to load local package {}", path.display()))
             })?;
         let package = metadata.root();
-        self.build_package(&package.name, &package.version, Some(path))
+        self.build_package(&package.name, &package.version, PackageKind::Local(path))
     }
 
     pub fn build_package(
         &mut self,
         name: &str,
         version: &str,
-        local: Option<&Path>,
+        kind: PackageKind<'_>,
     ) -> Result<bool> {
         let mut conn = self.db.get()?;
 
@@ -302,10 +313,10 @@ impl RustwideBuilder {
         let mut build_dir = self.workspace.build_dir(&format!("{}-{}", name, version));
         build_dir.purge()?;
 
-        let krate = if let Some(path) = local {
-            Crate::local(path)
-        } else {
-            Crate::crates_io(name, version)
+        let krate = match kind {
+            PackageKind::Local(path) => Crate::local(path),
+            PackageKind::CratesIo => Crate::crates_io(name, version),
+            PackageKind::Registry(registry) => Crate::registry(registry, name, version),
         };
         krate.fetch(&self.workspace)?;
 
diff --git a/src/index/mod.rs b/src/index/mod.rs
index ec415fd49..894e282ab 100644
--- a/src/index/mod.rs
+++ b/src/index/mod.rs
@@ -3,7 +3,7 @@ use std::{path::PathBuf, process::Command};
 use url::Url;
 
 use self::{api::Api, crates::Crates};
-use crate::{error::Result, Config};
+use crate::error::Result;
 use failure::ResultExt;
 
 pub(crate) mod api;
@@ -12,9 +12,10 @@ mod crates;
 pub struct Index {
     path: PathBuf,
     api: Api,
+    repository_url: Option<String>,
 }
 
-#[derive(serde::Deserialize, Clone)]
+#[derive(Debug, serde::Deserialize, Clone)]
 #[serde(rename_all = "kebab-case")]
 struct IndexConfig {
     dl: String,
@@ -40,19 +41,44 @@ fn load_config(repo: &git2::Repository) -> Result<IndexConfig> {
 }
 
 impl Index {
-    pub fn new(app_config: &Config) -> Result<Self> {
-        let path = app_config.registry_index_path.clone();
+    pub fn from_url(path: PathBuf, repository_url: String) -> Result<Self> {
+        let url = repository_url.clone();
+        let diff = crates_index_diff::Index::from_path_or_cloned_with_options(
+            &path,
+            crates_index_diff::CloneOptions { repository_url },
+        )
+        .context("initialising registry index repository")?;
+
+        let config = load_config(diff.repository()).context("loading registry config")?;
+        let api = Api::new(config.api).context("initialising registry api client")?;
+        Ok(Self {
+            path,
+            api,
+            repository_url: Some(url),
+        })
+    }
+
+    pub fn new(path: PathBuf) -> Result<Self> {
         // This initializes the repository, then closes it afterwards to avoid leaking file descriptors.
         // See https://github.com/rust-lang/docs.rs/pull/847
         let diff = crates_index_diff::Index::from_path_or_cloned(&path)
             .context("initialising registry index repository")?;
         let config = load_config(diff.repository()).context("loading registry config")?;
         let api = Api::new(config.api).context("initialising registry api client")?;
-        Ok(Self { path, api })
+        Ok(Self {
+            path,
+            api,
+            repository_url: None,
+        })
     }
 
     pub(crate) fn diff(&self) -> Result<crates_index_diff::Index> {
-        let diff = crates_index_diff::Index::from_path_or_cloned(&self.path)
+        let options = self
+            .repository_url
+            .clone()
+            .map(|repository_url| crates_index_diff::CloneOptions { repository_url })
+            .unwrap_or_default();
+        let diff = crates_index_diff::Index::from_path_or_cloned_with_options(&self.path, options)
             .context("re-opening registry index for diff")?;
         Ok(diff)
     }
@@ -87,4 +113,8 @@ impl Index {
             );
         }
     }
+
+    pub fn repository_url(&self) -> Option<&str> {
+        self.repository_url.as_deref()
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index c129cda08..41adf3e3f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -6,6 +6,7 @@ pub use self::build_queue::BuildQueue;
 pub use self::config::Config;
 pub use self::context::Context;
 pub use self::docbuilder::DocBuilder;
+pub use self::docbuilder::PackageKind;
 pub use self::docbuilder::RustwideBuilder;
 pub use self::index::Index;
 pub use self::metrics::Metrics;
diff --git a/src/test/mod.rs b/src/test/mod.rs
index 55beee427..97c8e25f4 100644
--- a/src/test/mod.rs
+++ b/src/test/mod.rs
@@ -197,7 +197,10 @@ impl TestEnvironment {
     pub(crate) fn index(&self) -> Arc<Index> {
         self.index
             .get_or_init(|| {
-                Arc::new(Index::new(&*self.config()).expect("failed to initialize the index"))
+                Arc::new(
+                    Index::new(self.config().registry_index_path.clone())
+                        .expect("failed to initialize the index"),
+                )
             })
             .clone()
     }
diff --git a/src/web/releases.rs b/src/web/releases.rs
index 087cd652c..d0f04b109 100644
--- a/src/web/releases.rs
+++ b/src/web/releases.rs
@@ -1126,9 +1126,9 @@ mod tests {
                 .expect("missing heading")
                 .any(|el| el.text_contents().contains("nothing")));
 
-            queue.add_crate("foo", "1.0.0", 0)?;
-            queue.add_crate("bar", "0.1.0", -10)?;
-            queue.add_crate("baz", "0.0.1", 10)?;
+            queue.add_crate("foo", "1.0.0", 0, None)?;
+            queue.add_crate("bar", "0.1.0", -10, None)?;
+            queue.add_crate("baz", "0.0.1", 10, None)?;
 
             let full = kuchiki::parse_html().one(web.get("/releases/queue").send()?.text()?);
             let items = full