Auto merge of #10754 - Muscraft:benchsuite, r=epage

Add a benchmark for workspace initialization

It [was suggested](#10736 (comment)) that a benchmark for workspace initialization should be added. This was suggested because there were performance issues with [workspace inheritance](#10747), and because it provides a general way to track the workspace initialization time across cargo changes.

### Changes
- Moved common functions out of `resolve.rs` to a shared `lib.rs`
- Added a new struct to be used when creating a new benchmark
  - This was done because `env!("CARGO_TARGET_TMPDIR")` would fail to compile when put inside of the new `lib.rs`
- Added a new workspace test for workspace inheritance
  - This new workspace does not have a repo that it was built from, and if one needs to be made I can change that
rust-lang · Jun 18, 2022 · 17d4db0 · 17d4db0
2 parents 8d42b0e + f182411
commit 17d4db0
Show file tree

Hide file tree

Showing 5 changed files with 243 additions and 192 deletions.
diff --git a/benches/benchsuite/Cargo.toml b/benches/benchsuite/Cargo.toml
@@ -10,12 +10,20 @@ description = "Benchmarking suite for Cargo."
 
 [dependencies]
 cargo = { path = "../.." }
+cargo-test-support = { path = "../../crates/cargo-test-support" }
 # Consider removing html_reports in 0.4 and switching to `cargo criterion`.
 criterion = { version = "0.3.5", features = ["html_reports"] }
 flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
 tar = { version = "0.4.38", default-features = false }
 url = "2.2.2"
 
+[lib]
+bench = false
+
 [[bench]]
 name = "resolve"
 harness = false
+
+[[bench]]
+name = "workspace_initialization"
+harness = false
diff --git a/benches/benchsuite/benches/resolve.rs b/benches/benchsuite/benches/resolve.rs
@@ -1,145 +1,12 @@
+use benchsuite::fixtures;
 use cargo::core::compiler::{CompileKind, RustcTargetData};
-use cargo::core::resolver::features::{CliFeatures, FeatureOpts, FeatureResolver, ForceAllTargets};
-use cargo::core::resolver::{HasDevUnits, ResolveBehavior};
+use cargo::core::resolver::features::{FeatureOpts, FeatureResolver};
+use cargo::core::resolver::{CliFeatures, ForceAllTargets, HasDevUnits, ResolveBehavior};
 use cargo::core::{PackageIdSpec, Workspace};
 use cargo::ops::WorkspaceResolve;
 use cargo::Config;
 use criterion::{criterion_group, criterion_main, Criterion};
-use std::fs;
-use std::path::{Path, PathBuf};
-use std::process::Command;
-use url::Url;
-
-// This is an arbitrary commit that existed when I started. This helps
-// ensure consistent results. It can be updated if needed, but that can
-// make it harder to compare results with older versions of cargo.
-const CRATES_IO_COMMIT: &str = "85f7bfd61ea4fee08ec68c468762e886b2aebec6";
-
-fn setup() {
-    create_home();
-    create_target_dir();
-    clone_index();
-    unpack_workspaces();
-}
-
-fn root() -> PathBuf {
-    let mut p = PathBuf::from(env!("CARGO_TARGET_TMPDIR"));
-    p.push("bench");
-    p
-}
-
-fn target_dir() -> PathBuf {
-    let mut p = root();
-    p.push("target");
-    p
-}
-
-fn cargo_home() -> PathBuf {
-    let mut p = root();
-    p.push("chome");
-    p
-}
-
-fn index() -> PathBuf {
-    let mut p = root();
-    p.push("index");
-    p
-}
-
-fn workspaces_path() -> PathBuf {
-    let mut p = root();
-    p.push("workspaces");
-    p
-}
-
-fn registry_url() -> Url {
-    Url::from_file_path(index()).unwrap()
-}
-
-fn create_home() {
-    let home = cargo_home();
-    if !home.exists() {
-        fs::create_dir_all(&home).unwrap();
-    }
-    fs::write(
-        home.join("config.toml"),
-        format!(
-            r#"
-                [source.crates-io]
-                replace-with = 'local-snapshot'
-
-                [source.local-snapshot]
-                registry = '{}'
-            "#,
-            registry_url()
-        ),
-    )
-    .unwrap();
-}
-
-fn create_target_dir() {
-    // This is necessary to ensure the .rustc_info.json file is written.
-    // Otherwise it won't be written, and it is very expensive to create.
-    if !target_dir().exists() {
-        std::fs::create_dir_all(target_dir()).unwrap();
-    }
-}
-
-/// This clones crates.io at a specific point in time into tmp/index.
-fn clone_index() {
-    let index = index();
-    let maybe_git = |command: &str| {
-        let status = Command::new("git")
-            .current_dir(&index)
-            .args(command.split_whitespace().collect::<Vec<_>>())
-            .status()
-            .expect("git should be installed");
-        status.success()
-    };
-    let git = |command: &str| {
-        if !maybe_git(command) {
-            panic!("failed to run git command: {}", command);
-        }
-    };
-    if index.exists() {
-        if maybe_git(&format!(
-            "rev-parse -q --verify {}^{{commit}}",
-            CRATES_IO_COMMIT
-        )) {
-            // Already fetched.
-            return;
-        }
-    } else {
-        fs::create_dir_all(&index).unwrap();
-        git("init --bare");
-        git("remote add origin https://github.com/rust-lang/crates.io-index");
-    }
-    git(&format!("fetch origin {}", CRATES_IO_COMMIT));
-    git("branch -f master FETCH_HEAD");
-}
-
-/// This unpacks the compressed workspace skeletons into tmp/workspaces.
-fn unpack_workspaces() {
-    let ws_dir = Path::new(env!("CARGO_MANIFEST_DIR"))
-        .parent()
-        .unwrap()
-        .join("workspaces");
-    let archives = fs::read_dir(ws_dir)
-        .unwrap()
-        .map(|e| e.unwrap().path())
-        .filter(|p| p.extension() == Some(std::ffi::OsStr::new("tgz")));
-    for archive in archives {
-        let name = archive.file_stem().unwrap();
-        let f = fs::File::open(&archive).unwrap();
-        let f = flate2::read::GzDecoder::new(f);
-        let dest = workspaces_path().join(&name);
-        if dest.exists() {
-            fs::remove_dir_all(&dest).unwrap();
-        }
-        let mut archive = tar::Archive::new(f);
-        archive.unpack(workspaces_path()).unwrap();
-    }
-}
+use std::path::Path;
 
 struct ResolveInfo<'cfg> {
     ws: Workspace<'cfg>,
@@ -152,36 +19,12 @@ struct ResolveInfo<'cfg> {
     ws_resolve: WorkspaceResolve<'cfg>,
 }
 
-/// Vec of `(ws_name, ws_root)`.
-fn workspaces() -> Vec<(String, PathBuf)> {
-    // CARGO_BENCH_WORKSPACES can be used to override, otherwise it just uses
-    // the workspaces in the workspaces directory.
-    let mut ps: Vec<_> = match std::env::var_os("CARGO_BENCH_WORKSPACES") {
-        Some(s) => std::env::split_paths(&s).collect(),
-        None => fs::read_dir(workspaces_path())
-            .unwrap()
-            .map(|e| e.unwrap().path())
-            // These currently fail in most cases on Windows due to long
-            // filenames in the git checkouts.
-            .filter(|p| {
-                !(cfg!(windows)
-                    && matches!(p.file_name().unwrap().to_str().unwrap(), "servo" | "tikv"))
-            })
-            .collect(),
-    };
-    // Sort so it is consistent.
-    ps.sort();
-    ps.into_iter()
-        .map(|p| (p.file_name().unwrap().to_str().unwrap().to_owned(), p))
-        .collect()
-}
-
 /// Helper for resolving a workspace. This will run the resolver once to
 /// download everything, and returns all the data structures that are used
 /// during resolution.
 fn do_resolve<'cfg>(config: &'cfg Config, ws_root: &Path) -> ResolveInfo<'cfg> {
     let requested_kinds = [CompileKind::Host];
-    let ws = cargo::core::Workspace::new(&ws_root.join("Cargo.toml"), config).unwrap();
+    let ws = Workspace::new(&ws_root.join("Cargo.toml"), config).unwrap();
     let target_data = RustcTargetData::new(&ws, &requested_kinds).unwrap();
     let cli_features = CliFeatures::from_command_line(&[], false, true).unwrap();
     let pkgs = cargo::ops::Packages::Default;
@@ -212,38 +55,14 @@ fn do_resolve<'cfg>(config: &'cfg Config, ws_root: &Path) -> ResolveInfo<'cfg> {
     }
 }
 
-/// Creates a new Config.
-///
-/// This is separate from `do_resolve` to deal with the ownership and lifetime.
-fn make_config(ws_root: &Path) -> Config {
-    let shell = cargo::core::Shell::new();
-    let mut config = cargo::util::Config::new(shell, ws_root.to_path_buf(), cargo_home());
-    // Configure is needed to set the target_dir which is needed to write
-    // the .rustc_info.json file which is very expensive.
-    config
-        .configure(
-            0,
-            false,
-            None,
-            false,
-            false,
-            false,
-            &Some(target_dir()),
-            &[],
-            &[],
-        )
-        .unwrap();
-    config
-}
-
 /// Benchmark of the full `resolve_ws_with_opts` which runs the resolver
 /// twice, the feature resolver, and more. This is a major component of a
 /// regular cargo build.
 fn resolve_ws(c: &mut Criterion) {
-    setup();
+    let fixtures = fixtures!();
     let mut group = c.benchmark_group("resolve_ws");
-    for (ws_name, ws_root) in workspaces() {
-        let config = make_config(&ws_root);
+    for (ws_name, ws_root) in fixtures.workspaces() {
+        let config = fixtures.make_config(&ws_root);
         // The resolver info is initialized only once in a lazy fashion. This
         // allows criterion to skip this workspace if the user passes a filter
         // on the command-line (like `cargo bench -- resolve_ws/tikv`).
@@ -282,10 +101,10 @@ fn resolve_ws(c: &mut Criterion) {
 
 /// Benchmark of the feature resolver.
 fn feature_resolver(c: &mut Criterion) {
-    setup();
+    let fixtures = fixtures!();
     let mut group = c.benchmark_group("feature_resolver");
-    for (ws_name, ws_root) in workspaces() {
-        let config = make_config(&ws_root);
+    for (ws_name, ws_root) in fixtures.workspaces() {
+        let config = fixtures.make_config(&ws_root);
         let mut lazy_info = None;
         group.bench_function(&ws_name, |b| {
             let ResolveInfo {

diff --git a/benches/benchsuite/benches/workspace_initialization.rs b/benches/benchsuite/benches/workspace_initialization.rs
@@ -0,0 +1,27 @@
+use benchsuite::fixtures;
+use cargo::core::Workspace;
+use criterion::{criterion_group, criterion_main, Criterion};
+
+fn workspace_initialization(c: &mut Criterion) {
+    let fixtures = fixtures!();
+    let mut group = c.benchmark_group("workspace_initialization");
+    for (ws_name, ws_root) in fixtures.workspaces() {
+        let config = fixtures.make_config(&ws_root);
+        // The config is built once per workspace, outside the timed closure, so
+        // each iteration only joins the manifest path and runs `Workspace::new`.
+        // A command-line filter can select one workspace (like `cargo bench -- workspace_initialization/tikv`).
+        group.bench_function(ws_name, |b| {
+            b.iter(|| Workspace::new(&ws_root.join("Cargo.toml"), &config).unwrap())
+        });
+    }
+    group.finish();
+}
+
+// Criterion complains about the measurement time being too small, but the
+// measurement time doesn't seem important to me. What is more important is
+// the number of iterations, which defaults to 100 and seems like a
+// reasonable default. Otherwise, the measurement time would need to be
+// changed per workspace. We wouldn't want to spend 60s on every workspace;
+// that would take too long and isn't necessary for the smaller workspaces.
+criterion_group!(benches, workspace_initialization);
+criterion_main!(benches);