Skip to content

Commit

Permalink
Auto merge of #6764 - matthiaskrgr:lintcheck_par_iter, r=flip1995
Browse files Browse the repository at this point in the history
lintcheck: parallelize

By default we use a single thread and one target dir as before.

If `-j n` is passed, use `n` target dirs and run one clippy in each of them.
We need several target dirs because cargo would lock them for a single process otherwise which would prevent the parallelism.
`-j 0` makes rayon use  $thread_count/2 (which I assume is the number of physical cores of a machine) for the number of threads.

Other change:
Show output of clippy being compiled when building it for lintcheck (makes it easier to spot compiler errors etc)
Show some progress indication in the "Linting... foo 1.2.3"  message.
Sort crates before linting (previously crates would be split randomly between target dirs, with the sorting, we try to make sure that even crates land in target dir 0 and odd ones in target dir 1 etc..)

*Please write a short comment explaining your change (or "none" for internal only changes)*
changelog: parallelize lintcheck with rayon
  • Loading branch information
bors committed Feb 20, 2021
2 parents 0f70e88 + 8499a32 commit 23de801
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 15 deletions.
3 changes: 2 additions & 1 deletion clippy_dev/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ shell-escape = "0.1"
tar = { version = "0.4.30", optional = true }
toml = { version = "0.5", optional = true }
ureq = { version = "2.0.0-rc3", optional = true }
rayon = { version = "1.5.0", optional = true }
walkdir = "2"

[features]
lintcheck = ["flate2", "serde_json", "tar", "toml", "ureq", "serde", "fs_extra"]
lintcheck = ["flate2", "serde_json", "tar", "toml", "ureq", "serde", "fs_extra", "rayon"]
deny-warnings = []
89 changes: 76 additions & 13 deletions clippy_dev/src/lintcheck.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@ use crate::clippy_project_root;

use std::collections::HashMap;
use std::process::Command;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{env, fmt, fs::write, path::PathBuf};

use clap::ArgMatches;
use rayon::prelude::*;
use serde::{Deserialize, Serialize};
use serde_json::Value;

Expand All @@ -37,7 +39,7 @@ struct TomlCrate {

/// Represents an archive we download from crates.io, or a git repo, or a local repo/folder
/// Once processed (downloaded/extracted/cloned/copied...), this will be translated into a `Crate`
#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq)]
#[derive(Debug, Serialize, Deserialize, Eq, Hash, PartialEq, Ord, PartialOrd)]
enum CrateSource {
CratesIo {
name: String,
Expand Down Expand Up @@ -215,11 +217,34 @@ impl CrateSource {
impl Crate {
/// Run `cargo clippy` on the `Crate` and collect and return all the lint warnings that clippy
/// issued
fn run_clippy_lints(&self, cargo_clippy_path: &PathBuf) -> Vec<ClippyWarning> {
println!("Linting {} {}...", &self.name, &self.version);
fn run_clippy_lints(
&self,
cargo_clippy_path: &PathBuf,
target_dir_index: &AtomicUsize,
thread_limit: usize,
total_crates_to_lint: usize,
) -> Vec<ClippyWarning> {
// advance the atomic index by one
let index = target_dir_index.fetch_add(1, Ordering::SeqCst);
// "loop" the index within 0..thread_limit
let target_dir_index = index % thread_limit;
let perc = ((index * 100) as f32 / total_crates_to_lint as f32) as u8;

if thread_limit == 1 {
println!(
"{}/{} {}% Linting {} {}",
index, total_crates_to_lint, perc, &self.name, &self.version
);
} else {
println!(
"{}/{} {}% Linting {} {} in target dir {:?}",
index, total_crates_to_lint, perc, &self.name, &self.version, target_dir_index
);
}

let cargo_clippy_path = std::fs::canonicalize(cargo_clippy_path).unwrap();

let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir/");
let shared_target_dir = clippy_project_root().join("target/lintcheck/shared_target_dir");

let mut args = vec!["--", "--message-format=json", "--", "--cap-lints=warn"];

Expand All @@ -232,7 +257,11 @@ impl Crate {
}

let all_output = std::process::Command::new(&cargo_clippy_path)
.env("CARGO_TARGET_DIR", shared_target_dir)
// use the looping index to create individual target dirs
.env(
"CARGO_TARGET_DIR",
shared_target_dir.join(format!("_{:?}", target_dir_index)),
)
// lint warnings will look like this:
// src/cargo/ops/cargo_compile.rs:127:35: warning: usage of `FromIterator::from_iter`
.args(&args)
Expand Down Expand Up @@ -283,13 +312,13 @@ fn filter_clippy_warnings(line: &str) -> bool {

/// Builds clippy inside the repo to make sure we have a clippy executable we can use.
fn build_clippy() {
let output = Command::new("cargo")
let status = Command::new("cargo")
.arg("build")
.output()
.status()
.expect("Failed to build clippy!");
if !output.status.success() {
eprintln!("Failed to compile Clippy");
eprintln!("stderr: {}", String::from_utf8_lossy(&output.stderr))
if !status.success() {
eprintln!("Error: Failed to compile Clippy!");
std::process::exit(1);
}
}

Expand Down Expand Up @@ -356,6 +385,9 @@ fn read_crates(toml_path: Option<&str>) -> (String, Vec<CrateSource>) {
unreachable!("Failed to translate TomlCrate into CrateSource!");
}
});
// sort the crates
crate_sources.sort();

(toml_filename, crate_sources)
}

Expand Down Expand Up @@ -454,15 +486,46 @@ pub fn run(clap_config: &ArgMatches) {
.into_iter()
.map(|krate| krate.download_and_extract())
.filter(|krate| krate.name == only_one_crate)
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &AtomicUsize::new(0), 1, 1))
.flatten()
.collect()
} else {
let counter = std::sync::atomic::AtomicUsize::new(0);

// Ask rayon for thread count. Assume that half of that is the number of physical cores
// Use one target dir for each core so that we can run N clippys in parallel.
// We need to use different target dirs because cargo would lock them for a single build otherwise,
// killing the parallelism. However this also means that deps will only be reused half/a
// quarter of the time which might result in a longer wall clock runtime

// This helps when we check many small crates with dep-trees that don't have a lot of branches in
// order to achive some kind of parallelism

// by default, use a single thread
let num_cpus = match clap_config.value_of("threads") {
Some(threads) => {
let threads: usize = threads
.parse()
.expect(&format!("Failed to parse '{}' to a digit", threads));
if threads == 0 {
// automatic choice
// Rayon seems to return thread count so half that for core count
(rayon::current_num_threads() / 2) as usize
} else {
threads
}
},
// no -j passed, use a single thread
None => 1,
};

let num_crates = crates.len();

// check all crates (default)
crates
.into_iter()
.into_par_iter()
.map(|krate| krate.download_and_extract())
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path))
.map(|krate| krate.run_clippy_lints(&cargo_clippy_path, &counter, num_cpus, num_crates))
.flatten()
.collect()
};
Expand Down
8 changes: 8 additions & 0 deletions clippy_dev/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,14 @@ fn get_clap_config<'a>() -> ArgMatches<'a> {
.value_name("CRATES-SOURCES-TOML-PATH")
.long("crates-toml")
.help("set the path for a crates.toml where lintcheck should read the sources from"),
)
.arg(
Arg::with_name("threads")
.takes_value(true)
.value_name("N")
.short("j")
.long("jobs")
.help("number of threads to use, 0 automatic choice"),
);

let app = App::new("Clippy developer tooling")
Expand Down
2 changes: 1 addition & 1 deletion lintcheck-logs/lintcheck_crates_logs.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
clippy 0.1.52 (bed115d55 2021-02-15)
clippy 0.1.52 (bb5f9d18a 2021-02-19)

cargo-0.49.0/build.rs:1:null clippy::cargo_common_metadata "package `cargo` is missing `package.categories` metadata"
cargo-0.49.0/build.rs:1:null clippy::cargo_common_metadata "package `cargo` is missing `package.keywords` metadata"
Expand Down

0 comments on commit 23de801

Please sign in to comment.