Skip to content

Commit

Permalink
More stability checker options (#5299)
Browse files Browse the repository at this point in the history
## Summary

This contains three changes:
* repos in `check_ecosystem.py` are stored as `org:name` instead of
`org/name` to create a flat directory layout
* `check_ecosystem.py` performs a maximum of 50 parallel jobs at the
same time to avoid consuming to much RAM
* `check-formatter-stability` gets a new option `--multi-project` so
it's possible to do `cargo run --bin ruff_dev --
check-formatter-stability --multi-project target/checkouts`
With these three changes it becomes easy to check the formatter
stability over a larger number of repositories. This is part of the
integration of integrating formatter regressions checks into the
ecosystem checks.

## Test Plan

```shell
python scripts/check_ecosystem.py --checkouts target/checkouts --projects github_search.jsonl -v $(which true) $(which true)
cargo run --bin ruff_dev -- check-formatter-stability --multi-project target/checkouts
```
  • Loading branch information
konstin authored Jun 22, 2023
1 parent f9f0cf7 commit 03694ef
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 17 deletions.
53 changes: 46 additions & 7 deletions crates/ruff_dev/src/check_formatter_stability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ pub(crate) struct Args {
/// Print only the first error and exit, `-x` is same as pytest
#[arg(long, short = 'x')]
pub(crate) exit_first_error: bool,
/// Checks each project inside a directory
#[arg(long)]
pub(crate) multi_project: bool,
}

/// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but
Expand All @@ -54,6 +57,35 @@ struct WrapperArgs {
}

pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
let all_success = if args.multi_project {
let mut all_success = true;
for base_dir in &args.files {
for dir in base_dir.read_dir()? {
let dir = dir?;
println!("Starting {}", dir.path().display());
let success = check_repo(&Args {
files: vec![dir.path().clone()],
..*args
});
println!("Finished {}: {:?}", dir.path().display(), success);
if !matches!(success, Ok(true)) {
all_success = false;
}
}
}
all_success
} else {
check_repo(args)?
};
if all_success {
Ok(ExitCode::SUCCESS)
} else {
Ok(ExitCode::FAILURE)
}
}

/// Returns whether the check was successful
pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
let start = Instant::now();

// Find files to check (or in this case, format twice). Adapted from ruff_cli
Expand All @@ -77,13 +109,20 @@ pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?;
assert!(!paths.is_empty(), "no python files in {:?}", cli.files);

let mut formatted_counter = 0;
let errors = paths
.into_iter()
.map(|dir_entry| {
// Doesn't make sense to recover here in this test script
let file = dir_entry
.expect("Iterating the files in the repository failed")
.into_path();
dir_entry.expect("Iterating the files in the repository failed")
})
.filter(|dir_entry| {
// For some reason it does not filter in the beginning
dir_entry.file_name() != "pyproject.toml"
})
.map(|dir_entry| {
let file = dir_entry.path().to_path_buf();
formatted_counter += 1;
// Handle panics (mostly in `debug_assert!`)
let result = match catch_unwind(|| check_file(&file)) {
Ok(result) => result,
Expand Down Expand Up @@ -166,20 +205,20 @@ Formatted twice:
}

if args.exit_first_error {
return Ok(ExitCode::FAILURE);
return Ok(false);
}
}
let duration = start.elapsed();
println!(
"Formatting {} files twice took {:.2}s",
cli.files.len(),
formatted_counter,
duration.as_secs_f32()
);

if any_errors {
Ok(ExitCode::FAILURE)
Ok(false)
} else {
Ok(ExitCode::SUCCESS)
Ok(true)
}
}

Expand Down
32 changes: 22 additions & 10 deletions scripts/check_ecosystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ class Repository(NamedTuple):
async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]:
"""Shallow clone this repository to a temporary directory."""
if checkout_dir.exists():
logger.debug(f"Reusing {self.org}/{self.repo}")
logger.debug(f"Reusing {self.org}:{self.repo}")
yield Path(checkout_dir)
return

logger.debug(f"Cloning {self.org}/{self.repo}")
logger.debug(f"Cloning {self.org}:{self.repo}")
git_command = [
"git",
"clone",
Expand Down Expand Up @@ -177,18 +177,17 @@ async def compare(
"""Check a specific repository against two versions of ruff."""
removed, added = set(), set()

# Allows to keep the checkouts locations
# By the default, the git clone are transient, but if the user provides a
# directory for permanent storage we keep it there
if checkouts:
checkout_parent = checkouts.joinpath(repo.org)
# Don't create the repodir itself, we need that for checking for existing
# clones
checkout_parent.mkdir(exist_ok=True, parents=True)
location_context = nullcontext(checkout_parent)
location_context = nullcontext(checkouts)
else:
location_context = tempfile.TemporaryDirectory()

with location_context as checkout_parent:
checkout_dir = Path(checkout_parent).joinpath(repo.repo)
assert ":" not in repo.org
assert ":" not in repo.repo
checkout_dir = Path(checkout_parent).joinpath(f"{repo.org}:{repo.repo}")
async with repo.clone(checkout_dir) as path:
try:
async with asyncio.TaskGroup() as tg:
Expand Down Expand Up @@ -284,8 +283,19 @@ async def main(

logger.debug(f"Checking {len(repositories)} projects")

# https://stackoverflow.com/a/61478547/3549270
# Otherwise doing 3k repositories can take >8GB RAM
semaphore = asyncio.Semaphore(50)

async def limited_parallelism(coroutine): # noqa: ANN
async with semaphore:
return await coroutine

results = await asyncio.gather(
*[compare(ruff1, ruff2, repo, checkouts) for repo in repositories.values()],
*[
limited_parallelism(compare(ruff1, ruff2, repo, checkouts))
for repo in repositories.values()
],
return_exceptions=True,
)

Expand Down Expand Up @@ -433,6 +443,8 @@ async def main(
logging.basicConfig(level=logging.INFO)

loop = asyncio.get_event_loop()
if args.checkouts:
args.checkouts.mkdir(exist_ok=True, parents=True)
main_task = asyncio.ensure_future(
main(
ruff1=args.ruff1,
Expand Down

0 comments on commit 03694ef

Please sign in to comment.