diff --git a/Cargo.lock b/Cargo.lock
index c6d076d2532e..bdc4e896fe22 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4467,8 +4467,10 @@ dependencies = [
  "distribution-types",
  "fs-err",
  "nanoid",
+ "rustc-hash",
  "serde",
  "tempfile",
+ "tracing",
  "url",
  "uv-fs",
  "uv-normalize",
diff --git a/crates/uv-cache/Cargo.toml b/crates/uv-cache/Cargo.toml
index dcf46a7ba9d4..4a47d5666af9 100644
--- a/crates/uv-cache/Cargo.toml
+++ b/crates/uv-cache/Cargo.toml
@@ -24,7 +24,9 @@ clap = { workspace = true, features = ["derive", "env"], optional = true }
 directories = { workspace = true }
 fs-err = { workspace = true, features = ["tokio"] }
 nanoid = { workspace = true }
+rustc-hash = { workspace = true }
 serde = { workspace = true, features = ["derive"] }
 tempfile = { workspace = true }
+tracing = { workspace = true }
 url = { workspace = true }
 walkdir = { workspace = true }
diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs
index 98998623cc9a..b5ff0e87e141 100644
--- a/crates/uv-cache/src/lib.rs
+++ b/crates/uv-cache/src/lib.rs
@@ -6,10 +6,12 @@ use std::ops::Deref;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
-use distribution_types::InstalledDist;
 use fs_err as fs;
+use rustc_hash::FxHashSet;
 use tempfile::{tempdir, TempDir};
+use tracing::debug;
 
+use distribution_types::InstalledDist;
 use uv_fs::directories;
 use uv_normalize::PackageName;
 
@@ -283,17 +285,72 @@ impl Cache {
     /// Returns the number of entries removed from the cache.
     pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
         let mut summary = Removal::default();
-        for bucket in [
-            CacheBucket::Wheels,
-            CacheBucket::BuiltWheels,
-            CacheBucket::Git,
-            CacheBucket::Interpreter,
-            CacheBucket::Simple,
-        ] {
+        for bucket in CacheBucket::iter() {
             summary += bucket.remove(self, name)?;
         }
         Ok(summary)
     }
+
+    /// Run the garbage collector on the cache, removing any dangling entries.
+    pub fn prune(&self) -> Result<Removal, io::Error> {
+        let mut summary = Removal::default();
+
+        // First, remove any top-level directories that are unused. These typically represent
+        // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
+        for entry in fs::read_dir(&self.root)? {
+            let entry = entry?;
+            let metadata = entry.metadata()?;
+
+            if entry.file_name() == "CACHEDIR.TAG"
+                || entry.file_name() == ".gitignore"
+                || entry.file_name() == ".git"
+            {
+                continue;
+            }
+
+            if metadata.is_dir() {
+                // If the directory is not a cache bucket, remove it.
+                if CacheBucket::iter().all(|bucket| entry.file_name() != bucket.to_str()) {
+                    let path = entry.path();
+                    debug!("Removing dangling cache entry: {}", path.display());
+                    summary += rm_rf(path)?;
+                }
+            } else {
+                // If the file is not a marker file, remove it.
+                let path = entry.path();
+                debug!("Removing dangling cache entry: {}", path.display());
+                summary += rm_rf(path)?;
+            }
+        }
+
+        // Second, remove any unused archives (by searching for archives that are not symlinked).
+        // TODO(charlie): Remove any unused source distributions. This requires introspecting the
+        // cache contents, e.g., reading and deserializing the manifests.
+        let mut references = FxHashSet::default();
+
+        for bucket in CacheBucket::iter() {
+            let bucket = self.bucket(bucket);
+            if bucket.is_dir() {
+                for entry in walkdir::WalkDir::new(bucket) {
+                    let entry = entry?;
+                    if entry.file_type().is_symlink() {
+                        references.insert(entry.path().canonicalize()?);
+                    }
+                }
+            }
+        }
+
+        for entry in fs::read_dir(self.bucket(CacheBucket::Archive))? {
+            let entry = entry?;
+            let path = entry.path().canonicalize()?;
+            if !references.contains(&path) {
+                debug!("Removing dangling cache entry: {}", path.display());
+                summary += rm_rf(path)?;
+            }
+        }
+
+        Ok(summary)
+    }
 }
 
 /// The different kinds of data in the cache are stored in different bucket, which in our case
@@ -633,6 +690,21 @@ impl CacheBucket {
         }
         Ok(summary)
     }
+
+    /// Return an iterator over all cache buckets.
+    pub fn iter() -> impl Iterator<Item = CacheBucket> {
+        [
+            CacheBucket::Wheels,
+            CacheBucket::BuiltWheels,
+            CacheBucket::FlatIndex,
+            CacheBucket::Git,
+            CacheBucket::Interpreter,
+            CacheBucket::Simple,
+            CacheBucket::Archive,
+        ]
+        .iter()
+        .copied()
+    }
 }
 
 impl Display for CacheBucket {
diff --git a/crates/uv/src/commands/cache_clean.rs b/crates/uv/src/commands/cache_clean.rs
index 60e669f71e57..cf0eb50fe6ee 100644
--- a/crates/uv/src/commands/cache_clean.rs
+++ b/crates/uv/src/commands/cache_clean.rs
@@ -7,10 +7,10 @@ use uv_cache::Cache;
 use uv_fs::Simplified;
 use uv_normalize::PackageName;
 
-use crate::commands::ExitStatus;
+use crate::commands::{human_readable_bytes, ExitStatus};
 use crate::printer::Printer;
 
-/// Clear the cache.
+/// Clear the cache, removing all entries or those linked to specific packages.
 pub(crate) fn cache_clean(
     packages: &[PackageName],
     cache: &Cache,
@@ -123,19 +123,3 @@ pub(crate) fn cache_clean(
 
     Ok(ExitStatus::Success)
 }
-
-/// Formats a number of bytes into a human readable SI-prefixed size.
-///
-/// Returns a tuple of `(quantity, units)`.
-#[allow(
-    clippy::cast_possible_truncation,
-    clippy::cast_possible_wrap,
-    clippy::cast_precision_loss,
-    clippy::cast_sign_loss
-)]
-fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
-    static UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];
-    let bytes = bytes as f32;
-    let i = ((bytes.log2() / 10.0) as usize).min(UNITS.len() - 1);
-    (bytes / 1024_f32.powi(i as i32), UNITS[i])
-}
diff --git a/crates/uv/src/commands/cache_prune.rs b/crates/uv/src/commands/cache_prune.rs
new file mode 100644
index 000000000000..c5c4b60d91fd
--- /dev/null
+++ b/crates/uv/src/commands/cache_prune.rs
@@ -0,0 +1,66 @@
+use std::fmt::Write;
+
+use anyhow::{Context, Result};
+use owo_colors::OwoColorize;
+
+use uv_cache::Cache;
+use uv_fs::Simplified;
+
+use crate::commands::{human_readable_bytes, ExitStatus};
+use crate::printer::Printer;
+
+/// Prune all unreachable objects from the cache.
+pub(crate) fn cache_prune(cache: &Cache, printer: Printer) -> Result<ExitStatus> {
+    if !cache.root().exists() {
+        writeln!(
+            printer.stderr(),
+            "No cache found at: {}",
+            cache.root().user_display().cyan()
+        )?;
+        return Ok(ExitStatus::Success);
+    }
+
+    writeln!(
+        printer.stderr(),
+        "Pruning cache at: {}",
+        cache.root().user_display().cyan()
+    )?;
+
+    let summary = cache
+        .prune()
+        .with_context(|| format!("Failed to prune cache at: {}", cache.root().user_display()))?;
+
+    // Write a summary of the number of files and directories removed.
+    match (summary.num_files, summary.num_dirs) {
+        (0, 0) => {
+            write!(printer.stderr(), "No unused entries found")?;
+        }
+        (0, 1) => {
+            write!(printer.stderr(), "Removed 1 directory")?;
+        }
+        (0, num_dirs_removed) => {
+            write!(printer.stderr(), "Removed {num_dirs_removed} directories")?;
+        }
+        (1, _) => {
+            write!(printer.stderr(), "Removed 1 file")?;
+        }
+        (num_files_removed, _) => {
+            write!(printer.stderr(), "Removed {num_files_removed} files")?;
+        }
+    }
+
+    // If any, write a summary of the total byte count removed.
+    if summary.total_bytes > 0 {
+        let bytes = if summary.total_bytes < 1024 {
+            format!("{}B", summary.total_bytes)
+        } else {
+            let (bytes, unit) = human_readable_bytes(summary.total_bytes);
+            format!("{bytes:.1}{unit}")
+        };
+        write!(printer.stderr(), " ({})", bytes.green())?;
+    }
+
+    writeln!(printer.stderr())?;
+
+    Ok(ExitStatus::Success)
+}
diff --git a/crates/uv/src/commands/mod.rs b/crates/uv/src/commands/mod.rs
index 1b3e5d22dbe2..fa8c2cd815b4 100644
--- a/crates/uv/src/commands/mod.rs
+++ b/crates/uv/src/commands/mod.rs
@@ -6,6 +6,7 @@ use owo_colors::OwoColorize;
 
 pub(crate) use cache_clean::cache_clean;
 pub(crate) use cache_dir::cache_dir;
+pub(crate) use cache_prune::cache_prune;
 use distribution_types::InstalledMetadata;
 pub(crate) use pip_check::pip_check;
 pub(crate) use pip_compile::{extra_name_with_clap_error, pip_compile};
@@ -28,6 +29,7 @@ use crate::printer::Printer;
 
 mod cache_clean;
 mod cache_dir;
+mod cache_prune;
 mod pip_check;
 mod pip_compile;
 mod pip_freeze;
@@ -155,3 +157,19 @@ pub(super) async fn compile_bytecode(
     )?;
     Ok(())
 }
+
+/// Formats a number of bytes into a human readable SI-prefixed size.
+///
+/// Returns a tuple of `(quantity, units)`.
+#[allow(
+    clippy::cast_possible_truncation,
+    clippy::cast_possible_wrap,
+    clippy::cast_precision_loss,
+    clippy::cast_sign_loss
+)]
+pub(super) fn human_readable_bytes(bytes: u64) -> (f32, &'static str) {
+    static UNITS: [&str; 7] = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"];
+    let bytes = bytes as f32;
+    let i = ((bytes.log2() / 10.0) as usize).min(UNITS.len() - 1);
+    (bytes / 1024_f32.powi(i as i32), UNITS[i])
+}
diff --git a/crates/uv/src/main.rs b/crates/uv/src/main.rs
index cd3c8b4abf2e..b171f601392a 100644
--- a/crates/uv/src/main.rs
+++ b/crates/uv/src/main.rs
@@ -137,7 +137,7 @@ enum Commands {
     /// Manage the `uv` executable.
    #[clap(name = "self")]
     Self_(SelfNamespace),
-    /// Remove all items from the cache.
+    /// Clear the cache, removing all entries or those linked to specific packages.
    #[clap(hide = true)]
     Clean(CleanArgs),
     /// Display uv's version
@@ -170,8 +170,10 @@ struct CacheNamespace {
 
 #[derive(Subcommand)]
 enum CacheCommand {
-    /// Remove all items from the cache.
+    /// Clear the cache, removing all entries or those linked to specific packages.
     Clean(CleanArgs),
+    /// Prune all unreachable objects from the cache.
+    Prune,
     /// Show the cache directory.
     Dir,
 }
@@ -1759,6 +1761,9 @@ async fn run() -> Result<ExitStatus> {
         Commands::Cache(CacheNamespace {
             command: CacheCommand::Clean(args),
         }) | Commands::Clean(args) => commands::cache_clean(&args.package, &cache, printer),
+        Commands::Cache(CacheNamespace {
+            command: CacheCommand::Prune,
+        }) => commands::cache_prune(&cache, printer),
         Commands::Cache(CacheNamespace {
             command: CacheCommand::Dir,
         }) => {
diff --git a/crates/uv/tests/cache_prune.rs b/crates/uv/tests/cache_prune.rs
new file mode 100644
index 000000000000..0fd85238fcd6
--- /dev/null
+++ b/crates/uv/tests/cache_prune.rs
@@ -0,0 +1,171 @@
+#![cfg(all(feature = "python", feature = "pypi"))]
+
+use std::process::Command;
+
+use anyhow::Result;
+use assert_cmd::prelude::*;
+use assert_fs::prelude::*;
+
+use common::uv_snapshot;
+
+use crate::common::{get_bin, TestContext, INSTA_FILTERS};
+
+mod common;
+
+/// Create a `cache prune` command with options shared across scenarios.
+fn prune_command(context: &TestContext) -> Command {
+    let mut command = Command::new(get_bin());
+    command
+        .arg("cache")
+        .arg("prune")
+        .arg("--cache-dir")
+        .arg(context.cache_dir.path())
+        .env("VIRTUAL_ENV", context.venv.as_os_str())
+        .current_dir(&context.temp_dir);
+
+    if cfg!(all(windows, debug_assertions)) {
+        // TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
+        // default windows stack of 1MB
+        command.env("UV_STACK_SIZE", (8 * 1024 * 1024).to_string());
+    }
+
+    command
+}
+
+/// Create a `pip sync` command with options shared across scenarios.
+fn sync_command(context: &TestContext) -> Command {
+    let mut command = Command::new(get_bin());
+    command
+        .arg("pip")
+        .arg("sync")
+        .arg("--cache-dir")
+        .arg(context.cache_dir.path())
+        .env("VIRTUAL_ENV", context.venv.as_os_str())
+        .current_dir(&context.temp_dir);
+
+    if cfg!(all(windows, debug_assertions)) {
+        // TODO(konstin): Reduce stack usage in debug mode enough that the tests pass with the
+        // default windows stack of 1MB
+        command.env("UV_STACK_SIZE", (8 * 1024 * 1024).to_string());
+    }
+
+    command
+}
+
+/// `cache prune` should be a no-op if there's nothing out-of-date in the cache.
+#[test]
+fn prune_no_op() -> Result<()> {
+    let context = TestContext::new("3.12");
+
+    let requirements_txt = context.temp_dir.child("requirements.txt");
+    requirements_txt.write_str("anyio")?;
+
+    // Install a requirement, to populate the cache.
+    sync_command(&context)
+        .arg("requirements.txt")
+        .assert()
+        .success();
+
+    let filters = [(r"Pruning cache at: .*", "Pruning cache at: [CACHE_DIR]")]
+        .into_iter()
+        .chain(INSTA_FILTERS.to_vec())
+        .collect::<Vec<_>>();
+
+    uv_snapshot!(filters, prune_command(&context).arg("--verbose"), @r###"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+
+    ----- stderr -----
+    Pruning cache at: [CACHE_DIR]
+    No unused entries found
+    "###);
+
+    Ok(())
+}
+
+/// `cache prune` should remove any stale top-level directories from the cache.
+#[test]
+fn prune_stale_directory() -> Result<()> {
+    let context = TestContext::new("3.12");
+
+    let requirements_txt = context.temp_dir.child("requirements.txt");
+    requirements_txt.write_str("anyio")?;
+
+    // Install a requirement, to populate the cache.
+    sync_command(&context)
+        .arg("requirements.txt")
+        .assert()
+        .success();
+
+    // Add a stale directory to the cache.
+    let simple = context.cache_dir.child("simple-v4");
+    simple.create_dir_all()?;
+
+    let filters = [
+        (r"Pruning cache at: .*", "Pruning cache at: [CACHE_DIR]"),
+        (
+            r"Removing dangling cache entry: .*[\\|/]simple-v4",
+            "Pruning cache at: [CACHE_DIR]/simple-v4",
+        ),
+    ]
+    .into_iter()
+    .chain(INSTA_FILTERS.to_vec())
+    .collect::<Vec<_>>();
+
+    uv_snapshot!(filters, prune_command(&context).arg("--verbose"), @r###"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+
+    ----- stderr -----
+    Pruning cache at: [CACHE_DIR]
+    DEBUG Pruning cache at: [CACHE_DIR]/simple-v4
+    Removed 1 directory
+    "###);
+
+    Ok(())
+}
+
+/// `cache prune` should remove any stale symlink from the cache.
+#[test]
+fn prune_stale_symlink() -> Result<()> {
+    let context = TestContext::new("3.12");
+
+    let requirements_txt = context.temp_dir.child("requirements.txt");
+    requirements_txt.write_str("anyio")?;
+
+    // Install a requirement, to populate the cache.
+    sync_command(&context)
+        .arg("requirements.txt")
+        .assert()
+        .success();
+
+    // Remove the wheels directory, causing the symlink to become stale.
+    let wheels = context.cache_dir.child("wheels-v0");
+    fs_err::remove_dir_all(wheels)?;
+
+    let filters = [
+        (r"Pruning cache at: .*", "Pruning cache at: [CACHE_DIR]"),
+        (
+            r"Removing dangling cache entry: .*[\\|/]archive-v0[\\|/].*",
+            "Pruning cache at: [CACHE_DIR]/archive-v0/anyio",
+        ),
+    ]
+    .into_iter()
+    .chain(INSTA_FILTERS.to_vec())
+    .collect::<Vec<_>>();
+
+    uv_snapshot!(filters, prune_command(&context).arg("--verbose"), @r###"
+    success: true
+    exit_code: 0
+    ----- stdout -----
+
+    ----- stderr -----
+    Pruning cache at: [CACHE_DIR]
+    DEBUG Pruning cache at: [CACHE_DIR]/archive-v0/anyio
+    Removed 44 files ([SIZE])
+    "###);
+
+    Ok(())
+}
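
Note on the pruning strategy: the archive pass in `Cache::prune` above is a symlink reference scan. It collects the canonicalized target of every symlink in every cache bucket, then treats any top-level entry of the archive bucket that no symlink points to as dangling. The sketch below restates that pass as a standalone, dry-run helper under stated assumptions: the function name and its arguments are hypothetical and not part of this patch (the real implementation iterates `CacheBucket::iter()`, calls `rm_rf`, and accumulates a `Removal` summary), while `walkdir` and `rustc-hash` are the same crates the patch depends on.

use std::io;
use std::path::{Path, PathBuf};

use rustc_hash::FxHashSet;

/// Dry run: return the top-level entries under `archive_dir` that no symlink in
/// any of `buckets` points to. (Hypothetical helper, for illustration only.)
fn dangling_archives(buckets: &[PathBuf], archive_dir: &Path) -> io::Result<Vec<PathBuf>> {
    // Collect the canonicalized target of every symlink in every bucket.
    let mut references = FxHashSet::default();
    for bucket in buckets {
        if bucket.is_dir() {
            for entry in walkdir::WalkDir::new(bucket) {
                let entry = entry?;
                if entry.file_type().is_symlink() {
                    references.insert(entry.path().canonicalize()?);
                }
            }
        }
    }

    // Anything in the archive bucket that is never referenced is dangling.
    let mut dangling = Vec::new();
    for entry in std::fs::read_dir(archive_dir)? {
        let path = entry?.path().canonicalize()?;
        if !references.contains(&path) {
            dangling.push(path);
        }
    }
    Ok(dangling)
}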