Skip to content

Commit

Permalink
Add a garbage collection mechanism to the CLI
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Feb 1, 2024
1 parent 51e8609 commit 1b2531c
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 9 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/puffin-cache/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ clap = { workspace = true, features = ["derive"], optional = true }
directories = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
nanoid = { workspace = true }
rustc-hash = { workspace = true }
serde = { workspace = true, features = ["derive"] }
tempfile = { workspace = true }
tracing = { workspace = true }
Expand Down
111 changes: 104 additions & 7 deletions crates/puffin-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;

use fs_err as fs;
use rustc_hash::FxHashSet;
use tempfile::{tempdir, TempDir};
use tracing::debug;

use puffin_fs::directories;
use puffin_normalize::PackageName;
Expand Down Expand Up @@ -254,17 +256,97 @@ impl Cache {
/// Returns the number of entries removed from the cache.
pub fn remove(&self, name: &PackageName) -> Result<Removal, io::Error> {
let mut summary = Removal::default();
for bucket in [
CacheBucket::Wheels,
CacheBucket::BuiltWheels,
CacheBucket::Git,
CacheBucket::Interpreter,
CacheBucket::Simple,
] {
for bucket in CacheBucket::iter() {
summary += bucket.remove(self, name)?;
}
Ok(summary)
}

/// Run the garbage collector on the cache, removing any dangling entries.
///
/// Performs three passes, in order:
///
/// 1. Removes top-level entries that are neither a known cache bucket nor a
///    marker file (`CACHEDIR.TAG`, `.gitignore`). These typically represent
///    outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
/// 2. Within the built-wheels bucket, removes builds older than the most
///    recent build of each source distribution.
/// 3. Removes archives that are no longer referenced by a symlink from any
///    bucket.
///
/// Returns a summary of the files and directories removed.
pub fn prune(&self) -> Result<Removal, io::Error> {
    let mut summary = Removal::default();

    // First, remove any top-level directories that are unused. These typically represent
    // outdated cache buckets (e.g., `wheels-v0`, when latest is `wheels-v1`).
    for entry in fs::read_dir(&self.root)? {
        let entry = entry?;
        let metadata = entry.metadata()?;

        if metadata.is_dir() {
            // If the directory is not a cache bucket, remove it.
            if !CacheBucket::iter().any(|bucket| entry.file_name() == bucket.to_str()) {
                let path = entry.path();
                debug!("Removing dangling cache entry {}", path.display());
                summary += rm_rf(path)?;
            }
        } else {
            // If the file is not a marker file, remove it.
            if entry.file_name() != "CACHEDIR.TAG" && entry.file_name() != ".gitignore" {
                let path = entry.path();
                debug!("Removing dangling cache entry {}", path.display());
                summary += rm_rf(path)?;
            }
        }
    }

    // Second, remove any unused source distributions.
    let bucket = self.bucket(CacheBucket::BuiltWheels);
    if bucket.is_dir() {
        for entry in walkdir::WalkDir::new(bucket) {
            let entry = entry?;
            if entry.file_type().is_dir() {
                // Identify the manifest file. The presence of a manifest file indicates a
                // source distribution directory.
                if entry.path().join("manifest.msgpack").exists() {
                    // Delete any entries older than the most recent directory. The most recent
                    // directory represents the latest source distribution build; any older
                    // directories are unused.
                    //
                    // NOTE(review): `Metadata::created()` is unsupported on some
                    // platforms and filesystems. The `max` computation below silently
                    // skips such entries, but the removal loop propagates the error via
                    // `?` — confirm that asymmetry is intended.
                    if let Some(created) = fs::read_dir(entry.path())?
                        .filter_map(Result::ok)
                        .filter_map(|entry| entry.metadata().ok())
                        .filter(std::fs::Metadata::is_dir)
                        .filter_map(|metadata| metadata.created().ok())
                        .max()
                    {
                        for entry in fs::read_dir(entry.path())? {
                            let entry = entry?;
                            // NOTE(review): this removes *any* entry older than the
                            // newest build directory, including plain files such as
                            // the manifest itself — confirm that is intended.
                            if entry.metadata()?.created()? < created {
                                let path = entry.path();
                                debug!("Removing dangling cache entry {}", path.display());
                                summary += rm_rf(path)?;
                            }
                        }
                    }
                }
            }
        }
    }

    // Third, remove any unused archives (by searching for archives that are not symlinked).
    let mut references = FxHashSet::default();
    for bucket in CacheBucket::iter() {
        let bucket = self.bucket(bucket);
        if bucket.is_dir() {
            for entry in walkdir::WalkDir::new(bucket).contents_first(true) {
                let entry = entry?;
                if entry.file_type().is_symlink() {
                    // NOTE(review): assumes symlink targets are absolute paths into
                    // the archive bucket, since they're compared against absolute
                    // `entry.path()` values below — confirm.
                    references.insert(entry.path().read_link()?);
                }
            }
        }
    }

    // Guard against a missing archive bucket (e.g., a fresh cache), which would
    // otherwise cause `read_dir` to fail with `NotFound` — consistent with the
    // `is_dir()` guards on the other bucket walks above.
    let bucket = self.bucket(CacheBucket::Archive);
    if bucket.is_dir() {
        for entry in fs::read_dir(bucket)? {
            let entry = entry?;
            if !references.contains(&entry.path()) {
                let path = entry.path();
                debug!("Removing dangling cache entry {}", path.display());
                summary += rm_rf(path)?;
            }
        }
    }

    Ok(summary)
}
}

/// The different kinds of data in the cache are stored in different buckets, which in our case
Expand Down Expand Up @@ -604,6 +686,21 @@ impl CacheBucket {
}
Ok(summary)
}

/// Return an iterator over all cache buckets.
///
/// Every bucket must be listed here so that cache-wide operations
/// (e.g., removal and pruning) cover the entire cache.
pub fn iter() -> impl Iterator<Item = CacheBucket> {
    [
        CacheBucket::Wheels,
        CacheBucket::BuiltWheels,
        CacheBucket::FlatIndex,
        CacheBucket::Git,
        CacheBucket::Interpreter,
        CacheBucket::Simple,
        CacheBucket::Archive,
    ]
    .into_iter()
}
}

impl Display for CacheBucket {
Expand Down
59 changes: 59 additions & 0 deletions crates/puffin/src/commands/clean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,65 @@ pub(crate) fn clean(
Ok(ExitStatus::Success)
}

/// Run garbage collection on the cache.
///
/// If no cache exists at the configured root, reports that and exits
/// successfully; otherwise prunes the cache and prints a summary of the
/// entries and bytes removed.
pub(crate) fn prune(cache: &Cache, mut printer: Printer) -> Result<ExitStatus> {
    let root = cache.root();

    // Nothing to do if the cache directory doesn't exist.
    if !root.exists() {
        writeln!(
            printer,
            "No cache found at: {}",
            root.normalized_display().cyan()
        )?;
        return Ok(ExitStatus::Success);
    }

    writeln!(
        printer,
        "Pruning cache at: {}",
        root.normalized_display().cyan()
    )?;

    let summary = cache.prune().with_context(|| {
        format!("Failed to prune cache at: {}", root.normalized_display())
    })?;

    // Write a summary of the number of files and directories removed. When any
    // files were removed, the file count is reported; otherwise, the directory
    // count is reported.
    let (files, dirs) = (summary.num_files, summary.num_dirs);
    if files == 0 && dirs == 0 {
        write!(printer, "No unused entries found")?;
    } else if files == 0 {
        if dirs == 1 {
            write!(printer, "Removed 1 directory")?;
        } else {
            write!(printer, "Removed {dirs} directories")?;
        }
    } else if files == 1 {
        write!(printer, "Removed 1 file")?;
    } else {
        write!(printer, "Removed {files} files")?;
    }

    // If any, write a summary of the total byte count removed.
    if summary.total_bytes > 0 {
        let formatted = if summary.total_bytes < 1024 {
            format!("{}B", summary.total_bytes)
        } else {
            let (quantity, units) = human_readable_bytes(summary.total_bytes);
            format!("{quantity:.1}{units}")
        };
        write!(printer, " ({})", formatted.green())?;
    }

    writeln!(printer)?;

    Ok(ExitStatus::Success)
}

/// Formats a number of bytes into a human readable SI-prefixed size.
///
/// Returns a tuple of `(quantity, units)`.
Expand Down
2 changes: 1 addition & 1 deletion crates/puffin/src/commands/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::process::ExitCode;
use std::time::Duration;

pub(crate) use add::add;
pub(crate) use clean::clean;
pub(crate) use clean::{clean, prune};
use distribution_types::InstalledMetadata;
pub(crate) use freeze::freeze;
pub(crate) use pip_compile::{extra_name_with_clap_error, pip_compile, Upgrade};
Expand Down
5 changes: 4 additions & 1 deletion crates/puffin/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ enum Commands {
/// Create a virtual environment.
#[clap(alias = "virtualenv", alias = "v")]
Venv(VenvArgs),
/// Clear the cache.
/// Clear the cache, removing all entries or those linked to specific packages.
Clean(CleanArgs),
/// Prune all unreachable objects from the cache.
Prune,
/// Add a dependency to the workspace.
#[clap(hide = true)]
Add(AddArgs),
Expand Down Expand Up @@ -857,6 +859,7 @@ async fn run() -> Result<ExitStatus> {
command: PipCommand::Freeze(args),
}) => commands::freeze(&cache, args.strict, printer),
Commands::Clean(args) => commands::clean(&cache, &args.package, printer),
Commands::Prune => commands::prune(&cache, printer),
Commands::Venv(args) => {
let index_locations = IndexLocations::from_args(
args.index_url,
Expand Down

0 comments on commit 1b2531c

Please sign in to comment.