Skip to content

Commit

Permalink
walk: Add a cache for DirEntry metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
tavianator authored and sharkdp committed Oct 13, 2021
1 parent 02e9850 commit 7b5b3ec
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 24 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Upcoming release

## Performance improvements

- File metadata is now cached between the different filters that require it (e.g. `--owner`,
`--size`), reducing the number of `stat` syscalls when multiple filters are used; see #863

## Features
- Don't buffer command output from `--exec` when using a single thread. See #522

Expand All @@ -15,6 +20,8 @@
- Properly handle write errors to devices that are full, see #737
- Use local time zone for time functions (`--change-newer-than`, `--change-older-than`), see #631 (@jacobmischka)
- Support `--list-details` on more platforms (like BusyBox), see #783
- The filters `--owner`, `--size`, and `--changed-{within,before}` now apply to symbolic links
themselves, rather than the link target, except when `--follow` is specified; see #863

## Changes

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ anyhow = "1.0"
dirs-next = "2.0"
normpath = "0.3"
chrono = "0.4"
once_cell = "1.8.0"

[dependencies.clap]
version = "2.31.3"
Expand Down
2 changes: 1 addition & 1 deletion src/filetypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl FileTypes {
|| (self.executables_only
&& !entry
.metadata()
.map(|m| filesystem::is_executable(&m))
.map(|m| filesystem::is_executable(m))
.unwrap_or(false))
|| (self.empty_only && !filesystem::is_empty(entry))
|| !(entry_type.is_file()
Expand Down
64 changes: 42 additions & 22 deletions src/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::time;
use anyhow::{anyhow, Result};
use ignore::overrides::OverrideBuilder;
use ignore::{self, WalkBuilder};
use once_cell::unsync::OnceCell;
use regex::bytes::Regex;

use crate::config::Config;
Expand Down Expand Up @@ -295,39 +296,58 @@ fn spawn_receiver(
})
}

pub enum DirEntry {
enum DirEntryInner {
Normal(ignore::DirEntry),
BrokenSymlink(PathBuf),
}

pub struct DirEntry {
inner: DirEntryInner,
metadata: OnceCell<Option<Metadata>>,
}

impl DirEntry {
fn normal(e: ignore::DirEntry) -> Self {
Self {
inner: DirEntryInner::Normal(e),
metadata: OnceCell::new(),
}
}

fn broken_symlink(path: PathBuf) -> Self {
Self {
inner: DirEntryInner::BrokenSymlink(path),
metadata: OnceCell::new(),
}
}

pub fn path(&self) -> &Path {
match self {
DirEntry::Normal(e) => e.path(),
DirEntry::BrokenSymlink(pathbuf) => pathbuf.as_path(),
match &self.inner {
DirEntryInner::Normal(e) => e.path(),
DirEntryInner::BrokenSymlink(pathbuf) => pathbuf.as_path(),
}
}

pub fn file_type(&self) -> Option<FileType> {
match self {
DirEntry::Normal(e) => e.file_type(),
DirEntry::BrokenSymlink(pathbuf) => {
pathbuf.symlink_metadata().map(|m| m.file_type()).ok()
}
match &self.inner {
DirEntryInner::Normal(e) => e.file_type(),
DirEntryInner::BrokenSymlink(_) => self.metadata().map(|m| m.file_type()),
}
}

pub fn metadata(&self) -> Option<Metadata> {
match self {
DirEntry::Normal(e) => e.metadata().ok(),
DirEntry::BrokenSymlink(_) => None,
}
pub fn metadata(&self) -> Option<&Metadata> {
self.metadata
.get_or_init(|| match &self.inner {
DirEntryInner::Normal(e) => e.metadata().ok(),
DirEntryInner::BrokenSymlink(path) => path.symlink_metadata().ok(),
})
.as_ref()
}

pub fn depth(&self) -> Option<usize> {
match self {
DirEntry::Normal(e) => Some(e.depth()),
DirEntry::BrokenSymlink(_) => None,
match &self.inner {
DirEntryInner::Normal(e) => Some(e.depth()),
DirEntryInner::BrokenSymlink(_) => None,
}
}
}
Expand Down Expand Up @@ -355,7 +375,7 @@ fn spawn_senders(
// Skip the root directory entry.
return ignore::WalkState::Continue;
}
Ok(e) => DirEntry::Normal(e),
Ok(e) => DirEntry::normal(e),
Err(ignore::Error::WithPath {
path,
err: inner_err,
Expand All @@ -367,7 +387,7 @@ fn spawn_senders(
.ok()
.map_or(false, |m| m.file_type().is_symlink()) =>
{
DirEntry::BrokenSymlink(path)
DirEntry::broken_symlink(path)
}
_ => {
return match tx_thread.send(WorkerResult::Error(ignore::Error::WithPath {
Expand Down Expand Up @@ -436,7 +456,7 @@ fn spawn_senders(
#[cfg(unix)]
{
if let Some(ref owner_constraint) = config.owner_constraint {
if let Ok(ref metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
if !owner_constraint.matches(metadata) {
return ignore::WalkState::Continue;
}
Expand All @@ -449,7 +469,7 @@ fn spawn_senders(
// Filter out unwanted sizes if it is a file and we have been given size constraints.
if !config.size_constraints.is_empty() {
if entry_path.is_file() {
if let Ok(metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
let file_size = metadata.len();
if config
.size_constraints
Expand All @@ -469,7 +489,7 @@ fn spawn_senders(
// Filter out unwanted modification times
if !config.time_constraints.is_empty() {
let mut matched = false;
if let Ok(metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
if let Ok(modified) = metadata.modified() {
matched = config
.time_constraints
Expand Down
16 changes: 15 additions & 1 deletion tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1652,9 +1652,22 @@ fn create_file_with_modified<P: AsRef<Path>>(path: P, duration_in_secs: u64) {
filetime::set_file_times(&path, ft, ft).expect("time modification failed");
}

#[cfg(test)]
fn remove_symlink<P: AsRef<Path>>(path: P) {
#[cfg(unix)]
fs::remove_file(path).expect("remove symlink");

// On Windows, symlinks remember whether they point to files or directories, so try both
#[cfg(windows)]
fs::remove_file(path.as_ref())
.or_else(|_| fs::remove_dir(path.as_ref()))
.expect("remove symlink");
}

#[test]
fn test_modified_relative() {
let te = TestEnv::new(&[], &[]);
remove_symlink(te.test_root().join("symlink"));
create_file_with_modified(te.test_root().join("foo_0_now"), 0);
create_file_with_modified(te.test_root().join("bar_1_min"), 60);
create_file_with_modified(te.test_root().join("foo_10_min"), 600);
Expand Down Expand Up @@ -1692,8 +1705,9 @@ fn change_file_modified<P: AsRef<Path>>(path: P, iso_date: &str) {
}

#[test]
fn test_modified_asolute() {
fn test_modified_absolute() {
let te = TestEnv::new(&[], &["15mar2018", "30dec2017"]);
remove_symlink(te.test_root().join("symlink"));
change_file_modified(te.test_root().join("15mar2018"), "2018-03-15T12:00:00Z");
change_file_modified(te.test_root().join("30dec2017"), "2017-12-30T23:59:00Z");

Expand Down

0 comments on commit 7b5b3ec

Please sign in to comment.