Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

walk: Add a cache for DirEntry metadata #863

Merged
merged 1 commit into from
Oct 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Upcoming release

## Performance improvements

- File metadata is now cached between the different filters that require it (e.g. `--owner`,
`--size`), reducing the number of `stat` syscalls when multiple filters are used; see #863

## Features
- Don't buffer command output from `--exec` when using a single thread. See #522

Expand All @@ -15,6 +20,8 @@
- Properly handle write errors to devices that are full, see #737
- Use local time zone for time functions (`--change-newer-than`, `--change-older-than`), see #631 (@jacobmischka)
- Support `--list-details` on more platforms (like BusyBox), see #783
- The filters `--owner`, `--size`, and `--changed-{within,before}` now apply to symbolic links
themselves, rather than the link target, except when `--follow` is specified; see #863

## Changes

Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ anyhow = "1.0"
dirs-next = "2.0"
normpath = "0.3"
chrono = "0.4"
once_cell = "1.8.0"

[dependencies.clap]
version = "2.31.3"
Expand Down
2 changes: 1 addition & 1 deletion src/filetypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ impl FileTypes {
|| (self.executables_only
&& !entry
.metadata()
.map(|m| filesystem::is_executable(&m))
.map(|m| filesystem::is_executable(m))
.unwrap_or(false))
|| (self.empty_only && !filesystem::is_empty(entry))
|| !(entry_type.is_file()
Expand Down
64 changes: 42 additions & 22 deletions src/walk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::time;
use anyhow::{anyhow, Result};
use ignore::overrides::OverrideBuilder;
use ignore::{self, WalkBuilder};
use once_cell::unsync::OnceCell;
use regex::bytes::Regex;

use crate::config::Config;
Expand Down Expand Up @@ -295,39 +296,58 @@ fn spawn_receiver(
})
}

pub enum DirEntry {
enum DirEntryInner {
Normal(ignore::DirEntry),
BrokenSymlink(PathBuf),
}

pub struct DirEntry {
inner: DirEntryInner,
metadata: OnceCell<Option<Metadata>>,
}

impl DirEntry {
fn normal(e: ignore::DirEntry) -> Self {
Self {
inner: DirEntryInner::Normal(e),
metadata: OnceCell::new(),
}
}

fn broken_symlink(path: PathBuf) -> Self {
Self {
inner: DirEntryInner::BrokenSymlink(path),
metadata: OnceCell::new(),
}
}

pub fn path(&self) -> &Path {
match self {
DirEntry::Normal(e) => e.path(),
DirEntry::BrokenSymlink(pathbuf) => pathbuf.as_path(),
match &self.inner {
DirEntryInner::Normal(e) => e.path(),
DirEntryInner::BrokenSymlink(pathbuf) => pathbuf.as_path(),
}
}

pub fn file_type(&self) -> Option<FileType> {
match self {
DirEntry::Normal(e) => e.file_type(),
DirEntry::BrokenSymlink(pathbuf) => {
pathbuf.symlink_metadata().map(|m| m.file_type()).ok()
}
match &self.inner {
DirEntryInner::Normal(e) => e.file_type(),
DirEntryInner::BrokenSymlink(_) => self.metadata().map(|m| m.file_type()),
}
}

pub fn metadata(&self) -> Option<Metadata> {
match self {
DirEntry::Normal(e) => e.metadata().ok(),
DirEntry::BrokenSymlink(_) => None,
}
pub fn metadata(&self) -> Option<&Metadata> {
self.metadata
.get_or_init(|| match &self.inner {
DirEntryInner::Normal(e) => e.metadata().ok(),
DirEntryInner::BrokenSymlink(path) => path.symlink_metadata().ok(),
})
.as_ref()
}

pub fn depth(&self) -> Option<usize> {
match self {
DirEntry::Normal(e) => Some(e.depth()),
DirEntry::BrokenSymlink(_) => None,
match &self.inner {
DirEntryInner::Normal(e) => Some(e.depth()),
DirEntryInner::BrokenSymlink(_) => None,
}
}
}
Expand Down Expand Up @@ -355,7 +375,7 @@ fn spawn_senders(
// Skip the root directory entry.
return ignore::WalkState::Continue;
}
Ok(e) => DirEntry::Normal(e),
Ok(e) => DirEntry::normal(e),
Err(ignore::Error::WithPath {
path,
err: inner_err,
Expand All @@ -367,7 +387,7 @@ fn spawn_senders(
.ok()
.map_or(false, |m| m.file_type().is_symlink()) =>
{
DirEntry::BrokenSymlink(path)
DirEntry::broken_symlink(path)
}
_ => {
return match tx_thread.send(WorkerResult::Error(ignore::Error::WithPath {
Expand Down Expand Up @@ -436,7 +456,7 @@ fn spawn_senders(
#[cfg(unix)]
{
if let Some(ref owner_constraint) = config.owner_constraint {
if let Ok(ref metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
if !owner_constraint.matches(metadata) {
return ignore::WalkState::Continue;
}
Expand All @@ -449,7 +469,7 @@ fn spawn_senders(
// Filter out unwanted sizes if it is a file and we have been given size constraints.
if !config.size_constraints.is_empty() {
if entry_path.is_file() {
if let Ok(metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
let file_size = metadata.len();
if config
.size_constraints
Expand All @@ -469,7 +489,7 @@ fn spawn_senders(
// Filter out unwanted modification times
if !config.time_constraints.is_empty() {
let mut matched = false;
if let Ok(metadata) = entry_path.metadata() {
if let Some(metadata) = entry.metadata() {
if let Ok(modified) = metadata.modified() {
matched = config
.time_constraints
Expand Down
16 changes: 15 additions & 1 deletion tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1652,9 +1652,22 @@ fn create_file_with_modified<P: AsRef<Path>>(path: P, duration_in_secs: u64) {
filetime::set_file_times(&path, ft, ft).expect("time modification failed");
}

#[cfg(test)]
fn remove_symlink<P: AsRef<Path>>(path: P) {
#[cfg(unix)]
fs::remove_file(path).expect("remove symlink");

// On Windows, symlinks remember whether they point to files or directories, so try both
#[cfg(windows)]
fs::remove_file(path.as_ref())
.or_else(|_| fs::remove_dir(path.as_ref()))
.expect("remove symlink");
}

#[test]
fn test_modified_relative() {
let te = TestEnv::new(&[], &[]);
remove_symlink(te.test_root().join("symlink"));
create_file_with_modified(te.test_root().join("foo_0_now"), 0);
create_file_with_modified(te.test_root().join("bar_1_min"), 60);
create_file_with_modified(te.test_root().join("foo_10_min"), 600);
Expand Down Expand Up @@ -1692,8 +1705,9 @@ fn change_file_modified<P: AsRef<Path>>(path: P, iso_date: &str) {
}

#[test]
fn test_modified_asolute() {
fn test_modified_absolute() {
let te = TestEnv::new(&[], &["15mar2018", "30dec2017"]);
remove_symlink(te.test_root().join("symlink"));
change_file_modified(te.test_root().join("15mar2018"), "2018-03-15T12:00:00Z");
change_file_modified(te.test_root().join("30dec2017"), "2017-12-30T23:59:00Z");

Expand Down