diff --git a/CHANGELOG.md b/CHANGELOG.md index 72119e243..d48c3177c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Upcoming release +## Performance improvements + +- File metadata is now cached between the different filters that require it (e.g. `--owner`, + `--size`), reducing the number of `stat` syscalls when multiple filters are used; see #863 + ## Features - Don't buffer command output from `--exec` when using a single thread. See #522 @@ -15,6 +20,8 @@ - Properly handle write errors to devices that are full, see #737 - Use local time zone for time functions (`--change-newer-than`, `--change-older-than`), see #631 (@jacobmischka) - Support `--list-details` on more platforms (like BusyBox), see #783 +- The filters `--owner`, `--size`, and `--changed-{within,before}` now apply to symbolic links + themselves, rather than the link target, except when `--follow` is specified; see #863 ## Changes diff --git a/Cargo.lock b/Cargo.lock index 71a75eb2e..7cfa182d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -177,6 +177,7 @@ dependencies = [ "lscolors", "normpath", "num_cpus", + "once_cell", "regex", "regex-syntax", "tempdir", diff --git a/Cargo.toml b/Cargo.toml index 20bb1f788..93fe84560 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ anyhow = "1.0" dirs-next = "2.0" normpath = "0.3" chrono = "0.4" +once_cell = "1.8.0" [dependencies.clap] version = "2.31.3" diff --git a/src/filetypes.rs b/src/filetypes.rs index 3f9281d05..10872a070 100644 --- a/src/filetypes.rs +++ b/src/filetypes.rs @@ -37,7 +37,7 @@ impl FileTypes { || (self.executables_only && !entry .metadata() - .map(|m| filesystem::is_executable(&m)) + .map(|m| filesystem::is_executable(m)) .unwrap_or(false)) || (self.empty_only && !filesystem::is_empty(entry)) || !(entry_type.is_file() diff --git a/src/walk.rs b/src/walk.rs index cb50c4d78..7850ad7e7 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -13,6 +13,7 @@ use std::time; use anyhow::{anyhow, Result}; use ignore::overrides::OverrideBuilder; use ignore::{self, WalkBuilder}; +use once_cell::unsync::OnceCell; use regex::bytes::Regex; use crate::config::Config; @@ -295,39 +296,58 @@ fn spawn_receiver( }) } -pub enum DirEntry { +enum DirEntryInner { Normal(ignore::DirEntry), BrokenSymlink(PathBuf), } +pub struct DirEntry { + inner: DirEntryInner, + metadata: OnceCell>, +} + impl DirEntry { + fn normal(e: ignore::DirEntry) -> Self { + Self { + inner: DirEntryInner::Normal(e), + metadata: OnceCell::new(), + } + } + + fn broken_symlink(path: PathBuf) -> Self { + Self { + inner: DirEntryInner::BrokenSymlink(path), + metadata: OnceCell::new(), + } + } + pub fn path(&self) -> &Path { - match self { - DirEntry::Normal(e) => e.path(), - DirEntry::BrokenSymlink(pathbuf) => pathbuf.as_path(), + match &self.inner { + DirEntryInner::Normal(e) => e.path(), + DirEntryInner::BrokenSymlink(pathbuf) => pathbuf.as_path(), } } pub fn file_type(&self) -> Option { - match self { - DirEntry::Normal(e) => e.file_type(), - DirEntry::BrokenSymlink(pathbuf) => { - pathbuf.symlink_metadata().map(|m| m.file_type()).ok() - } + match &self.inner { + DirEntryInner::Normal(e) => e.file_type(), + DirEntryInner::BrokenSymlink(_) => self.metadata().map(|m| m.file_type()), } } - pub fn metadata(&self) -> Option { - match self { - DirEntry::Normal(e) => e.metadata().ok(), - DirEntry::BrokenSymlink(_) => None, - } + pub fn metadata(&self) -> Option<&Metadata> { + self.metadata + .get_or_init(|| match &self.inner { + DirEntryInner::Normal(e) => e.metadata().ok(), + DirEntryInner::BrokenSymlink(path) => path.symlink_metadata().ok(), + }) + .as_ref() } pub fn depth(&self) -> Option { - match self { - DirEntry::Normal(e) => Some(e.depth()), - DirEntry::BrokenSymlink(_) => None, + match &self.inner { + DirEntryInner::Normal(e) => Some(e.depth()), + DirEntryInner::BrokenSymlink(_) => None, } } } @@ -355,7 +375,7 @@ fn spawn_senders( // Skip the root directory entry. return ignore::WalkState::Continue; } - Ok(e) => DirEntry::Normal(e), + Ok(e) => DirEntry::normal(e), Err(ignore::Error::WithPath { path, err: inner_err, @@ -367,7 +387,7 @@ fn spawn_senders( .ok() .map_or(false, |m| m.file_type().is_symlink()) => { - DirEntry::BrokenSymlink(path) + DirEntry::broken_symlink(path) } _ => { return match tx_thread.send(WorkerResult::Error(ignore::Error::WithPath { @@ -436,7 +456,7 @@ fn spawn_senders( #[cfg(unix)] { if let Some(ref owner_constraint) = config.owner_constraint { - if let Ok(ref metadata) = entry_path.metadata() { + if let Some(metadata) = entry.metadata() { if !owner_constraint.matches(metadata) { return ignore::WalkState::Continue; } @@ -449,7 +469,7 @@ fn spawn_senders( // Filter out unwanted sizes if it is a file and we have been given size constraints. if !config.size_constraints.is_empty() { if entry_path.is_file() { - if let Ok(metadata) = entry_path.metadata() { + if let Some(metadata) = entry.metadata() { let file_size = metadata.len(); if config .size_constraints @@ -469,7 +489,7 @@ fn spawn_senders( // Filter out unwanted modification times if !config.time_constraints.is_empty() { let mut matched = false; - if let Ok(metadata) = entry_path.metadata() { + if let Some(metadata) = entry.metadata() { if let Ok(modified) = metadata.modified() { matched = config .time_constraints diff --git a/tests/tests.rs b/tests/tests.rs index 6278afb43..a7c04f49f 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1652,9 +1652,22 @@ fn create_file_with_modified>(path: P, duration_in_secs: u64) { filetime::set_file_times(&path, ft, ft).expect("time modification failed"); } +#[cfg(test)] +fn remove_symlink>(path: P) { + #[cfg(unix)] + fs::remove_file(path).expect("remove symlink"); + + // On Windows, symlinks remember whether they point to files or directories, so try both + #[cfg(windows)] + fs::remove_file(path.as_ref()) + .or_else(|_| fs::remove_dir(path.as_ref())) + .expect("remove symlink"); +} + #[test] fn test_modified_relative() { let te = TestEnv::new(&[], &[]); + remove_symlink(te.test_root().join("symlink")); create_file_with_modified(te.test_root().join("foo_0_now"), 0); create_file_with_modified(te.test_root().join("bar_1_min"), 60); create_file_with_modified(te.test_root().join("foo_10_min"), 600); @@ -1692,8 +1705,9 @@ fn change_file_modified>(path: P, iso_date: &str) { } #[test] -fn test_modified_asolute() { +fn test_modified_absolute() { let te = TestEnv::new(&[], &["15mar2018", "30dec2017"]); + remove_symlink(te.test_root().join("symlink")); change_file_modified(te.test_root().join("15mar2018"), "2018-03-15T12:00:00Z"); change_file_modified(te.test_root().join("30dec2017"), "2017-12-30T23:59:00Z");