From 84f4b4ef688413c227be226aa071bbc9298c394b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Behnam=20Esfahbod=20=E2=9C=85?= Date: Wed, 12 Jul 2017 20:06:08 -0600 Subject: [PATCH] [ignore] Add extensive test for gitignore matching (#551) [ignore] tests and new matched_path_or_any_parents method The test data (gitignore rules and expected results) is based on the test repo at https://github.com/behnam/gitignore-test. The new `matched_path_or_any_parents` method fixes a bug in gitignore matching where rules of the form `/*` result in ignoring only first-level files, but not deeper files. This is not correct, as `/*` also matches the first-level directories under the root, so everything beneath them should be ignored as well. The new method fixes it by trying to match all parents in the path against the gitignore rules. A separate method is necessary because trying to match all parents necessarily entails a performance hit. --- .travis.yml | 1 + appveyor.yml | 1 + ignore/src/gitignore.rs | 46 ++- ...atched_path_or_any_parents_tests.gitignore | 216 ++++++++++++++ ...gnore_matched_path_or_any_parents_tests.rs | 263 ++++++++++++++++++ 5 files changed, 524 insertions(+), 3 deletions(-) create mode 100644 ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore create mode 100644 ignore/tests/gitignore_matched_path_or_any_parents_tests.rs diff --git a/.travis.yml b/.travis.yml index 7c3b872d0..52022134f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: rust env: global: - PROJECT_NAME=ripgrep + - RUST_BACKTRACE: full matrix: include: # Nightly channel. 
diff --git a/appveyor.yml b/appveyor.yml index d2de7e3b2..2aa4c526c 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,6 +1,7 @@ environment: global: PROJECT_NAME: ripgrep + RUST_BACKTRACE: full matrix: - TARGET: i686-pc-windows-gnu CHANNEL: stable diff --git a/ignore/src/gitignore.rs b/ignore/src/gitignore.rs index 7ae91bf80..b44428a37 100644 --- a/ignore/src/gitignore.rs +++ b/ignore/src/gitignore.rs @@ -169,8 +169,8 @@ impl Gitignore { self.num_whitelists } - /// Returns whether the given file path matched a pattern in this gitignore - /// matcher. + /// Returns whether the given path (file or directory) matched a pattern in + /// this gitignore matcher. /// /// `is_dir` should be true if the path refers to a directory and false /// otherwise. @@ -191,6 +191,46 @@ impl Gitignore { self.matched_stripped(self.strip(path.as_ref()), is_dir) } + /// Returns whether the given path (file or directory, and expected to be + /// under the root) or any of its parent directories (up to the root) + /// matched a pattern in this gitignore matcher. + /// + /// NOTE: This method is more expensive than walking the directory hierarchy + /// top-to-bottom and matching the entries. However, it is easier to use + /// when a list of paths is available without a hierarchy. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// The given path is matched relative to the path given when building + /// the matcher. Specifically, before matching `path`, its prefix (as + /// determined by a common suffix of the directory containing this + /// gitignore) is stripped. If there is no common suffix/prefix overlap, + /// then `path` is assumed to be relative to this matcher. 
+ pub fn matched_path_or_any_parents>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let mut path = self.strip(path.as_ref()); + debug_assert!( + !path.has_root(), + "path is expect to be under the root" + ); + loop { + match self.matched_stripped(path, is_dir) { + Match::None => match path.parent() { + Some(parent) => path = parent, + None => return Match::None, + }, + a_match => return a_match, + } + } + } + /// Like matched, but takes a path that has already been stripped. fn matched_stripped>( &self, @@ -440,7 +480,7 @@ impl GitignoreBuilder { } /// Toggle whether the globs should be matched case insensitively or not. - /// + /// /// This is disabled by default. pub fn case_insensitive( &mut self, yes: bool diff --git a/ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore b/ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore new file mode 100644 index 000000000..ac09e12f7 --- /dev/null +++ b/ignore/tests/gitignore_matched_path_or_any_parents_tests.gitignore @@ -0,0 +1,216 @@ +# Based on https://github.com/behnam/gitignore-test/blob/master/.gitignore + +### file in root + +# MATCH /file_root_1 +file_root_00 + +# NO_MATCH +file_root_01/ + +# NO_MATCH +file_root_02/* + +# NO_MATCH +file_root_03/** + + +# MATCH /file_root_10 +/file_root_10 + +# NO_MATCH +/file_root_11/ + +# NO_MATCH +/file_root_12/* + +# NO_MATCH +/file_root_13/** + + +# NO_MATCH +*/file_root_20 + +# NO_MATCH +*/file_root_21/ + +# NO_MATCH +*/file_root_22/* + +# NO_MATCH +*/file_root_23/** + + +# MATCH /file_root_30 +**/file_root_30 + +# NO_MATCH +**/file_root_31/ + +# NO_MATCH +**/file_root_32/* + +# NO_MATCH +**/file_root_33/** + + +### file in sub-dir + +# MATCH /parent_dir/file_deep_1 +file_deep_00 + +# NO_MATCH +file_deep_01/ + +# NO_MATCH +file_deep_02/* + +# NO_MATCH +file_deep_03/** + + +# NO_MATCH +/file_deep_10 + +# NO_MATCH +/file_deep_11/ + +# NO_MATCH +/file_deep_12/* + +# NO_MATCH 
+/file_deep_13/** + + +# MATCH /parent_dir/file_deep_20 +*/file_deep_20 + +# NO_MATCH +*/file_deep_21/ + +# NO_MATCH +*/file_deep_22/* + +# NO_MATCH +*/file_deep_23/** + + +# MATCH /parent_dir/file_deep_30 +**/file_deep_30 + +# NO_MATCH +**/file_deep_31/ + +# NO_MATCH +**/file_deep_32/* + +# NO_MATCH +**/file_deep_33/** + + +### dir in root + +# MATCH /dir_root_00 +dir_root_00 + +# MATCH /dir_root_01 +dir_root_01/ + +# MATCH /dir_root_02 +dir_root_02/* + +# MATCH /dir_root_03 +dir_root_03/** + + +# MATCH /dir_root_10 +/dir_root_10 + +# MATCH /dir_root_11 +/dir_root_11/ + +# MATCH /dir_root_12 +/dir_root_12/* + +# MATCH /dir_root_13 +/dir_root_13/** + + +# NO_MATCH +*/dir_root_20 + +# NO_MATCH +*/dir_root_21/ + +# NO_MATCH +*/dir_root_22/* + +# NO_MATCH +*/dir_root_23/** + + +# MATCH /dir_root_30 +**/dir_root_30 + +# MATCH /dir_root_31 +**/dir_root_31/ + +# MATCH /dir_root_32 +**/dir_root_32/* + +# MATCH /dir_root_33 +**/dir_root_33/** + + +### dir in sub-dir + +# MATCH /parent_dir/dir_deep_00 +dir_deep_00 + +# MATCH /parent_dir/dir_deep_01 +dir_deep_01/ + +# NO_MATCH +dir_deep_02/* + +# NO_MATCH +dir_deep_03/** + + +# NO_MATCH +/dir_deep_10 + +# NO_MATCH +/dir_deep_11/ + +# NO_MATCH +/dir_deep_12/* + +# NO_MATCH +/dir_deep_13/** + + +# MATCH /parent_dir/dir_deep_20 +*/dir_deep_20 + +# MATCH /parent_dir/dir_deep_21 +*/dir_deep_21/ + +# MATCH /parent_dir/dir_deep_22 +*/dir_deep_22/* + +# MATCH /parent_dir/dir_deep_23 +*/dir_deep_23/** + + +# MATCH /parent_dir/dir_deep_30 +**/dir_deep_30 + +# MATCH /parent_dir/dir_deep_31 +**/dir_deep_31/ + +# MATCH /parent_dir/dir_deep_32 +**/dir_deep_32/* + +# MATCH /parent_dir/dir_deep_33 +**/dir_deep_33/** diff --git a/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs b/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs new file mode 100644 index 000000000..1b3f61842 --- /dev/null +++ b/ignore/tests/gitignore_matched_path_or_any_parents_tests.rs @@ -0,0 +1,263 @@ +extern crate ignore; + + +use std::path::Path; + 
+use ignore::gitignore::{Gitignore, GitignoreBuilder}; + + +const IGNORE_FILE: &'static str = "tests/gitignore_matched_path_or_any_parents_tests.gitignore"; + + +fn get_gitignore() -> Gitignore { + let mut builder = GitignoreBuilder::new("ROOT"); + let error = builder.add(IGNORE_FILE); + assert!(error.is_none(), "failed to open gitignore file"); + builder.build().unwrap() +} + + +#[test] +#[should_panic(expected = "path is expect to be under the root")] +fn test_path_should_be_under_root() { + let gitignore = get_gitignore(); + let path = "/tmp/some_file"; + gitignore.matched_path_or_any_parents(Path::new(path), false); + assert!(false); +} + + +#[test] +fn test_files_in_root() { + let gitignore = get_gitignore(); + let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), false); + + // 0x + assert!(m("ROOT/file_root_00").is_ignore()); + assert!(m("ROOT/file_root_01").is_none()); + assert!(m("ROOT/file_root_02").is_none()); + assert!(m("ROOT/file_root_03").is_none()); + + // 1x + assert!(m("ROOT/file_root_10").is_ignore()); + assert!(m("ROOT/file_root_11").is_none()); + assert!(m("ROOT/file_root_12").is_none()); + assert!(m("ROOT/file_root_13").is_none()); + + // 2x + assert!(m("ROOT/file_root_20").is_none()); + assert!(m("ROOT/file_root_21").is_none()); + assert!(m("ROOT/file_root_22").is_none()); + assert!(m("ROOT/file_root_23").is_none()); + + // 3x + assert!(m("ROOT/file_root_30").is_ignore()); + assert!(m("ROOT/file_root_31").is_none()); + assert!(m("ROOT/file_root_32").is_none()); + assert!(m("ROOT/file_root_33").is_none()); +} + + +#[test] +fn test_files_in_deep() { + let gitignore = get_gitignore(); + let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), false); + + // 0x + assert!(m("ROOT/parent_dir/file_deep_00").is_ignore()); + assert!(m("ROOT/parent_dir/file_deep_01").is_none()); + assert!(m("ROOT/parent_dir/file_deep_02").is_none()); + assert!(m("ROOT/parent_dir/file_deep_03").is_none()); + + // 1x + 
assert!(m("ROOT/parent_dir/file_deep_10").is_none()); + assert!(m("ROOT/parent_dir/file_deep_11").is_none()); + assert!(m("ROOT/parent_dir/file_deep_12").is_none()); + assert!(m("ROOT/parent_dir/file_deep_13").is_none()); + + // 2x + assert!(m("ROOT/parent_dir/file_deep_20").is_ignore()); + assert!(m("ROOT/parent_dir/file_deep_21").is_none()); + assert!(m("ROOT/parent_dir/file_deep_22").is_none()); + assert!(m("ROOT/parent_dir/file_deep_23").is_none()); + + // 3x + assert!(m("ROOT/parent_dir/file_deep_30").is_ignore()); + assert!(m("ROOT/parent_dir/file_deep_31").is_none()); + assert!(m("ROOT/parent_dir/file_deep_32").is_none()); + assert!(m("ROOT/parent_dir/file_deep_33").is_none()); +} + + +#[test] +fn test_dirs_in_root() { + let gitignore = get_gitignore(); + let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), true); + + // 00 + assert!(m("ROOT/dir_root_00").is_ignore()); + assert!(m("ROOT/dir_root_00/file").is_ignore()); + assert!(m("ROOT/dir_root_00/child_dir/file").is_ignore()); + + // 01 + assert!(m("ROOT/dir_root_01").is_ignore()); + assert!(m("ROOT/dir_root_01/file").is_ignore()); + assert!(m("ROOT/dir_root_01/child_dir/file").is_ignore()); + + // 02 + assert!(m("ROOT/dir_root_02").is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_02/file").is_ignore()); + assert!(m("ROOT/dir_root_02/child_dir/file").is_ignore()); + + // 03 + assert!(m("ROOT/dir_root_03").is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_03/file").is_ignore()); + assert!(m("ROOT/dir_root_03/child_dir/file").is_ignore()); + + // 10 + assert!(m("ROOT/dir_root_10").is_ignore()); + assert!(m("ROOT/dir_root_10/file").is_ignore()); + assert!(m("ROOT/dir_root_10/child_dir/file").is_ignore()); + + // 11 + assert!(m("ROOT/dir_root_11").is_ignore()); + assert!(m("ROOT/dir_root_11/file").is_ignore()); + assert!(m("ROOT/dir_root_11/child_dir/file").is_ignore()); + + // 12 + assert!(m("ROOT/dir_root_12").is_none()); // dir itself doesn't match + 
assert!(m("ROOT/dir_root_12/file").is_ignore()); + assert!(m("ROOT/dir_root_12/child_dir/file").is_ignore()); + + // 13 + assert!(m("ROOT/dir_root_13").is_none()); + assert!(m("ROOT/dir_root_13/file").is_ignore()); + assert!(m("ROOT/dir_root_13/child_dir/file").is_ignore()); + + // 20 + assert!(m("ROOT/dir_root_20").is_none()); + assert!(m("ROOT/dir_root_20/file").is_none()); + assert!(m("ROOT/dir_root_20/child_dir/file").is_none()); + + // 21 + assert!(m("ROOT/dir_root_21").is_none()); + assert!(m("ROOT/dir_root_21/file").is_none()); + assert!(m("ROOT/dir_root_21/child_dir/file").is_none()); + + // 22 + assert!(m("ROOT/dir_root_22").is_none()); + assert!(m("ROOT/dir_root_22/file").is_none()); + assert!(m("ROOT/dir_root_22/child_dir/file").is_none()); + + // 23 + assert!(m("ROOT/dir_root_23").is_none()); + assert!(m("ROOT/dir_root_23/file").is_none()); + assert!(m("ROOT/dir_root_23/child_dir/file").is_none()); + + // 30 + assert!(m("ROOT/dir_root_30").is_ignore()); + assert!(m("ROOT/dir_root_30/file").is_ignore()); + assert!(m("ROOT/dir_root_30/child_dir/file").is_ignore()); + + // 31 + assert!(m("ROOT/dir_root_31").is_ignore()); + assert!(m("ROOT/dir_root_31/file").is_ignore()); + assert!(m("ROOT/dir_root_31/child_dir/file").is_ignore()); + + // 32 + assert!(m("ROOT/dir_root_32").is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_32/file").is_ignore()); + assert!(m("ROOT/dir_root_32/child_dir/file").is_ignore()); + + // 33 + assert!(m("ROOT/dir_root_33").is_none()); // dir itself doesn't match + assert!(m("ROOT/dir_root_33/file").is_ignore()); + assert!(m("ROOT/dir_root_33/child_dir/file").is_ignore()); +} + + +#[test] +fn test_dirs_in_deep() { + let gitignore = get_gitignore(); + let m = |path: &str| gitignore.matched_path_or_any_parents(Path::new(path), true); + + // 00 + assert!(m("ROOT/parent_dir/dir_deep_00").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_00/file").is_ignore()); + 
assert!(m("ROOT/parent_dir/dir_deep_00/child_dir/file").is_ignore()); + + // 01 + assert!(m("ROOT/parent_dir/dir_deep_01").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_01/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_01/child_dir/file").is_ignore()); + + // 02 + assert!(m("ROOT/parent_dir/dir_deep_02").is_none()); // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_02/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_02/child_dir/file").is_ignore()); + + // 03 + assert!(m("ROOT/parent_dir/dir_deep_03").is_none()); // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_03/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_03/child_dir/file").is_ignore()); + + // 10 + assert!(m("ROOT/parent_dir/dir_deep_10").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_10/file").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_10/child_dir/file").is_none()); + + // 11 + assert!(m("ROOT/parent_dir/dir_deep_11").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_11/file").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_11/child_dir/file").is_none()); + + // 12 + assert!(m("ROOT/parent_dir/dir_deep_12").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_12/file").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_12/child_dir/file").is_none()); + + // 13 + assert!(m("ROOT/parent_dir/dir_deep_13").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_13/file").is_none()); + assert!(m("ROOT/parent_dir/dir_deep_13/child_dir/file").is_none()); + + // 20 + assert!(m("ROOT/parent_dir/dir_deep_20").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_20/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_20/child_dir/file").is_ignore()); + + // 21 + assert!(m("ROOT/parent_dir/dir_deep_21").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_21/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_21/child_dir/file").is_ignore()); + + // 22 + assert!(m("ROOT/parent_dir/dir_deep_22").is_none()); // dir itself doesn't match 
+ assert!(m("ROOT/parent_dir/dir_deep_22/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_22/child_dir/file").is_ignore()); + + // 23 + assert!(m("ROOT/parent_dir/dir_deep_23").is_none()); // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_23/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_23/child_dir/file").is_ignore()); + + // 30 + assert!(m("ROOT/parent_dir/dir_deep_30").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_30/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_30/child_dir/file").is_ignore()); + + // 31 + assert!(m("ROOT/parent_dir/dir_deep_31").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_31/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_31/child_dir/file").is_ignore()); + + // 32 + assert!(m("ROOT/parent_dir/dir_deep_32").is_none()); // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_32/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_32/child_dir/file").is_ignore()); + + // 33 + assert!(m("ROOT/parent_dir/dir_deep_33").is_none()); // dir itself doesn't match + assert!(m("ROOT/parent_dir/dir_deep_33/file").is_ignore()); + assert!(m("ROOT/parent_dir/dir_deep_33/child_dir/file").is_ignore()); +}