From bc246aaa81cd7023e8533a006211a800621e8907 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 31 Mar 2022 18:56:56 +0800 Subject: [PATCH 01/56] some TODOs related to precomposed unicode on MacOS (#364) --- git-repository/src/config.rs | 1 + git-repository/src/repository/location.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/git-repository/src/config.rs b/git-repository/src/config.rs index 3caad67c79c..f9f27b4949c 100644 --- a/git-repository/src/config.rs +++ b/git-repository/src/config.rs @@ -26,6 +26,7 @@ pub(crate) struct Cache { pub object_hash: git_hash::Kind, /// If true, multi-pack indices, whether present or not, may be used by the object database. pub use_multi_pack_index: bool, + // TODO: make core.precomposeUnicode available as well. } mod cache { diff --git a/git-repository/src/repository/location.rs b/git-repository/src/repository/location.rs index bc391b914ed..f3998dbbf25 100644 --- a/git-repository/src/repository/location.rs +++ b/git-repository/src/repository/location.rs @@ -9,7 +9,7 @@ impl crate::Repository { self.work_tree.as_deref() } - // TODO: tests + // TODO: tests, respect precomposeUnicode /// The directory of the binary path of the current process. pub fn install_dir(&self) -> std::io::Result { std::env::current_exe().and_then(|exe| { From b3efc94134a32018db1d6a2d7f8cc397c4371999 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 16:49:20 +0800 Subject: [PATCH 02/56] Add git-glob crate with pattern matching parsing from git-attributes::ignore (#301) --- Cargo.lock | 11 +++ Cargo.toml | 1 + Makefile | 1 + README.md | 1 + crate-status.md | 12 ++-- git-glob/CHANGELOG.md | 7 ++ git-glob/Cargo.toml | 27 +++++++ git-glob/src/lib.rs | 22 ++++++ git-glob/src/parse.rs | 69 ++++++++++++++++++ git-glob/tests/attributes.rs | 1 + git-glob/tests/parse/mod.rs | 134 +++++++++++++++++++++++++++++++++++ 11 files changed, 278 insertions(+), 8 deletions(-) create mode 100644 git-glob/CHANGELOG.md create mode 100644 git-glob/Cargo.toml create mode 100644 git-glob/src/lib.rs create mode 100644 git-glob/src/parse.rs create mode 100644 git-glob/tests/attributes.rs create mode 100644 git-glob/tests/parse/mod.rs diff --git a/Cargo.lock b/Cargo.lock index c65bc235437..7a0b7f4433e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1179,6 +1179,17 @@ dependencies = [ "walkdir", ] +[[package]] +name = "git-glob" +version = "0.1.0" +dependencies = [ + "bitflags", + "bstr", + "git-quote", + "git-testtools", + "serde", +] + [[package]] name = "git-hash" version = "0.9.3" diff --git a/Cargo.toml b/Cargo.toml index 473a9df77c8..6f98679dfaf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,6 +135,7 @@ members = [ "git-chunk", "git-quote", "git-object", + "git-glob", "git-diff", "git-traverse", "git-index", diff --git a/Makefile b/Makefile index 08877c8d273..11a4416f7ca 100644 --- a/Makefile +++ b/Makefile @@ -91,6 +91,7 @@ check: ## Build all code in suitable configurations cd git-index && cargo check --features serde1 cd git-revision && cargo check --features serde1 cd git-attributes && cargo check --features serde1 + cd git-glob && cargo check --features serde1 cd git-mailmap && cargo check --features serde1 cd git-worktree && cargo check --features serde1 cd git-actor && cargo check --features serde1 diff --git a/README.md b/README.md index d0bd828fcff..a9dfa3fc0ab 100644 --- a/README.md +++ b/README.md @@ -119,6 +119,7 @@ Crates that seem feature complete and need to see some more use before they can * [git-revision](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-revision) * [git-attributes](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-attributes) * [git-quote](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-quote) + * [git-glob](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-glob) * **idea** * [git-note](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-note) * [git-pathspec](https://github.com/Byron/gitoxide/blob/main/crate-status.md#git-pathspec) diff --git a/crate-status.md b/crate-status.md index 9e3513a03fc..0ce6c60b865 100644 --- a/crate-status.md +++ b/crate-status.md @@ -1,5 +1,4 @@ ### git-actor - * [x] read and write a signature that uniquely identifies an actor within a git repository ### git-hash @@ -28,7 +27,6 @@ * [ ] Some examples ### git-pack - * **packs** * [x] traverse pack index * [x] 'object' abstraction @@ -73,7 +71,6 @@ * [ ] Some examples ### git-odb - * **loose object store** * [x] traverse * [x] read @@ -210,25 +207,21 @@ Check out the [performance discussion][git-traverse-performance] as well. * [ ] Some examples ### git-attributes - * [x] parse git-ignore files (aka git-attributes without the attributes or negation) * [x] parse git-attributes files * [ ] create an attributes stack, ideally one that includes 'ignored' status from .gitignore files. * [ ] support for built-in `binary` macro for `-text -diff -merge` ### git-quote - * **ansi-c** * [x] quote * [ ] unquote ### git-mailmap - * [x] parsing * [x] lookup and mapping of author names ### git-pathspec - * [ ] parse * [ ] check for match @@ -238,6 +231,10 @@ A mechanism to associate metadata with any object, and keep revisions of it usin * [ ] CRUD for git notes +### git-glob +* [ ] parse pattern +* [ ] a type for pattern matching + ### git-worktree * handle the working tree/checkout - [x] checkout an index of files, executables and symlinks just as fast as git @@ -263,7 +260,6 @@ A mechanism to associate metadata with any object, and keep revisions of it usin * parse specifications into revisions (like `git rev-parse`) ### git-submodule - * CRUD for submodules * try to handle with all the nifty interactions and be a little more comfortable than what git offers, lay a foundation for smarter git submodules. diff --git a/git-glob/CHANGELOG.md b/git-glob/CHANGELOG.md new file mode 100644 index 00000000000..52e80a16a21 --- /dev/null +++ b/git-glob/CHANGELOG.md @@ -0,0 +1,7 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + diff --git a/git-glob/Cargo.toml b/git-glob/Cargo.toml new file mode 100644 index 00000000000..336945fbdef --- /dev/null +++ b/git-glob/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "git-glob" +version = "0.1.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT/Apache-2.0" +description = "A WIP crate of the gitoxide project dealing with pattern matching" +authors = ["Sebastian Thiel "] +edition = "2018" + +[lib] +doctest = false + +[features] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. +serde1 = ["serde", "bstr/serde1"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +git-quote = { version = "^0.2.0", path = "../git-quote" } + +bstr = { version = "0.2.13", default-features = false, features = ["std"]} +bitflags = "1.3.2" +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} + +[dev-dependencies] +git-testtools = { path = "../tests/tools"} diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs new file mode 100644 index 00000000000..ded3b2fe43b --- /dev/null +++ b/git-glob/src/lib.rs @@ -0,0 +1,22 @@ +#![forbid(unsafe_code)] +#![deny(rust_2018_idioms)] + +pub mod pattern { + use bitflags::bitflags; + + bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Mode: u32 { + /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. + const NO_SUB_DIR = 1 << 0; + /// A pattern that is '*literal', meaning that it ends with what's given here + const ENDS_WITH = 1 << 1; + /// The pattern must match a directory, and not a file. + const MUST_BE_DIR = 1 << 2; + const NEGATIVE = 1 << 3; + } + } +} + +mod parse; +pub use parse::parse_line as parse; diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs new file mode 100644 index 00000000000..f29727614c7 --- /dev/null +++ b/git-glob/src/parse.rs @@ -0,0 +1,69 @@ +use crate::pattern; +use crate::pattern::Mode; +use bstr::{BString, ByteSlice}; + +#[inline] +pub fn parse_line(mut line: &[u8]) -> Option<(BString, pattern::Mode)> { + let mut mode = Mode::empty(); + if line.is_empty() { + return None; + }; + if line.first() == Some(&b'!') { + mode |= Mode::NEGATIVE; + line = &line[1..]; + } else if line.first() == Some(&b'\\') { + let second = line.get(1); + if second == Some(&b'!') || second == Some(&b'#') { + line = &line[1..]; + } + } + if line.iter().all(|b| b.is_ascii_whitespace()) { + return None; + } + let mut line = truncate_non_escaped_trailing_spaces(line); + if line.last() == Some(&b'/') { + mode |= Mode::MUST_BE_DIR; + line.pop(); + } + if !line.contains(&b'/') { + mode |= Mode::NO_SUB_DIR; + } + if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { + mode |= Mode::ENDS_WITH; + } + Some((line, mode)) +} + +/// We always copy just because that's ultimately needed anyway, not because we always have to. +fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { + match buf.rfind_not_byteset(br"\ ") { + Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace + None => buf.into(), + Some(start_of_non_space) => { + // This seems a bit strange but attempts to recreate the git implementation while + // actually removing the escape characters before spaces. We leave other backslashes + // for escapes to be handled by `glob/globset`. + let mut res: BString = buf[..start_of_non_space + 1].into(); + + let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); + let mut bare_spaces = 0; + while let Some(b) = trailing_bytes.next() { + match b { + b' ' => { + bare_spaces += 1; + } + b'\\' => { + res.extend(std::iter::repeat(b' ').take(bare_spaces)); + bare_spaces = 0; + // Skip what follows, like git does, but keep spaces if possible. + if trailing_bytes.next() == Some(&b' ') { + res.push(b' '); + } + } + _ => unreachable!("BUG: this must be either backslash or space"), + } + } + res + } + } +} diff --git a/git-glob/tests/attributes.rs b/git-glob/tests/attributes.rs new file mode 100644 index 00000000000..06f1a3c69d4 --- /dev/null +++ b/git-glob/tests/attributes.rs @@ -0,0 +1 @@ +mod parse; diff --git a/git-glob/tests/parse/mod.rs b/git-glob/tests/parse/mod.rs new file mode 100644 index 00000000000..80be302764a --- /dev/null +++ b/git-glob/tests/parse/mod.rs @@ -0,0 +1,134 @@ +use git_glob::pattern::Mode; + +#[test] +fn mark_ends_with_pattern_specifically() { + assert_eq!( + git_glob::parse(br"*literal"), + Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH)) + ); + assert_eq!( + git_glob::parse(br"**literal"), + Some((r"**literal".into(), Mode::NO_SUB_DIR)), + "double-asterisk won't allow for fast comparisons" + ); + assert_eq!( + git_glob::parse(br"*litera[l]"), + Some((r"*litera[l]".into(), Mode::NO_SUB_DIR)) + ); + assert_eq!( + git_glob::parse(br"*litera?"), + Some((r"*litera?".into(), Mode::NO_SUB_DIR)) + ); + assert_eq!( + git_glob::parse(br"*litera\?"), + Some((r"*litera\?".into(), Mode::NO_SUB_DIR)), + "for now we don't handle escapes properly like git seems to do" + ); +} + +#[test] +fn whitespace_only_is_ignored() { + assert!(git_glob::parse(b"\n\r\n\t\t \n").is_none()); +} + +#[test] +fn hash_symbols_are_not_special() { + assert_eq!( + git_glob::parse(b"# hello world"), + Some(("# hello world".into(), Mode::NO_SUB_DIR)) + ); +} + +#[test] +fn backslashes_before_hashes_are_no_comments() { + assert_eq!(git_glob::parse(br"\#hello"), Some((r"#hello".into(), Mode::NO_SUB_DIR))); +} + +#[test] +fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { + assert_eq!( + git_glob::parse(br"\hello\world"), + Some((r"\hello\world".into(), Mode::NO_SUB_DIR)) + ); +} + +#[test] +fn leading_exclamation_mark_negates_pattern() { + assert_eq!( + git_glob::parse(b"!hello"), + Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR)) + ); +} + +#[test] +fn leading_exclamation_marks_can_be_escaped_with_backslash() { + assert_eq!(git_glob::parse(br"\!hello"), Some(("!hello".into(), Mode::NO_SUB_DIR))); +} + +#[test] +fn absence_of_sub_directories_are_marked() { + assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty()))); + assert_eq!(git_glob::parse(br"ab"), Some(("ab".into(), Mode::NO_SUB_DIR))); +} + +#[test] +fn trailing_slashes_are_marked_and_removed() { + assert_eq!( + git_glob::parse(b"dir/"), + Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR)) + ); + assert_eq!( + git_glob::parse(b"dir///"), + Some(("dir//".into(), Mode::MUST_BE_DIR)), + "but only the last slash is removed" + ); +} + +#[test] +fn trailing_spaces_are_ignored() { + assert_eq!(git_glob::parse(br"a "), Some(("a".into(), Mode::NO_SUB_DIR))); + assert_eq!( + git_glob::parse(b"a\t\t "), + Some(("a\t\t".into(), Mode::NO_SUB_DIR)), + "trailing tabs are not ignored" + ); +} + +#[test] +fn trailing_spaces_can_be_escaped_to_be_literal() { + assert_eq!( + git_glob::parse(br"a \ "), + Some(("a ".into(), Mode::NO_SUB_DIR)), + "a single escape in front of the last desired space is enough" + ); + assert_eq!( + git_glob::parse(br"a b c "), + Some(("a b c".into(), Mode::NO_SUB_DIR)), + "spaces in the middle are fine" + ); + assert_eq!( + git_glob::parse(br"a\ \ \ "), + Some(("a ".into(), Mode::NO_SUB_DIR)), + "one can also escape every single one" + ); + assert_eq!( + git_glob::parse(br"a \ "), + Some(("a ".into(), Mode::NO_SUB_DIR)), + "or just the one in the middle, losing the last actual space" + ); + assert_eq!( + git_glob::parse(br"a \"), + Some(("a ".into(), Mode::NO_SUB_DIR)), + "escaping nothing also works as a whitespace protection" + ); + assert_eq!( + git_glob::parse(br"a \\\ "), + Some((r"a ".into(), Mode::NO_SUB_DIR)), + "strange things like these work too" + ); + assert_eq!( + git_glob::parse(br"a \\ "), + Some((r"a ".into(), Mode::NO_SUB_DIR)), + "strange things like these work as well" + ); +} From 2794bb2f6bd80cccba508fa9f251609499167646 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 16:57:16 +0800 Subject: [PATCH 03/56] prepare changelog prior to release (#301) --- git-glob/CHANGELOG.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/git-glob/CHANGELOG.md b/git-glob/CHANGELOG.md index 52e80a16a21..e740bb77f79 100644 --- a/git-glob/CHANGELOG.md +++ b/git-glob/CHANGELOG.md @@ -5,3 +5,25 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## Unreleased + +Initial release with pattern parsing functionality. + +### Commit Statistics + + + + - 1 commit contributed to the release. + - 0 commits where understood as [conventional](https://www.conventionalcommits.org). + - 1 unique issue was worked on: [#298](https://github.com/Byron/gitoxide/issues/298) + +### Commit Details + + + +
view details + + * **[#298](https://github.com/Byron/gitoxide/issues/298)** + - Add git-glob crate with pattern matching parsing from git-attributes::ignore ([`42b02bd`](https://github.com/Byron/gitoxide/commit/42b02bddde147735802058db35254cd96c2388a0)) +
+ From 0f66c5d56bd3f0febff881065911638f22e71158 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 16:58:58 +0800 Subject: [PATCH 04/56] Release git-glob v0.1.0 --- git-glob/CHANGELOG.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/git-glob/CHANGELOG.md b/git-glob/CHANGELOG.md index e740bb77f79..d1105978555 100644 --- a/git-glob/CHANGELOG.md +++ b/git-glob/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## Unreleased +## 0.1.0 (2022-04-07) Initial release with pattern parsing functionality. @@ -13,9 +13,9 @@ Initial release with pattern parsing functionality. - - 1 commit contributed to the release. + - 2 commits contributed to the release. - 0 commits where understood as [conventional](https://www.conventionalcommits.org). - - 1 unique issue was worked on: [#298](https://github.com/Byron/gitoxide/issues/298) + - 1 unique issue was worked on: [#301](https://github.com/Byron/gitoxide/issues/301) ### Commit Details @@ -23,7 +23,8 @@ Initial release with pattern parsing functionality.
view details - * **[#298](https://github.com/Byron/gitoxide/issues/298)** - - Add git-glob crate with pattern matching parsing from git-attributes::ignore ([`42b02bd`](https://github.com/Byron/gitoxide/commit/42b02bddde147735802058db35254cd96c2388a0)) + * **[#301](https://github.com/Byron/gitoxide/issues/301)** + - prepare changelog prior to release ([`2794bb2`](https://github.com/Byron/gitoxide/commit/2794bb2f6bd80cccba508fa9f251609499167646)) + - Add git-glob crate with pattern matching parsing from git-attributes::ignore ([`b3efc94`](https://github.com/Byron/gitoxide/commit/b3efc94134a32018db1d6a2d7f8cc397c4371999))
From 120d9085c35ac72d4b83daee7f2cb59fde91890e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 17:08:40 +0800 Subject: [PATCH 05/56] change!: use git-glob crate for pattern parsing (#301) --- git-attributes/Cargo.toml | 4 +- git-attributes/src/ignore.rs | 16 --- git-attributes/src/lib.rs | 2 - git-attributes/src/parse/attribute.rs | 6 +- git-attributes/src/parse/ignore.rs | 72 +------------ git-attributes/tests/parse/attribute.rs | 5 +- git-attributes/tests/parse/ignore.rs | 129 +----------------------- 7 files changed, 11 insertions(+), 223 deletions(-) delete mode 100644 git-attributes/src/ignore.rs diff --git a/git-attributes/Cargo.toml b/git-attributes/Cargo.toml index fce0ef71a6b..130a0fb2ec6 100644 --- a/git-attributes/Cargo.toml +++ b/git-attributes/Cargo.toml @@ -12,15 +12,15 @@ doctest = false [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. -serde1 = ["serde", "bstr/serde1"] +serde1 = ["serde", "bstr/serde1", "git-glob/serde1"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] git-quote = { version = "^0.2.0", path = "../git-quote" } +git-glob = { version = "^0.1.0", path = "../git-glob" } bstr = { version = "0.2.13", default-features = false, features = ["std"]} -bitflags = "1.3.2" unicode-bom = "1.1.4" quick-error = "2.0.0" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} diff --git a/git-attributes/src/ignore.rs b/git-attributes/src/ignore.rs deleted file mode 100644 index 77a48989d78..00000000000 --- a/git-attributes/src/ignore.rs +++ /dev/null @@ -1,16 +0,0 @@ -pub mod pattern { - use bitflags::bitflags; - - bitflags! { - #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] - pub struct Mode: u32 { - /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. - const NO_SUB_DIR = 1 << 0; - /// A pattern that is '*literal', meaning that it ends with what's given here - const ENDS_WITH = 1 << 1; - /// The pattern must match a directory, and not a file. - const MUST_BE_DIR = 1 << 2; - const NEGATIVE = 1 << 3; - } - } -} diff --git a/git-attributes/src/lib.rs b/git-attributes/src/lib.rs index 0d08f1e1963..41866e02f08 100644 --- a/git-attributes/src/lib.rs +++ b/git-attributes/src/lib.rs @@ -18,8 +18,6 @@ pub enum State<'a> { Unspecified, } -pub mod ignore; - pub mod parse; pub fn parse(buf: &[u8]) -> parse::Lines<'_> { diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index db09daf536f..f8fe2876f89 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -6,7 +6,7 @@ use bstr::{BStr, BString, ByteSlice}; #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum Kind { /// A pattern to match paths against - Pattern(BString, crate::ignore::pattern::Mode), + Pattern(BString, git_glob::pattern::Mode), /// The name of the macro to define, always a valid attribute name Macro(BString), } @@ -162,8 +162,8 @@ fn parse_line(line: &BStr, line_number: usize) -> Option, _ => unreachable!("BUG: check_attr() must only return attribute errors"), }), None => { - let (pattern, flags) = super::ignore::parse_line(line.as_ref())?; - if flags.contains(crate::ignore::pattern::Mode::NEGATIVE) { + let (pattern, flags) = git_glob::parse(line.as_ref())?; + if flags.contains(git_glob::pattern::Mode::NEGATIVE) { Err(Error::PatternNegation { line: line.into_owned(), line_number, diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index 8a3e92df9d1..62ea0111b6a 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -1,7 +1,5 @@ use bstr::{BString, ByteSlice}; -use crate::ignore; - pub struct Lines<'a> { lines: bstr::Lines<'a>, line_no: usize, @@ -18,7 +16,7 @@ impl<'a> Lines<'a> { } impl<'a> Iterator for Lines<'a> { - type Item = (BString, ignore::pattern::Mode, usize); + type Item = (BString, git_glob::pattern::Mode, usize); fn next(&mut self) -> Option { for line in self.lines.by_ref() { @@ -26,7 +24,7 @@ impl<'a> Iterator for Lines<'a> { if line.first() == Some(&b'#') { continue; } - match parse_line(line) { + match git_glob::parse(line) { None => continue, Some((line, flags)) => return Some((line, flags, self.line_no)), } @@ -34,69 +32,3 @@ impl<'a> Iterator for Lines<'a> { None } } - -#[inline] -pub(crate) fn parse_line(mut line: &[u8]) -> Option<(BString, ignore::pattern::Mode)> { - let mut mode = ignore::pattern::Mode::empty(); - if line.is_empty() { - return None; - }; - if line.first() == Some(&b'!') { - mode |= ignore::pattern::Mode::NEGATIVE; - line = &line[1..]; - } else if line.first() == Some(&b'\\') { - let second = line.get(1); - if second == Some(&b'!') || second == Some(&b'#') { - line = &line[1..]; - } - } - if line.iter().all(|b| b.is_ascii_whitespace()) { - return None; - } - let mut line = truncate_non_escaped_trailing_spaces(line); - if line.last() == Some(&b'/') { - mode |= ignore::pattern::Mode::MUST_BE_DIR; - line.pop(); - } - if !line.contains(&b'/') { - mode |= ignore::pattern::Mode::NO_SUB_DIR; - } - if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { - mode |= ignore::pattern::Mode::ENDS_WITH; - } - Some((line, mode)) -} - -/// We always copy just because that's ultimately needed anyway, not because we always have to. -fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { - match buf.rfind_not_byteset(br"\ ") { - Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace - None => buf.into(), - Some(start_of_non_space) => { - // This seems a bit strange but attempts to recreate the git implementation while - // actually removing the escape characters before spaces. We leave other backslashes - // for escapes to be handled by `glob/globset`. - let mut res: BString = buf[..start_of_non_space + 1].into(); - - let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); - let mut bare_spaces = 0; - while let Some(b) = trailing_bytes.next() { - match b { - b' ' => { - bare_spaces += 1; - } - b'\\' => { - res.extend(std::iter::repeat(b' ').take(bare_spaces)); - bare_spaces = 0; - // Skip what follows, like git does, but keep spaces if possible. - if trailing_bytes.next() == Some(&b' ') { - res.push(b' '); - } - } - _ => unreachable!("BUG: this must be either backslash or space"), - } - } - res - } - } -} diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 09394c18743..3cfa764415d 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -1,5 +1,6 @@ use bstr::{BStr, ByteSlice}; -use git_attributes::{ignore::pattern::Mode, parse, State}; +use git_attributes::{parse, State}; +use git_glob::pattern::Mode; use git_testtools::fixture_bytes; #[test] @@ -250,7 +251,7 @@ fn value<'a, 'b>(attr: &'a str, value: &'b str) -> (&'a BStr, State<'b>) { (attr.as_bytes().as_bstr(), State::Value(value.as_bytes().as_bstr())) } -fn pattern(name: &str, flags: git_attributes::ignore::pattern::Mode) -> parse::Kind { +fn pattern(name: &str, flags: git_glob::pattern::Mode) -> parse::Kind { parse::Kind::Pattern(name.into(), flags) } diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index e39a194e493..3a59f9daa06 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -1,4 +1,4 @@ -use git_attributes::ignore::pattern::Mode; +use git_glob::pattern::Mode; use git_testtools::fixture_bytes; #[test] @@ -39,32 +39,6 @@ fn line_endings_can_be_windows_or_unix() { ); } -#[test] -fn mark_ends_with_pattern_specifically() { - assert_eq!( - git_attributes::parse::ignore(br"*literal").next(), - Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"**literal").next(), - Some((r"**literal".into(), Mode::NO_SUB_DIR, 1)), - "double-asterisk won't allow for fast comparisons" - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera[l]").next(), - Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera?").next(), - Some((r"*litera?".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"*litera\?").next(), - Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 1)), - "for now we don't handle escapes properly like git seems to do" - ); -} - #[test] fn comments_are_ignored_as_well_as_empty_ones() { assert!(git_attributes::parse::ignore(b"# hello world").next().is_none()); @@ -78,104 +52,3 @@ fn backslashes_before_hashes_are_no_comments() { Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) ); } - -#[test] -fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { - assert_eq!( - git_attributes::parse::ignore(br"\hello\world").next(), - Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 1)) - ); -} - -#[test] -fn leading_exclamation_mark_negates_pattern() { - assert_eq!( - git_attributes::parse::ignore(b"!hello").next(), - Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 1)) - ); -} - -#[test] -fn leading_exclamation_marks_can_be_escaped_with_backslash() { - assert_eq!( - git_attributes::parse::ignore(br"\!hello").next(), - Some(("!hello".into(), Mode::NO_SUB_DIR, 1)) - ); -} - -#[test] -fn absence_of_sub_directories_are_marked() { - assert_eq!( - git_attributes::parse::ignore(br"a/b").next(), - Some(("a/b".into(), Mode::empty(), 1)) - ); - assert_eq!( - git_attributes::parse::ignore(br"ab").next(), - Some(("ab".into(), Mode::NO_SUB_DIR, 1)) - ); -} - -#[test] -fn trailing_slashes_are_marked_and_removed() { - assert_eq!( - git_attributes::parse::ignore(b"dir/").next(), - Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(b"dir///").next(), - Some(("dir//".into(), Mode::MUST_BE_DIR, 1)), - "but only the last slash is removed" - ); -} - -#[test] -fn trailing_spaces_are_ignored() { - assert_eq!( - git_attributes::parse::ignore(br"a ").next(), - Some(("a".into(), Mode::NO_SUB_DIR, 1)) - ); - assert_eq!( - git_attributes::parse::ignore(b"a\t\t ").next(), - Some(("a\t\t".into(), Mode::NO_SUB_DIR, 1)), - "trailing tabs are not ignored" - ); -} - -#[test] -fn trailing_spaces_can_be_escaped_to_be_literal() { - assert_eq!( - git_attributes::parse::ignore(br"a \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "a single escape in front of the last desired space is enough" - ); - assert_eq!( - git_attributes::parse::ignore(br"a b c ").next(), - Some(("a b c".into(), Mode::NO_SUB_DIR, 1)), - "spaces in the middle are fine" - ); - assert_eq!( - git_attributes::parse::ignore(br"a\ \ \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "one can also escape every single one" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \ ").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "or just the one in the middle, losing the last actual space" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \").next(), - Some(("a ".into(), Mode::NO_SUB_DIR, 1)), - "escaping nothing also works as a whitespace protection" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \\\ ").next(), - Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), - "strange things like these work too" - ); - assert_eq!( - git_attributes::parse::ignore(br"a \\ ").next(), - Some((r"a ".into(), Mode::NO_SUB_DIR, 1)), - "strange things like these work as well" - ); -} From 8a543410e10326ce506b8a7ba65e662641835849 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 17:14:52 +0800 Subject: [PATCH 06/56] refactor (#301) --- Cargo.lock | 2 +- git-glob/tests/parse/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a0b7f4433e..8db0a026354 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1075,8 +1075,8 @@ dependencies = [ name = "git-attributes" version = "0.1.0" dependencies = [ - "bitflags", "bstr", + "git-glob", "git-quote", "git-testtools", "quick-error", diff --git a/git-glob/tests/parse/mod.rs b/git-glob/tests/parse/mod.rs index 80be302764a..aa42914a4ba 100644 --- a/git-glob/tests/parse/mod.rs +++ b/git-glob/tests/parse/mod.rs @@ -40,7 +40,7 @@ fn hash_symbols_are_not_special() { } #[test] -fn backslashes_before_hashes_are_no_comments() { +fn backslashes_before_hashes_are_considered_an_escape_sequence() { assert_eq!(git_glob::parse(br"\#hello"), Some((r"#hello".into(), Mode::NO_SUB_DIR))); } From dbe7305d371c7dad02d8888492b60b882b418a46 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 19:48:35 +0800 Subject: [PATCH 07/56] refactor (#301) --- git-glob/src/parse.rs | 6 +++++- git-glob/tests/{attributes.rs => glob.rs} | 0 2 files changed, 5 insertions(+), 1 deletion(-) rename git-glob/tests/{attributes.rs => glob.rs} (100%) diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index f29727614c7..105124f30c5 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -3,6 +3,8 @@ use crate::pattern::Mode; use bstr::{BString, ByteSlice}; #[inline] +/// A sloppy parser that performs only the most basic checks, providing additional information +/// using `pattern::Mode` flags. pub fn parse_line(mut line: &[u8]) -> Option<(BString, pattern::Mode)> { let mut mode = Mode::empty(); if line.is_empty() { @@ -28,12 +30,14 @@ pub fn parse_line(mut line: &[u8]) -> Option<(BString, pattern::Mode)> { if !line.contains(&b'/') { mode |= Mode::NO_SUB_DIR; } - if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { + if line.first() == Some(&b'*') && line[1..].find_byteset(GLOB_CHARACTERS).is_none() { mode |= Mode::ENDS_WITH; } Some((line, mode)) } +const GLOB_CHARACTERS: &[u8] = br"*?[\"; + /// We always copy just because that's ultimately needed anyway, not because we always have to. fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { match buf.rfind_not_byteset(br"\ ") { diff --git a/git-glob/tests/attributes.rs b/git-glob/tests/glob.rs similarity index 100% rename from git-glob/tests/attributes.rs rename to git-glob/tests/glob.rs From f9ab830df2920387c1cffec048be3a4089f4aa40 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 7 Apr 2022 21:38:07 +0800 Subject: [PATCH 08/56] bring in all ~140 tests for git pattern matching, git-ignore styile (#301) Note that character classes are still missing. --- git-glob/tests/fixtures/make_baseline.sh | 145 +++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 git-glob/tests/fixtures/make_baseline.sh diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh new file mode 100644 index 00000000000..66ceaa00415 --- /dev/null +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -0,0 +1,145 @@ +#!/bin/bash +set -eu -o pipefail + + +git init -q +git config commit.gpgsign false +git config core.autocrlf false + +while read -r pattern nomatch; do + echo $pattern $nomatch +done < Date: Fri, 8 Apr 2022 09:04:04 +0800 Subject: [PATCH 09/56] fix clippy - many false positives this time --- cargo-smart-release/src/traverse.rs | 2 +- git-pack/src/data/output/count/objects/mod.rs | 2 +- git-packetline/src/read/async_io.rs | 2 +- git-packetline/src/read/blocking_io.rs | 5 +---- git-ref/src/peel.rs | 2 +- 5 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cargo-smart-release/src/traverse.rs b/cargo-smart-release/src/traverse.rs index cc2e32018d0..7b3dd57f11d 100644 --- a/cargo-smart-release/src/traverse.rs +++ b/cargo-smart-release/src/traverse.rs @@ -335,7 +335,7 @@ fn package_may_be_published(p: &Package) -> bool { fn forward_propagate_breaking_changes_for_publishing( ctx: &Context, - crates: &mut Vec>, + crates: &mut [Dependency<'_>], bump_when_needed: bool, allow_auto_publish_of_stable_crates: bool, ) -> anyhow::Result<()> { diff --git a/git-pack/src/data/output/count/objects/mod.rs b/git-pack/src/data/output/count/objects/mod.rs index 65355a52952..3840391487f 100644 --- a/git-pack/src/data/output/count/objects/mod.rs +++ b/git-pack/src/data/output/count/objects/mod.rs @@ -158,7 +158,7 @@ mod expand { seen_objs: &impl util::InsertImmutable, oids: impl IntoIterator>, buf1: &mut Vec, - buf2: &mut Vec, + #[allow(clippy::ptr_arg)] buf2: &mut Vec, progress: &mut impl Progress, should_interrupt: &AtomicBool, allow_pack_lookups: bool, diff --git a/git-packetline/src/read/async_io.rs b/git-packetline/src/read/async_io.rs index 3eede530d04..e0d53d397ce 100644 --- a/git-packetline/src/read/async_io.rs +++ b/git-packetline/src/read/async_io.rs @@ -18,7 +18,7 @@ where #[allow(clippy::needless_lifetimes)] // TODO: remove once this is clippy false positive is fixed async fn read_line_inner<'a>( reader: &mut T, - buf: &'a mut Vec, + buf: &'a mut [u8], ) -> io::Result, decode::Error>> { let (hex_bytes, data_bytes) = buf.split_at_mut(4); reader.read_exact(hex_bytes).await?; diff --git a/git-packetline/src/read/blocking_io.rs b/git-packetline/src/read/blocking_io.rs index 77a545bb2b7..7465c16032b 100644 --- a/git-packetline/src/read/blocking_io.rs +++ b/git-packetline/src/read/blocking_io.rs @@ -13,10 +13,7 @@ impl StreamingPeekableIter where T: io::Read, { - fn read_line_inner<'a>( - reader: &mut T, - buf: &'a mut Vec, - ) -> io::Result, decode::Error>> { + fn read_line_inner<'a>(reader: &mut T, buf: &'a mut [u8]) -> io::Result, decode::Error>> { let (hex_bytes, data_bytes) = buf.split_at_mut(4); reader.read_exact(hex_bytes)?; let num_data_bytes = match decode::hex_prefix(hex_bytes) { diff --git a/git-ref/src/peel.rs b/git-ref/src/peel.rs index 02913d5ff79..503b94f1044 100644 --- a/git-ref/src/peel.rs +++ b/git-ref/src/peel.rs @@ -1,7 +1,7 @@ /// A function for use in [`crate::file::ReferenceExt::peel_to_id_in_place()`] to indicate no peeling should happen. pub fn none( _id: git_hash::ObjectId, - _buf: &mut Vec, + #[allow(clippy::ptr_arg)] _buf: &mut Vec, ) -> Result, std::convert::Infallible> { Ok(Some((git_object::Kind::Commit, &[]))) } From 621c2cac7eed822cc8226c7b9aa8becf3db6872c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 8 Apr 2022 10:29:06 +0800 Subject: [PATCH 10/56] baseline tests for matches and no-matches (#301) Needs simple parser to translate the expectations into our own implementation providing half-decent reporting. --- git-glob/tests/fixtures/make_baseline.sh | 289 +++++++++++++---------- git-glob/tests/glob.rs | 7 + 2 files changed, 165 insertions(+), 131 deletions(-) diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 66ceaa00415..00df68dff83 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -5,141 +5,168 @@ set -eu -o pipefail git init -q git config commit.gpgsign false git config core.autocrlf false +git config core.ignorecase false while read -r pattern nomatch; do - echo $pattern $nomatch -done < .gitignore + git check-ignore -vn "$nomatch" 2>&1 || : +done <>git-baseline.nmatch +abc?def abc/def +abc*def abc/def +a*b*c abcd +abc*abc*abc abcabcabcabcabcabcabca +some/**/needle.txt some/other/notthis.txt +some/**/**/needle.txt some/other/notthis.txt +/**/test one/notthis +/**/test notthis +**/.* ab.c +**/.* abc/ab.c +.*/** a.bc +.*/** abc/a.bc +a[0-9]b a_b +a[!0-9]b a0b +a[!0-9]b a9b +[!-] - +*hello.txt hello.txt-and-then-some +*hello.txt goodbye.txt +*some/path/to/hello.txt some/path/to/hello.txt-and-then-some +*some/path/to/hello.txt some/other/path/to/hello.txt +./foo foo +**/foo foofoo +**/foo/bar foofoo/bar +/*.c mozilla-sha1/sha1.c +**/m4/ltoptions.m4 csharp/src/packages/repositories.config +a[^0-9]b a0b +a[^0-9]b a9b +[^-] - +some/*/needle.txt some/needle.txt +some/*/needle.txt some/one/two/needle.txt +some/*/needle.txt some/one/two/three/needle.txt +.*/** .abc +foo/** foo +*some/path/to/hello.txt a/bigger/some/path/to/hello.txt +{a,b} a +{a,b} b +{**/src/**,foo} abc/src/bar +{**/src/**,foo} foo +{[}],foo} } +{foo} foo +{*.foo,*.bar,*.wat} test.foo +{*.foo,*.bar,*.wat} test.bar +{*.foo,*.bar,*.wat} test.wat +abc*def abc/def +abc[/]def abc/def +\\[a-z] \\a +\\? \\a +\\* \\\\ +aBcDeFg aBcDeFg +aBcDeFg abcdefg +aBcDeFg ABCDEFG +aBcDeFg AbCdEfG EOF while read -r pattern match; do - echo $pattern $match -done < .gitignore + git check-ignore -vn "$match" 2>&1 || : +done <>git-baseline.match +*.c mozilla-sha1/sha1.c +a foo/a +/**/test test +a a +a*b a_b +a*b*c abc +a*b*c a_b_c +a*b*c a___b___c +abc*abc*abc abcabcabcabcabcabcabc +a*a*a*a*a*a*a*a*a aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +a*b[xyz]c*d abxcdbxcddd +*.rs .rs +☃ ☃ +some/**/needle.txt some/needle.txt +some/**/needle.txt some/one/needle.txt +some/**/needle.txt some/one/two/needle.txt +some/**/needle.txt some/other/needle.txt +** abcde +** .asdf +** x/.asdf +some/**/**/needle.txt some/needle.txt +some/**/**/needle.txt some/one/needle.txt +some/**/**/needle.txt some/one/two/needle.txt +some/**/**/needle.txt some/other/needle.txt +**/test one/two/test +**/test one/test +**/test test +/**/test one/two/test +/**/test one/test +/**/test test +**/.* .abc +**/.* abc/.abc +**/foo/bar foo/bar +.*/** .abc/abc +test/** test/ +test/** test/one +test/** test/one/two +some/*/needle.txt some/one/needle.txt +a[0-9]b a0b +a[0-9]b a9b +a[!0-9]b a_b +[a-z123] 1 +[1a-z23] 1 +[123a-z] 1 +[abc-] - +[-abc] - +[-a-c] b +[a-c-] b +[-] - +a[^0-9]b a_b +*hello.txt hello.txt +*hello.txt gareth_says_hello.txt +*hello.txt some/path/to/hello.txt +*hello.txt some\\path\\to\\hello.txt +*hello.txt an/absolute/path/to/hello.txt +*some/path/to/hello.txt some/path/to/hello.txt +_[[]_[]]_[?]_[*]_!_ _[_]_?_*_!_ +a,b a,b +abc/def abc/def +\\[ [ +\\? ? +\\* * +EOF + +git config core.ignorecase true +while read -r pattern match; do + echo "$pattern" "$match" + echo "$pattern" > .gitignore + git check-ignore -vn "$match" 2>&1 || : +done <>git-baseline.match-icase +aBcDeFg aBcDeFg +aBcDeFg abcdefg +aBcDeFg ABCDEFG +aBcDeFg AbCdEfG +EOF + +# nmatches OS specific +# windows +# "abc?def" "abc\\def" +# unix +# "abc\\def" "abc/def" + + +# matches OS specific + # unix only -"\\a" "a" +# "\\a" "a" +#"abc\\def" "abc/def" +#"abc?def" "abc/def" + # windows only -"\\a" "/a" -EOF +# "abc[/]def" "abc/def" +# "abc\\def" "abc/def" +#"abc?def" "abc\\def" + +# empty string is not a valid path-spec +#** " " +#{} " " +#{,} " " diff --git a/git-glob/tests/glob.rs b/git-glob/tests/glob.rs index 06f1a3c69d4..f4456c28dcd 100644 --- a/git-glob/tests/glob.rs +++ b/git-glob/tests/glob.rs @@ -1 +1,8 @@ mod parse; +mod matching { + + #[test] + fn compare_baseline_with_ours() { + let _dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh"); + } +} From 027869d57bd7fcb7234e814d1a22197cb64c05cf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 8 Apr 2022 13:20:44 +0800 Subject: [PATCH 11/56] basic infrastructure for running git-baseline against our implementation (#301) It probably needs more restructuring to allow simple patterns to work first, basically by not using wildcards at all. Probably that needs another little program to reorganize the input. --- git-glob/src/lib.rs | 29 ++++++++- git-glob/src/parse.rs | 18 +++--- git-glob/tests/fixtures/make_baseline.sh | 4 +- git-glob/tests/glob.rs | 80 +++++++++++++++++++++++- 4 files changed, 117 insertions(+), 14 deletions(-) diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index ded3b2fe43b..2853e3f3d1d 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -1,8 +1,18 @@ #![forbid(unsafe_code)] #![deny(rust_2018_idioms)] +use bstr::BString; + +pub struct Pattern { + /// the actual pattern bytes + _inner: BString, + _mode: pattern::Mode, +} + pub mod pattern { + use crate::Pattern; use bitflags::bitflags; + use bstr::{BStr, BString}; bitflags! { #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] @@ -16,7 +26,24 @@ pub mod pattern { const NEGATIVE = 1 << 3; } } + + impl Pattern { + pub fn new(pattern: impl Into, mode: Mode) -> Self { + Pattern { + _inner: pattern.into(), + _mode: mode, + } + } + + pub fn from_bytes(pattern: &BStr) -> Option { + crate::parse(pattern).map(|(pattern, mode)| Self::new(pattern, mode)) + } + + pub fn matches(&self, _value: &BStr) -> bool { + todo!() + } + } } mod parse; -pub use parse::parse_line as parse; +pub use parse::pattern as parse; diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index 105124f30c5..8c4b792a753 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -5,24 +5,24 @@ use bstr::{BString, ByteSlice}; #[inline] /// A sloppy parser that performs only the most basic checks, providing additional information /// using `pattern::Mode` flags. -pub fn parse_line(mut line: &[u8]) -> Option<(BString, pattern::Mode)> { +pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode)> { let mut mode = Mode::empty(); - if line.is_empty() { + if pat.is_empty() { return None; }; - if line.first() == Some(&b'!') { + if pat.first() == Some(&b'!') { mode |= Mode::NEGATIVE; - line = &line[1..]; - } else if line.first() == Some(&b'\\') { - let second = line.get(1); + pat = &pat[1..]; + } else if pat.first() == Some(&b'\\') { + let second = pat.get(1); if second == Some(&b'!') || second == Some(&b'#') { - line = &line[1..]; + pat = &pat[1..]; } } - if line.iter().all(|b| b.is_ascii_whitespace()) { + if pat.iter().all(|b| b.is_ascii_whitespace()) { return None; } - let mut line = truncate_non_escaped_trailing_spaces(line); + let mut line = truncate_non_escaped_trailing_spaces(pat); if line.last() == Some(&b'/') { mode |= Mode::MUST_BE_DIR; line.pop(); diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 00df68dff83..02c1ef75c95 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -13,7 +13,6 @@ while read -r pattern nomatch; do git check-ignore -vn "$nomatch" 2>&1 || : done <>git-baseline.nmatch abc?def abc/def -abc*def abc/def a*b*c abcd abc*abc*abc abcabcabcabcabcabcabca some/**/needle.txt some/other/notthis.txt @@ -60,7 +59,6 @@ abc[/]def abc/def \\[a-z] \\a \\? \\a \\* \\\\ -aBcDeFg aBcDeFg aBcDeFg abcdefg aBcDeFg ABCDEFG aBcDeFg AbCdEfG @@ -73,7 +71,6 @@ while read -r pattern match; do done <>git-baseline.match *.c mozilla-sha1/sha1.c a foo/a -/**/test test a a a*b a_b a*b*c abc @@ -133,6 +130,7 @@ abc/def abc/def \\[ [ \\? ? \\* * +aBcDeFg aBcDeFg EOF git config core.ignorecase true diff --git a/git-glob/tests/glob.rs b/git-glob/tests/glob.rs index f4456c28dcd..0c12c320737 100644 --- a/git-glob/tests/glob.rs +++ b/git-glob/tests/glob.rs @@ -1,8 +1,86 @@ mod parse; mod matching { + use bstr::{BStr, ByteSlice}; + use std::collections::BTreeSet; + + #[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)] + pub struct GitMatch<'a> { + pattern: &'a BStr, + value: &'a BStr, + /// True if git could match `value` with `pattern` + is_match: bool, + } + + pub struct Baseline<'a> { + inner: bstr::Lines<'a>, + } + + impl<'a> Iterator for Baseline<'a> { + type Item = GitMatch<'a>; + + fn next(&mut self) -> Option { + let mut tokens = self.inner.next()?.splitn(2, |b| *b == b' '); + let pattern = tokens.next().expect("pattern").as_bstr(); + let value = tokens.next().expect("value").as_bstr().trim_start().as_bstr(); + + let git_match = self.inner.next()?; + let is_match = !git_match.starts_with(b"::\t"); + Some(GitMatch { + pattern, + value, + is_match, + }) + } + } + + impl<'a> Baseline<'a> { + fn new(input: &'a [u8]) -> Self { + Baseline { + inner: input.as_bstr().lines(), + } + } + } #[test] + #[ignore] fn compare_baseline_with_ours() { - let _dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh"); + let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); + { + let input = std::fs::read(dir.join("git-baseline.match")).unwrap(); + let mut seen = BTreeSet::default(); + for git_match in Baseline::new(&input) { + assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); + assert!( + git_match.is_match, + "baseline for matches must indeed be matches - check baseline and git version: {:?}", + git_match + ); + let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); + assert!(pattern.matches(git_match.value)) + } + } + + { + let input = std::fs::read(dir.join("git-baseline.nmatch")).unwrap(); + let mut seen = BTreeSet::default(); + for git_match in Baseline::new(&input) { + assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); + assert!( + !git_match.is_match, + "baseline for no-matches must indeed not be matches - check baseline and git version: {:?}", + git_match + ); + let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); + assert!(!pattern.matches(git_match.value)) + } + } } + + #[test] + #[ignore] + fn check_case_insensitive() {} + + #[test] + #[ignore] + fn negated_patterns() {} } From f285ca03094655590d7014770ffb6f6a77d02289 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 8 Apr 2022 16:00:51 +0800 Subject: [PATCH 12/56] refactor (#301) --- git-glob/src/lib.rs | 18 ++++++------------ git-glob/src/parse.rs | 6 +++++- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 2853e3f3d1d..0ecde69b7e2 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -5,14 +5,15 @@ use bstr::BString; pub struct Pattern { /// the actual pattern bytes - _inner: BString, - _mode: pattern::Mode, + pub text: BString, + /// Additional information to help accelerate pattern matching. + pub mode: pattern::Mode, } pub mod pattern { use crate::Pattern; use bitflags::bitflags; - use bstr::{BStr, BString}; + use bstr::BStr; bitflags! { #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] @@ -28,15 +29,8 @@ pub mod pattern { } impl Pattern { - pub fn new(pattern: impl Into, mode: Mode) -> Self { - Pattern { - _inner: pattern.into(), - _mode: mode, - } - } - - pub fn from_bytes(pattern: &BStr) -> Option { - crate::parse(pattern).map(|(pattern, mode)| Self::new(pattern, mode)) + pub fn from_bytes(text: &BStr) -> Option { + crate::parse::pattern(text).map(|(text, mode)| Pattern { text, mode }) } pub fn matches(&self, _value: &BStr) -> bool { diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index 8c4b792a753..ef46521065c 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -30,12 +30,16 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode)> { if !line.contains(&b'/') { mode |= Mode::NO_SUB_DIR; } - if line.first() == Some(&b'*') && line[1..].find_byteset(GLOB_CHARACTERS).is_none() { + if line.first() == Some(&b'*') && no_wildcard_len(&line[1..]).is_none() { mode |= Mode::ENDS_WITH; } Some((line, mode)) } +fn no_wildcard_len(pat: &[u8]) -> Option { + pat.find_byteset(GLOB_CHARACTERS) +} + const GLOB_CHARACTERS: &[u8] = br"*?[\"; /// We always copy just because that's ultimately needed anyway, not because we always have to. From a11f5d441a22b844caefd31b9cb7783dd6b048ad Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 8 Apr 2022 16:21:05 +0800 Subject: [PATCH 13/56] prepare for upcoming wildcard-length field in glob pattern (#301) --- git-attributes/src/parse/attribute.rs | 9 +++-- git-attributes/src/parse/ignore.rs | 8 ++-- git-attributes/tests/parse/attribute.rs | 6 ++- git-attributes/tests/parse/ignore.rs | 20 ++++++++-- git-glob/src/lib.rs | 14 ++++++- git-glob/src/parse.rs | 6 ++- git-glob/tests/parse/mod.rs | 52 ++++++++++++++----------- 7 files changed, 75 insertions(+), 40 deletions(-) diff --git a/git-attributes/src/parse/attribute.rs b/git-attributes/src/parse/attribute.rs index f8fe2876f89..8d044e933a1 100644 --- a/git-attributes/src/parse/attribute.rs +++ b/git-attributes/src/parse/attribute.rs @@ -6,8 +6,9 @@ use bstr::{BStr, BString, ByteSlice}; #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub enum Kind { /// A pattern to match paths against - Pattern(BString, git_glob::pattern::Mode), + Pattern(git_glob::Pattern), /// The name of the macro to define, always a valid attribute name + // TODO: turn it into its own type for maximum safety Macro(BString), } @@ -162,14 +163,14 @@ fn parse_line(line: &BStr, line_number: usize) -> Option, _ => unreachable!("BUG: check_attr() must only return attribute errors"), }), None => { - let (pattern, flags) = git_glob::parse(line.as_ref())?; - if flags.contains(git_glob::pattern::Mode::NEGATIVE) { + let pattern = git_glob::Pattern::from_bytes(line.as_ref())?; + if pattern.mode.contains(git_glob::pattern::Mode::NEGATIVE) { Err(Error::PatternNegation { line: line.into_owned(), line_number, }) } else { - Ok(Kind::Pattern(pattern, flags)) + Ok(Kind::Pattern(pattern)) } } }; diff --git a/git-attributes/src/parse/ignore.rs b/git-attributes/src/parse/ignore.rs index 62ea0111b6a..71c29904ba2 100644 --- a/git-attributes/src/parse/ignore.rs +++ b/git-attributes/src/parse/ignore.rs @@ -1,4 +1,4 @@ -use bstr::{BString, ByteSlice}; +use bstr::ByteSlice; pub struct Lines<'a> { lines: bstr::Lines<'a>, @@ -16,7 +16,7 @@ impl<'a> Lines<'a> { } impl<'a> Iterator for Lines<'a> { - type Item = (BString, git_glob::pattern::Mode, usize); + type Item = (git_glob::Pattern, usize); fn next(&mut self) -> Option { for line in self.lines.by_ref() { @@ -24,9 +24,9 @@ impl<'a> Iterator for Lines<'a> { if line.first() == Some(&b'#') { continue; } - match git_glob::parse(line) { + match git_glob::Pattern::from_bytes(line) { None => continue, - Some((line, flags)) => return Some((line, flags, self.line_no)), + Some(pattern) => return Some((pattern, self.line_no)), } } None diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 3cfa764415d..0145c11ad10 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -252,7 +252,11 @@ fn value<'a, 'b>(attr: &'a str, value: &'b str) -> (&'a BStr, State<'b>) { } fn pattern(name: &str, flags: git_glob::pattern::Mode) -> parse::Kind { - parse::Kind::Pattern(name.into(), flags) + parse::Kind::Pattern(git_glob::Pattern { + text: name.into(), + mode: flags, + no_wildcard_len: 0, + }) } fn macro_(name: &str) -> parse::Kind { diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index 3a59f9daa06..fd6a2b9200d 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -1,10 +1,12 @@ +use bstr::BString; use git_glob::pattern::Mode; +use git_glob::Pattern; use git_testtools::fixture_bytes; #[test] fn byte_order_marks_are_no_patterns() { assert_eq!( - git_attributes::parse::ignore("\u{feff}hello".as_bytes()).next(), + flatten(git_attributes::parse::ignore("\u{feff}hello".as_bytes()).next()), Some((r"hello".into(), Mode::NO_SUB_DIR, 1)) ); } @@ -12,7 +14,7 @@ fn byte_order_marks_are_no_patterns() { #[test] fn line_numbers_are_counted_correctly() { let input = fixture_bytes("ignore/various.txt"); - let actual: Vec<_> = git_attributes::parse::ignore(&input).collect(); + let actual: Vec<_> = git_attributes::parse::ignore(&input).map(flat_map).collect(); assert_eq!( actual, vec![ @@ -30,7 +32,9 @@ fn line_numbers_are_counted_correctly() { #[test] fn line_endings_can_be_windows_or_unix() { assert_eq!( - git_attributes::parse::ignore(b"unix\nwindows\r\nlast").collect::>(), + git_attributes::parse::ignore(b"unix\nwindows\r\nlast") + .map(flat_map) + .collect::>(), vec![ (r"unix".into(), Mode::NO_SUB_DIR, 1), (r"windows".into(), Mode::NO_SUB_DIR, 2), @@ -48,7 +52,15 @@ fn comments_are_ignored_as_well_as_empty_ones() { #[test] fn backslashes_before_hashes_are_no_comments() { assert_eq!( - git_attributes::parse::ignore(br"\#hello").next(), + flatten(git_attributes::parse::ignore(br"\#hello").next()), Some((r"#hello".into(), Mode::NO_SUB_DIR, 1)) ); } + +fn flatten(input: Option<(Pattern, usize)>) -> Option<(BString, git_glob::pattern::Mode, usize)> { + input.map(flat_map) +} + +fn flat_map(input: (Pattern, usize)) -> (BString, git_glob::pattern::Mode, usize) { + (input.0.text, input.0.mode, input.1) +} diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 0ecde69b7e2..7f5a0598789 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -3,11 +3,17 @@ use bstr::BString; +#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] +#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Pattern { /// the actual pattern bytes pub text: BString, /// Additional information to help accelerate pattern matching. pub mode: pattern::Mode, + /// The amount of `text` bytes which aren't a wildcard character. + /// + /// Useful for optimizations. It's 0 if the first character is a wildcard. + pub no_wildcard_len: usize, } pub mod pattern { @@ -29,8 +35,12 @@ pub mod pattern { } impl Pattern { - pub fn from_bytes(text: &BStr) -> Option { - crate::parse::pattern(text).map(|(text, mode)| Pattern { text, mode }) + pub fn from_bytes(text: &[u8]) -> Option { + crate::parse::pattern(text).map(|(text, mode, no_wildcard_len)| Pattern { + text, + mode, + no_wildcard_len, + }) } pub fn matches(&self, _value: &BStr) -> bool { diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index ef46521065c..5d5f5b6a3b7 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -5,7 +5,9 @@ use bstr::{BString, ByteSlice}; #[inline] /// A sloppy parser that performs only the most basic checks, providing additional information /// using `pattern::Mode` flags. -pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode)> { +/// +/// Returns `(pattern, mode, no_wildcard_len)` +pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, usize)> { let mut mode = Mode::empty(); if pat.is_empty() { return None; @@ -33,7 +35,7 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode)> { if line.first() == Some(&b'*') && no_wildcard_len(&line[1..]).is_none() { mode |= Mode::ENDS_WITH; } - Some((line, mode)) + Some((line, mode, 0)) } fn no_wildcard_len(pat: &[u8]) -> Option { diff --git a/git-glob/tests/parse/mod.rs b/git-glob/tests/parse/mod.rs index aa42914a4ba..2c1b2c1b711 100644 --- a/git-glob/tests/parse/mod.rs +++ b/git-glob/tests/parse/mod.rs @@ -4,24 +4,24 @@ use git_glob::pattern::Mode; fn mark_ends_with_pattern_specifically() { assert_eq!( git_glob::parse(br"*literal"), - Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH)) + Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 0)) ); assert_eq!( git_glob::parse(br"**literal"), - Some((r"**literal".into(), Mode::NO_SUB_DIR)), + Some((r"**literal".into(), Mode::NO_SUB_DIR, 0)), "double-asterisk won't allow for fast comparisons" ); assert_eq!( git_glob::parse(br"*litera[l]"), - Some((r"*litera[l]".into(), Mode::NO_SUB_DIR)) + Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 0)) ); assert_eq!( git_glob::parse(br"*litera?"), - Some((r"*litera?".into(), Mode::NO_SUB_DIR)) + Some((r"*litera?".into(), Mode::NO_SUB_DIR, 0)) ); assert_eq!( git_glob::parse(br"*litera\?"), - Some((r"*litera\?".into(), Mode::NO_SUB_DIR)), + Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 0)), "for now we don't handle escapes properly like git seems to do" ); } @@ -35,20 +35,23 @@ fn whitespace_only_is_ignored() { fn hash_symbols_are_not_special() { assert_eq!( git_glob::parse(b"# hello world"), - Some(("# hello world".into(), Mode::NO_SUB_DIR)) + Some(("# hello world".into(), Mode::NO_SUB_DIR, 0)) ); } #[test] fn backslashes_before_hashes_are_considered_an_escape_sequence() { - assert_eq!(git_glob::parse(br"\#hello"), Some((r"#hello".into(), Mode::NO_SUB_DIR))); + assert_eq!( + git_glob::parse(br"\#hello"), + Some((r"#hello".into(), Mode::NO_SUB_DIR, 0)) + ); } #[test] fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { assert_eq!( git_glob::parse(br"\hello\world"), - Some((r"\hello\world".into(), Mode::NO_SUB_DIR)) + Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 0)) ); } @@ -56,40 +59,43 @@ fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { fn leading_exclamation_mark_negates_pattern() { assert_eq!( git_glob::parse(b"!hello"), - Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR)) + Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 0)) ); } #[test] fn leading_exclamation_marks_can_be_escaped_with_backslash() { - assert_eq!(git_glob::parse(br"\!hello"), Some(("!hello".into(), Mode::NO_SUB_DIR))); + assert_eq!( + git_glob::parse(br"\!hello"), + Some(("!hello".into(), Mode::NO_SUB_DIR, 0)) + ); } #[test] fn absence_of_sub_directories_are_marked() { - assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty()))); - assert_eq!(git_glob::parse(br"ab"), Some(("ab".into(), Mode::NO_SUB_DIR))); + assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty(), 0))); + assert_eq!(git_glob::parse(br"ab"), Some(("ab".into(), Mode::NO_SUB_DIR, 0))); } #[test] fn trailing_slashes_are_marked_and_removed() { assert_eq!( git_glob::parse(b"dir/"), - Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR)) + Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 0)) ); assert_eq!( git_glob::parse(b"dir///"), - Some(("dir//".into(), Mode::MUST_BE_DIR)), + Some(("dir//".into(), Mode::MUST_BE_DIR, 0)), "but only the last slash is removed" ); } #[test] fn trailing_spaces_are_ignored() { - assert_eq!(git_glob::parse(br"a "), Some(("a".into(), Mode::NO_SUB_DIR))); + assert_eq!(git_glob::parse(br"a "), Some(("a".into(), Mode::NO_SUB_DIR, 0))); assert_eq!( git_glob::parse(b"a\t\t "), - Some(("a\t\t".into(), Mode::NO_SUB_DIR)), + Some(("a\t\t".into(), Mode::NO_SUB_DIR, 0)), "trailing tabs are not ignored" ); } @@ -98,37 +104,37 @@ fn trailing_spaces_are_ignored() { fn trailing_spaces_can_be_escaped_to_be_literal() { assert_eq!( git_glob::parse(br"a \ "), - Some(("a ".into(), Mode::NO_SUB_DIR)), + Some(("a ".into(), Mode::NO_SUB_DIR, 0)), "a single escape in front of the last desired space is enough" ); assert_eq!( git_glob::parse(br"a b c "), - Some(("a b c".into(), Mode::NO_SUB_DIR)), + Some(("a b c".into(), Mode::NO_SUB_DIR, 0)), "spaces in the middle are fine" ); assert_eq!( git_glob::parse(br"a\ \ \ "), - Some(("a ".into(), Mode::NO_SUB_DIR)), + Some(("a ".into(), Mode::NO_SUB_DIR, 0)), "one can also escape every single one" ); assert_eq!( git_glob::parse(br"a \ "), - Some(("a ".into(), Mode::NO_SUB_DIR)), + Some(("a ".into(), Mode::NO_SUB_DIR, 0)), "or just the one in the middle, losing the last actual space" ); assert_eq!( git_glob::parse(br"a \"), - Some(("a ".into(), Mode::NO_SUB_DIR)), + Some(("a ".into(), Mode::NO_SUB_DIR, 0)), "escaping nothing also works as a whitespace protection" ); assert_eq!( git_glob::parse(br"a \\\ "), - Some((r"a ".into(), Mode::NO_SUB_DIR)), + Some((r"a ".into(), Mode::NO_SUB_DIR, 0)), "strange things like these work too" ); assert_eq!( git_glob::parse(br"a \\ "), - Some((r"a ".into(), Mode::NO_SUB_DIR)), + Some((r"a ".into(), Mode::NO_SUB_DIR, 0)), "strange things like these work as well" ); } From 4178a6356ad11013ae08b6233de2bfb366bf4278 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 8 Apr 2022 16:32:50 +0800 Subject: [PATCH 14/56] Also parse the position of the first wildcard (#301) --- git-attributes/tests/parse/attribute.rs | 72 ++++++++++++++----------- git-glob/src/lib.rs | 10 ++-- git-glob/src/parse.rs | 9 ++-- git-glob/tests/parse/mod.rs | 46 ++++++++-------- 4 files changed, 74 insertions(+), 63 deletions(-) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 0145c11ad10..3fc7be72d7b 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -5,10 +5,13 @@ use git_testtools::fixture_bytes; #[test] fn byte_order_marks_are_no_patterns() { - assert_eq!(line("\u{feff}hello"), (pattern(r"hello", Mode::NO_SUB_DIR), vec![], 1)); + assert_eq!( + line("\u{feff}hello"), + (pattern(r"hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); assert_eq!( line("\u{feff}\"hello\""), - (pattern(r"hello", Mode::NO_SUB_DIR), vec![], 1) + (pattern(r"hello", Mode::NO_SUB_DIR, None), vec![], 1) ); } @@ -18,15 +21,15 @@ fn line_numbers_are_counted_correctly() { assert_eq!( try_lines(&String::from_utf8(input).unwrap()).unwrap(), vec![ - (pattern(r"*.[oa]", Mode::NO_SUB_DIR), vec![set("c")], 2), + (pattern(r"*.[oa]", Mode::NO_SUB_DIR, Some(0)), vec![set("c")], 2), ( - pattern(r"*.html", Mode::NO_SUB_DIR | Mode::ENDS_WITH), + pattern(r"*.html", Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0)), vec![set("a"), value("b", "c")], 5 ), - (pattern(r"!foo.html", Mode::NO_SUB_DIR), vec![set("x")], 8), - (pattern(r"#a/path", Mode::empty()), vec![unset("a")], 10), - (pattern(r"/*", Mode::empty()), vec![unspecified("b")], 11), + (pattern(r"!foo.html", Mode::NO_SUB_DIR, None), vec![set("x")], 8), + (pattern(r"#a/path", Mode::empty(), None), vec![unset("a")], 10), + (pattern(r"/*", Mode::empty(), Some(1)), vec![unspecified("b")], 11), ] ); } @@ -36,9 +39,9 @@ fn line_endings_can_be_windows_or_unix() { assert_eq!( try_lines("unix\nwindows\r\nlast").unwrap(), vec![ - (pattern(r"unix", Mode::NO_SUB_DIR), vec![], 1), - (pattern(r"windows", Mode::NO_SUB_DIR), vec![], 2), - (pattern(r"last", Mode::NO_SUB_DIR), vec![], 3) + (pattern(r"unix", Mode::NO_SUB_DIR, None), vec![], 1), + (pattern(r"windows", Mode::NO_SUB_DIR, None), vec![], 2), + (pattern(r"last", Mode::NO_SUB_DIR, None), vec![], 3) ] ); } @@ -56,15 +59,15 @@ fn comment_lines_are_ignored_as_well_as_empty_ones() { #[test] fn leading_whitespace_is_ignored() { - assert_eq!(line(" \r\tp"), (pattern(r"p", Mode::NO_SUB_DIR), vec![], 1)); - assert_eq!(line(" \r\t\"p\""), (pattern(r"p", Mode::NO_SUB_DIR), vec![], 1)); + assert_eq!(line(" \r\tp"), (pattern(r"p", Mode::NO_SUB_DIR, None), vec![], 1)); + assert_eq!(line(" \r\t\"p\""), (pattern(r"p", Mode::NO_SUB_DIR, None), vec![], 1)); } #[test] fn quotes_separate_attributes_even_without_whitespace() { assert_eq!( line(r#""path"a b"#), - (pattern(r"path", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1) + (pattern(r"path", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1) ); } @@ -72,15 +75,21 @@ fn quotes_separate_attributes_even_without_whitespace() { fn comment_can_be_escaped_like_gitignore_or_quoted() { assert_eq!( line(r"\#hello"), - (pattern(r"#hello", Mode::NO_SUB_DIR), vec![], 1), + (pattern(r"#hello", Mode::NO_SUB_DIR, None), vec![], 1), "undocumented, but definitely works" ); - assert_eq!(line("\"# hello\""), (pattern(r"# hello", Mode::NO_SUB_DIR), vec![], 1)); + assert_eq!( + line("\"# hello\""), + (pattern(r"# hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); } #[test] fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { - assert_eq!(line(r"\!hello"), (pattern(r"!hello", Mode::NO_SUB_DIR), vec![], 1)); + assert_eq!( + line(r"\!hello"), + (pattern(r"!hello", Mode::NO_SUB_DIR, None), vec![], 1) + ); assert!(matches!( try_line(r"!hello"), Err(parse::Error::PatternNegation { line_number: 1, .. }) @@ -94,7 +103,7 @@ fn exclamation_marks_must_be_escaped_or_error_unlike_gitignore() { ); assert_eq!( line(r#""\\!hello""#), - (pattern(r"!hello", Mode::NO_SUB_DIR), vec![], 1), + (pattern(r"!hello", Mode::NO_SUB_DIR, None), vec![], 1), "…and must be double-escaped, once to get through quote, then to get through parse ignore line" ); } @@ -156,32 +165,32 @@ fn attribute_names_must_not_begin_with_dash_and_must_be_ascii_only() { fn attributes_are_parsed_behind_various_whitespace_characters() { assert_eq!( line(r#"p a b"#), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), "behind space" ); assert_eq!( line(r#""p" a b"#), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), "behind space" ); assert_eq!( line("p\ta\tb"), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), "behind tab" ); assert_eq!( line("\"p\"\ta\tb"), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), "behind tab" ); assert_eq!( line("p \t a \t b"), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), "behind a mix of space and tab" ); assert_eq!( line("\"p\" \t a \t b"), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a"), set("b")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a"), set("b")], 1), "behind a mix of space and tab" ); } @@ -191,7 +200,7 @@ fn attributes_come_in_different_flavors_due_to_prefixes() { assert_eq!( line(r#"p set -unset !unspecified -set"#), ( - pattern("p", Mode::NO_SUB_DIR), + pattern("p", Mode::NO_SUB_DIR, None), vec![set("set"), unset("unset"), unspecified("unspecified"), unset("set")], 1 ), @@ -204,7 +213,7 @@ fn attributes_can_have_values() { assert_eq!( line(r#"p a=one b=2 c=你好 "#), ( - pattern("p", Mode::NO_SUB_DIR), + pattern("p", Mode::NO_SUB_DIR, None), vec![value("a", "one"), value("b", "2"), value("c", "你好")], 1 ), @@ -217,7 +226,7 @@ fn attributes_see_state_adjustments_over_value_assignments() { assert_eq!( line(r#"p set -unset=a !unspecified=b"#), ( - pattern("p", Mode::NO_SUB_DIR), + pattern("p", Mode::NO_SUB_DIR, None), vec![set("set"), unset("unset"), unspecified("unspecified")], 1 ) @@ -226,10 +235,13 @@ fn attributes_see_state_adjustments_over_value_assignments() { #[test] fn trailing_whitespace_in_attributes_is_ignored() { - assert_eq!(line("p a \r\t"), (pattern("p", Mode::NO_SUB_DIR), vec![set("a")], 1),); + assert_eq!( + line("p a \r\t"), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a")], 1), + ); assert_eq!( line("\"p\" a \r\t"), - (pattern("p", Mode::NO_SUB_DIR), vec![set("a")], 1), + (pattern("p", Mode::NO_SUB_DIR, None), vec![set("a")], 1), ); } @@ -251,11 +263,11 @@ fn value<'a, 'b>(attr: &'a str, value: &'b str) -> (&'a BStr, State<'b>) { (attr.as_bytes().as_bstr(), State::Value(value.as_bytes().as_bstr())) } -fn pattern(name: &str, flags: git_glob::pattern::Mode) -> parse::Kind { +fn pattern(name: &str, flags: git_glob::pattern::Mode, first_wildcard_pos: Option) -> parse::Kind { parse::Kind::Pattern(git_glob::Pattern { text: name.into(), mode: flags, - no_wildcard_len: 0, + first_wildcard_pos, }) } diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 7f5a0598789..00429e1b311 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -10,10 +10,8 @@ pub struct Pattern { pub text: BString, /// Additional information to help accelerate pattern matching. pub mode: pattern::Mode, - /// The amount of `text` bytes which aren't a wildcard character. - /// - /// Useful for optimizations. It's 0 if the first character is a wildcard. - pub no_wildcard_len: usize, + /// The position in `text` with the first wildcard character, or `None` if there is no wildcard at all. + pub first_wildcard_pos: Option, } pub mod pattern { @@ -36,10 +34,10 @@ pub mod pattern { impl Pattern { pub fn from_bytes(text: &[u8]) -> Option { - crate::parse::pattern(text).map(|(text, mode, no_wildcard_len)| Pattern { + crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { text, mode, - no_wildcard_len, + first_wildcard_pos, }) } diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index 5d5f5b6a3b7..294867938d5 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -7,7 +7,7 @@ use bstr::{BString, ByteSlice}; /// using `pattern::Mode` flags. /// /// Returns `(pattern, mode, no_wildcard_len)` -pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, usize)> { +pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option)> { let mut mode = Mode::empty(); if pat.is_empty() { return None; @@ -32,13 +32,14 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, usize)> { if !line.contains(&b'/') { mode |= Mode::NO_SUB_DIR; } - if line.first() == Some(&b'*') && no_wildcard_len(&line[1..]).is_none() { + let pos_of_first_wildcard = first_wildcard_pos(&line); + if line.first() == Some(&b'*') && first_wildcard_pos(&line[1..]).is_none() { mode |= Mode::ENDS_WITH; } - Some((line, mode, 0)) + Some((line, mode, pos_of_first_wildcard)) } -fn no_wildcard_len(pat: &[u8]) -> Option { +fn first_wildcard_pos(pat: &[u8]) -> Option { pat.find_byteset(GLOB_CHARACTERS) } diff --git a/git-glob/tests/parse/mod.rs b/git-glob/tests/parse/mod.rs index 2c1b2c1b711..00adf786c19 100644 --- a/git-glob/tests/parse/mod.rs +++ b/git-glob/tests/parse/mod.rs @@ -4,24 +4,24 @@ use git_glob::pattern::Mode; fn mark_ends_with_pattern_specifically() { assert_eq!( git_glob::parse(br"*literal"), - Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 0)) + Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0))) ); assert_eq!( git_glob::parse(br"**literal"), - Some((r"**literal".into(), Mode::NO_SUB_DIR, 0)), + Some((r"**literal".into(), Mode::NO_SUB_DIR, Some(0))), "double-asterisk won't allow for fast comparisons" ); assert_eq!( git_glob::parse(br"*litera[l]"), - Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, 0)) + Some((r"*litera[l]".into(), Mode::NO_SUB_DIR, Some(0))) ); assert_eq!( git_glob::parse(br"*litera?"), - Some((r"*litera?".into(), Mode::NO_SUB_DIR, 0)) + Some((r"*litera?".into(), Mode::NO_SUB_DIR, Some(0))) ); assert_eq!( git_glob::parse(br"*litera\?"), - Some((r"*litera\?".into(), Mode::NO_SUB_DIR, 0)), + Some((r"*litera\?".into(), Mode::NO_SUB_DIR, Some(0))), "for now we don't handle escapes properly like git seems to do" ); } @@ -35,7 +35,7 @@ fn whitespace_only_is_ignored() { fn hash_symbols_are_not_special() { assert_eq!( git_glob::parse(b"# hello world"), - Some(("# hello world".into(), Mode::NO_SUB_DIR, 0)) + Some(("# hello world".into(), Mode::NO_SUB_DIR, None)) ); } @@ -43,7 +43,7 @@ fn hash_symbols_are_not_special() { fn backslashes_before_hashes_are_considered_an_escape_sequence() { assert_eq!( git_glob::parse(br"\#hello"), - Some((r"#hello".into(), Mode::NO_SUB_DIR, 0)) + Some((r"#hello".into(), Mode::NO_SUB_DIR, None)) ); } @@ -51,7 +51,7 @@ fn backslashes_before_hashes_are_considered_an_escape_sequence() { fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { assert_eq!( git_glob::parse(br"\hello\world"), - Some((r"\hello\world".into(), Mode::NO_SUB_DIR, 0)) + Some((r"\hello\world".into(), Mode::NO_SUB_DIR, Some(0))) ); } @@ -59,7 +59,7 @@ fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { fn leading_exclamation_mark_negates_pattern() { assert_eq!( git_glob::parse(b"!hello"), - Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, 0)) + Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR, None)) ); } @@ -67,35 +67,35 @@ fn leading_exclamation_mark_negates_pattern() { fn leading_exclamation_marks_can_be_escaped_with_backslash() { assert_eq!( git_glob::parse(br"\!hello"), - Some(("!hello".into(), Mode::NO_SUB_DIR, 0)) + Some(("!hello".into(), Mode::NO_SUB_DIR, None)) ); } #[test] fn absence_of_sub_directories_are_marked() { - assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty(), 0))); - assert_eq!(git_glob::parse(br"ab"), Some(("ab".into(), Mode::NO_SUB_DIR, 0))); + assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty(), None))); + assert_eq!(git_glob::parse(br"ab"), Some(("ab".into(), Mode::NO_SUB_DIR, None))); } #[test] fn trailing_slashes_are_marked_and_removed() { assert_eq!( git_glob::parse(b"dir/"), - Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, 0)) + Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR, None)) ); assert_eq!( git_glob::parse(b"dir///"), - Some(("dir//".into(), Mode::MUST_BE_DIR, 0)), + Some(("dir//".into(), Mode::MUST_BE_DIR, None)), "but only the last slash is removed" ); } #[test] fn trailing_spaces_are_ignored() { - assert_eq!(git_glob::parse(br"a "), Some(("a".into(), Mode::NO_SUB_DIR, 0))); + assert_eq!(git_glob::parse(br"a "), Some(("a".into(), Mode::NO_SUB_DIR, None))); assert_eq!( git_glob::parse(b"a\t\t "), - Some(("a\t\t".into(), Mode::NO_SUB_DIR, 0)), + Some(("a\t\t".into(), Mode::NO_SUB_DIR, None)), "trailing tabs are not ignored" ); } @@ -104,37 +104,37 @@ fn trailing_spaces_are_ignored() { fn trailing_spaces_can_be_escaped_to_be_literal() { assert_eq!( git_glob::parse(br"a \ "), - Some(("a ".into(), Mode::NO_SUB_DIR, 0)), + Some(("a ".into(), Mode::NO_SUB_DIR, None)), "a single escape in front of the last desired space is enough" ); assert_eq!( git_glob::parse(br"a b c "), - Some(("a b c".into(), Mode::NO_SUB_DIR, 0)), + Some(("a b c".into(), Mode::NO_SUB_DIR, None)), "spaces in the middle are fine" ); assert_eq!( git_glob::parse(br"a\ \ \ "), - Some(("a ".into(), Mode::NO_SUB_DIR, 0)), + Some(("a ".into(), Mode::NO_SUB_DIR, None)), "one can also escape every single one" ); assert_eq!( git_glob::parse(br"a \ "), - Some(("a ".into(), Mode::NO_SUB_DIR, 0)), + Some(("a ".into(), Mode::NO_SUB_DIR, None)), "or just the one in the middle, losing the last actual space" ); assert_eq!( git_glob::parse(br"a \"), - Some(("a ".into(), Mode::NO_SUB_DIR, 0)), + Some(("a ".into(), Mode::NO_SUB_DIR, None)), "escaping nothing also works as a whitespace protection" ); assert_eq!( git_glob::parse(br"a \\\ "), - Some((r"a ".into(), Mode::NO_SUB_DIR, 0)), + Some((r"a ".into(), Mode::NO_SUB_DIR, None)), "strange things like these work too" ); assert_eq!( git_glob::parse(br"a \\ "), - Some((r"a ".into(), Mode::NO_SUB_DIR, 0)), + Some((r"a ".into(), Mode::NO_SUB_DIR, None)), "strange things like these work as well" ); } From a7c3a630cd5661f26220b494f01e50c9f2dbd2e2 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 9 Apr 2022 09:33:59 +0800 Subject: [PATCH 15/56] refactor; roughly sort regex by simplicity (#301) Make sure that many of these can be shortcutted to not invoke an actual wildcard search at all. --- git-glob/tests/fixtures/make_baseline.sh | 172 +++++++++++------------ git-glob/tests/glob.rs | 86 +----------- git-glob/tests/matching/mod.rs | 85 +++++++++++ 3 files changed, 172 insertions(+), 171 deletions(-) create mode 100644 git-glob/tests/matching/mod.rs diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 02c1ef75c95..8ea3991475c 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -12,9 +12,35 @@ while read -r pattern nomatch; do echo "$pattern" > .gitignore git check-ignore -vn "$nomatch" 2>&1 || : done <>git-baseline.nmatch +*hello.txt hello.txt-and-then-some +*hello.txt goodbye.txt +*some/path/to/hello.txt some/path/to/hello.txt-and-then-some +*some/path/to/hello.txt some/other/path/to/hello.txt +*some/path/to/hello.txt a/bigger/some/path/to/hello.txt abc?def abc/def a*b*c abcd abc*abc*abc abcabcabcabcabcabcabca +a[0-9]b a_b +a[!0-9]b a0b +a[!0-9]b a9b +[!-] - +a[^0-9]b a0b +a[^0-9]b a9b +[^-] - +{a,b} a +{a,b} b +{[}],foo} } +{foo} foo +{*.foo,*.bar,*.wat} test.foo +{*.foo,*.bar,*.wat} test.bar +{*.foo,*.bar,*.wat} test.wat +abc*def abc/def +\[a-z] \a +\? \a +\* \\ +aBcDeFg abcdefg +aBcDeFg ABCDEFG +aBcDeFg AbCdEfG some/**/needle.txt some/other/notthis.txt some/**/**/needle.txt some/other/notthis.txt /**/test one/notthis @@ -23,45 +49,19 @@ some/**/**/needle.txt some/other/notthis.txt **/.* abc/ab.c .*/** a.bc .*/** abc/a.bc -a[0-9]b a_b -a[!0-9]b a0b -a[!0-9]b a9b -[!-] - -*hello.txt hello.txt-and-then-some -*hello.txt goodbye.txt -*some/path/to/hello.txt some/path/to/hello.txt-and-then-some -*some/path/to/hello.txt some/other/path/to/hello.txt ./foo foo **/foo foofoo **/foo/bar foofoo/bar /*.c mozilla-sha1/sha1.c **/m4/ltoptions.m4 csharp/src/packages/repositories.config -a[^0-9]b a0b -a[^0-9]b a9b -[^-] - some/*/needle.txt some/needle.txt some/*/needle.txt some/one/two/needle.txt some/*/needle.txt some/one/two/three/needle.txt .*/** .abc foo/** foo -*some/path/to/hello.txt a/bigger/some/path/to/hello.txt -{a,b} a -{a,b} b -{**/src/**,foo} abc/src/bar -{**/src/**,foo} foo -{[}],foo} } -{foo} foo -{*.foo,*.bar,*.wat} test.foo -{*.foo,*.bar,*.wat} test.bar -{*.foo,*.bar,*.wat} test.wat -abc*def abc/def -abc[/]def abc/def -\\[a-z] \\a -\\? \\a -\\* \\\\ -aBcDeFg abcdefg -aBcDeFg ABCDEFG -aBcDeFg AbCdEfG +{**/src/**,foo} abc/src/bar +{**/src/**,foo} foo +abc[/]def abc/def EOF while read -r pattern match; do @@ -70,67 +70,67 @@ while read -r pattern match; do git check-ignore -vn "$match" 2>&1 || : done <>git-baseline.match *.c mozilla-sha1/sha1.c +*.rs .rs +*hello.txt hello.txt +*hello.txt gareth_says_hello.txt +*hello.txt some/path/to/hello.txt +*hello.txt some\path\to\hello.txt +*hello.txt an/absolute/path/to/hello.txt +*some/path/to/hello.txt some/path/to/hello.txt a foo/a a a -a*b a_b -a*b*c abc -a*b*c a_b_c -a*b*c a___b___c -abc*abc*abc abcabcabcabcabcabcabc -a*a*a*a*a*a*a*a*a aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -a*b[xyz]c*d abxcdbxcddd -*.rs .rs -☃ ☃ -some/**/needle.txt some/needle.txt -some/**/needle.txt some/one/needle.txt -some/**/needle.txt some/one/two/needle.txt -some/**/needle.txt some/other/needle.txt -** abcde +a*b a_b +a*b*c abc +a*b*c a_b_c +a*b*c a___b___c +abc*abc*abc abcabcabcabcabcabcabc +a*a*a*a*a*a*a*a*a aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +a*b[xyz]c*d abxcdbxcddd +☃ ☃ +** abcde ** .asdf ** x/.asdf -some/**/**/needle.txt some/needle.txt -some/**/**/needle.txt some/one/needle.txt -some/**/**/needle.txt some/one/two/needle.txt -some/**/**/needle.txt some/other/needle.txt -**/test one/two/test -**/test one/test -**/test test -/**/test one/two/test -/**/test one/test -/**/test test -**/.* .abc -**/.* abc/.abc -**/foo/bar foo/bar -.*/** .abc/abc -test/** test/ -test/** test/one -test/** test/one/two -some/*/needle.txt some/one/needle.txt -a[0-9]b a0b -a[0-9]b a9b -a[!0-9]b a_b -[a-z123] 1 -[1a-z23] 1 -[123a-z] 1 -[abc-] - -[-abc] - -[-a-c] b -[a-c-] b -[-] - -a[^0-9]b a_b -*hello.txt hello.txt -*hello.txt gareth_says_hello.txt -*hello.txt some/path/to/hello.txt -*hello.txt some\\path\\to\\hello.txt -*hello.txt an/absolute/path/to/hello.txt -*some/path/to/hello.txt some/path/to/hello.txt -_[[]_[]]_[?]_[*]_!_ _[_]_?_*_!_ -a,b a,b -abc/def abc/def -\\[ [ -\\? ? -\\* * -aBcDeFg aBcDeFg +a[0-9]b a0b +a[0-9]b a9b +a[!0-9]b a_b +[a-z123] 1 +[1a-z23] 1 +[123a-z] 1 +[abc-] - +[-abc] - +[-a-c] b +[a-c-] b +[-] - +a[^0-9]b a_b +_[[]_[]]_[?]_[*]_!_ _[_]_?_*_!_ +a,b a,b +\[ [ +\? ? +\* * +aBcDeFg aBcDeFg +some/**/needle.txt some/needle.txt +some/**/needle.txt some/one/needle.txt +some/**/needle.txt some/one/two/needle.txt +some/**/needle.txt some/other/needle.txt +some/**/**/needle.txt some/needle.txt +some/**/**/needle.txt some/one/needle.txt +some/**/**/needle.txt some/one/two/needle.txt +some/**/**/needle.txt some/other/needle.txt +**/test one/two/test +**/test one/test +**/test test +/**/test one/two/test +/**/test one/test +/**/test test +**/.* .abc +**/.* abc/.abc +**/foo/bar foo/bar +.*/** .abc/abc +test/** test/ +test/** test/one +test/** test/one/two +some/*/needle.txt some/one/needle.txt +abc/def abc/def EOF git config core.ignorecase true diff --git a/git-glob/tests/glob.rs b/git-glob/tests/glob.rs index 0c12c320737..f87044113a7 100644 --- a/git-glob/tests/glob.rs +++ b/git-glob/tests/glob.rs @@ -1,86 +1,2 @@ +mod matching; mod parse; -mod matching { - use bstr::{BStr, ByteSlice}; - use std::collections::BTreeSet; - - #[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)] - pub struct GitMatch<'a> { - pattern: &'a BStr, - value: &'a BStr, - /// True if git could match `value` with `pattern` - is_match: bool, - } - - pub struct Baseline<'a> { - inner: bstr::Lines<'a>, - } - - impl<'a> Iterator for Baseline<'a> { - type Item = GitMatch<'a>; - - fn next(&mut self) -> Option { - let mut tokens = self.inner.next()?.splitn(2, |b| *b == b' '); - let pattern = tokens.next().expect("pattern").as_bstr(); - let value = tokens.next().expect("value").as_bstr().trim_start().as_bstr(); - - let git_match = self.inner.next()?; - let is_match = !git_match.starts_with(b"::\t"); - Some(GitMatch { - pattern, - value, - is_match, - }) - } - } - - impl<'a> Baseline<'a> { - fn new(input: &'a [u8]) -> Self { - Baseline { - inner: input.as_bstr().lines(), - } - } - } - - #[test] - #[ignore] - fn compare_baseline_with_ours() { - let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); - { - let input = std::fs::read(dir.join("git-baseline.match")).unwrap(); - let mut seen = BTreeSet::default(); - for git_match in Baseline::new(&input) { - assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); - assert!( - git_match.is_match, - "baseline for matches must indeed be matches - check baseline and git version: {:?}", - git_match - ); - let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); - assert!(pattern.matches(git_match.value)) - } - } - - { - let input = std::fs::read(dir.join("git-baseline.nmatch")).unwrap(); - let mut seen = BTreeSet::default(); - for git_match in Baseline::new(&input) { - assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); - assert!( - !git_match.is_match, - "baseline for no-matches must indeed not be matches - check baseline and git version: {:?}", - git_match - ); - let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); - assert!(!pattern.matches(git_match.value)) - } - } - } - - #[test] - #[ignore] - fn check_case_insensitive() {} - - #[test] - #[ignore] - fn negated_patterns() {} -} diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs new file mode 100644 index 00000000000..26d005bfe3b --- /dev/null +++ b/git-glob/tests/matching/mod.rs @@ -0,0 +1,85 @@ +use bstr::{BStr, ByteSlice}; +use std::collections::BTreeSet; + +#[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)] +pub struct GitMatch<'a> { + pattern: &'a BStr, + value: &'a BStr, + /// True if git could match `value` with `pattern` + is_match: bool, +} + +pub struct Baseline<'a> { + inner: bstr::Lines<'a>, +} + +impl<'a> Iterator for Baseline<'a> { + type Item = GitMatch<'a>; + + fn next(&mut self) -> Option { + let mut tokens = self.inner.next()?.splitn(2, |b| *b == b' '); + let pattern = tokens.next().expect("pattern").as_bstr(); + let value = tokens.next().expect("value").as_bstr().trim_start().as_bstr(); + + let git_match = self.inner.next()?; + let is_match = !git_match.starts_with(b"::\t"); + Some(GitMatch { + pattern, + value, + is_match, + }) + } +} + +impl<'a> Baseline<'a> { + fn new(input: &'a [u8]) -> Self { + Baseline { + inner: input.as_bstr().lines(), + } + } +} + +#[test] +#[ignore] +fn compare_baseline_with_ours() { + let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); + { + let input = std::fs::read(dir.join("git-baseline.match")).unwrap(); + let mut seen = BTreeSet::default(); + + for git_match in Baseline::new(&input) { + assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); + assert!( + git_match.is_match, + "baseline for matches must indeed be matches - check baseline and git version: {:?}", + git_match + ); + let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); + assert!(pattern.matches(git_match.value)) + } + } + + { + let input = std::fs::read(dir.join("git-baseline.nmatch")).unwrap(); + let mut seen = BTreeSet::default(); + + for git_match in Baseline::new(&input) { + assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); + assert!( + !git_match.is_match, + "baseline for no-matches must indeed not be matches - check baseline and git version: {:?}", + git_match + ); + let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); + assert!(!pattern.matches(git_match.value)) + } + } +} + +#[test] +#[ignore] +fn check_case_insensitive() {} + +#[test] +#[ignore] +fn negated_patterns() {} From b947ff9d2c5ae8810547066096c91c745d1466fe Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 9 Apr 2022 10:08:06 +0800 Subject: [PATCH 16/56] adjust signatures to know enough to implement git-like matching (#301) This means there is a special path-matching case, but non-path matching is possible as well. --- git-glob/src/lib.rs | 42 +++++++++++++++++++++++++++- git-glob/tests/matching/mod.rs | 50 ++++++++++++++++------------------ 2 files changed, 65 insertions(+), 27 deletions(-) diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 00429e1b311..19dea7d6342 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -28,9 +28,26 @@ pub mod pattern { const ENDS_WITH = 1 << 1; /// The pattern must match a directory, and not a file. const MUST_BE_DIR = 1 << 2; + /// The pattern matches, but should be negated. Note that this mode has to be checked and applied by the caller. const NEGATIVE = 1 << 3; } } + bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct MatchOptions: u8 { + /// Let globs not match the slash `/` literal. + const SLASH_IS_LITERAL = 1 << 0; + /// Match case insensitively for ascii characters only. + const IGNORE_CASE = 1 << 1; + } + } + + pub enum Case { + /// The case affects the match + Sensitive, + /// Ignore the case of ascii characters. + Fold, + } impl Pattern { pub fn from_bytes(text: &[u8]) -> Option { @@ -41,7 +58,30 @@ pub mod pattern { }) } - pub fn matches(&self, _value: &BStr) -> bool { + /// Return true if a match is negated. + pub fn is_negative(&self) -> bool { + self.mode.contains(Mode::NEGATIVE) + } + + /// Match the given `path` which takes slashes (and only slashes) literally. + /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play. + /// Lastly, case insensitive matches are supported as well. + pub fn matches_path( + &self, + _path: &BStr, + _basename_start_pos: Option, + _is_dir: bool, + case: Case, + ) -> bool { + let _flags = MatchOptions::SLASH_IS_LITERAL + | match case { + Case::Fold => MatchOptions::IGNORE_CASE, + Case::Sensitive => MatchOptions::empty(), + }; + todo!() + } + + pub fn matches(&self, _value: &BStr, _options: MatchOptions) -> bool { todo!() } } diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 26d005bfe3b..3757f4ab117 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -1,4 +1,5 @@ use bstr::{BStr, ByteSlice}; +use git_glob::pattern; use std::collections::BTreeSet; #[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)] @@ -43,35 +44,32 @@ impl<'a> Baseline<'a> { #[ignore] fn compare_baseline_with_ours() { let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); - { - let input = std::fs::read(dir.join("git-baseline.match")).unwrap(); + for (input_file, expected_matches) in &[("git-baseline.match", true), ("git-baseline.nmatch", false)] { + let input = std::fs::read(dir.join(*input_file)).unwrap(); let mut seen = BTreeSet::default(); - for git_match in Baseline::new(&input) { - assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); - assert!( - git_match.is_match, - "baseline for matches must indeed be matches - check baseline and git version: {:?}", - git_match - ); - let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); - assert!(pattern.matches(git_match.value)) - } - } - - { - let input = std::fs::read(dir.join("git-baseline.nmatch")).unwrap(); - let mut seen = BTreeSet::default(); - - for git_match in Baseline::new(&input) { - assert!(seen.insert(git_match), "duplicate match entry: {:?}", git_match); - assert!( - !git_match.is_match, - "baseline for no-matches must indeed not be matches - check baseline and git version: {:?}", - git_match + for m @ GitMatch { + pattern, + value, + is_match, + } in Baseline::new(&input) + { + assert!(seen.insert(m), "duplicate match entry: {:?}", m); + assert_eq!( + is_match, *expected_matches, + "baseline for matches must indeed be {} - check baseline and git version: {:?}", + expected_matches, m ); - let pattern = git_glob::Pattern::from_bytes(git_match.pattern).expect("parsing works"); - assert!(!pattern.matches(git_match.value)) + let pattern = git_glob::Pattern::from_bytes(pattern).expect("parsing works"); + assert_eq!( + pattern.matches_path( + value, + value.rfind_byte(b'/').map(|pos| pos + 1), + false, // TODO: does it make sense to pretent it is a dir and see what happens? + pattern::Case::Sensitive + ), + is_match + ) } } } From fe3d0a778210a46d46a7db15cc8d213706e45fee Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 9 Apr 2022 10:21:30 +0800 Subject: [PATCH 17/56] git-baseline now acts like a massive regression test (#301) Thus it will only keep a failure count, which we try to boost to 100% over time. --- git-glob/src/lib.rs | 2 +- git-glob/tests/matching/mod.rs | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 19dea7d6342..269616e868a 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -78,7 +78,7 @@ pub mod pattern { Case::Fold => MatchOptions::IGNORE_CASE, Case::Sensitive => MatchOptions::empty(), }; - todo!() + false } pub fn matches(&self, _value: &BStr, _options: MatchOptions) -> bool { diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 3757f4ab117..c4420098afc 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -44,6 +44,7 @@ impl<'a> Baseline<'a> { #[ignore] fn compare_baseline_with_ours() { let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); + let (mut total_matches, mut total_correct) = (0, 0); for (input_file, expected_matches) in &[("git-baseline.match", true), ("git-baseline.nmatch", false)] { let input = std::fs::read(dir.join(*input_file)).unwrap(); let mut seen = BTreeSet::default(); @@ -54,6 +55,7 @@ fn compare_baseline_with_ours() { is_match, } in Baseline::new(&input) { + total_matches += 1; assert!(seen.insert(m), "duplicate match entry: {:?}", m); assert_eq!( is_match, *expected_matches, @@ -61,17 +63,19 @@ fn compare_baseline_with_ours() { expected_matches, m ); let pattern = git_glob::Pattern::from_bytes(pattern).expect("parsing works"); - assert_eq!( - pattern.matches_path( - value, - value.rfind_byte(b'/').map(|pos| pos + 1), - false, // TODO: does it make sense to pretent it is a dir and see what happens? - pattern::Case::Sensitive - ), - is_match - ) + let actual_match = pattern.matches_path( + value, + value.rfind_byte(b'/').map(|pos| pos + 1), + false, // TODO: does it make sense to pretent it is a dir and see what happens? + pattern::Case::Sensitive, + ); + if actual_match == is_match { + total_correct += 1; + } } } + + assert_eq!(total_correct, total_matches, "We perfectly agree with git here"); } #[test] From d18ef14e7cbf9c6d316086d6c88b5676c4b7516c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 11 Apr 2022 20:42:16 +0800 Subject: [PATCH 18/56] basename parsing with simple pattern skips (#301) --- git-glob/src/lib.rs | 73 +---------------------- git-glob/src/pattern.rs | 104 +++++++++++++++++++++++++++++++++ git-glob/tests/matching/mod.rs | 56 ++++++++++++++++-- 3 files changed, 156 insertions(+), 77 deletions(-) create mode 100644 git-glob/src/pattern.rs diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 269616e868a..a7f023ba636 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -14,78 +14,7 @@ pub struct Pattern { pub first_wildcard_pos: Option, } -pub mod pattern { - use crate::Pattern; - use bitflags::bitflags; - use bstr::BStr; - - bitflags! { - #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] - pub struct Mode: u32 { - /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. - const NO_SUB_DIR = 1 << 0; - /// A pattern that is '*literal', meaning that it ends with what's given here - const ENDS_WITH = 1 << 1; - /// The pattern must match a directory, and not a file. - const MUST_BE_DIR = 1 << 2; - /// The pattern matches, but should be negated. Note that this mode has to be checked and applied by the caller. - const NEGATIVE = 1 << 3; - } - } - bitflags! { - #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] - pub struct MatchOptions: u8 { - /// Let globs not match the slash `/` literal. - const SLASH_IS_LITERAL = 1 << 0; - /// Match case insensitively for ascii characters only. - const IGNORE_CASE = 1 << 1; - } - } - - pub enum Case { - /// The case affects the match - Sensitive, - /// Ignore the case of ascii characters. - Fold, - } - - impl Pattern { - pub fn from_bytes(text: &[u8]) -> Option { - crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { - text, - mode, - first_wildcard_pos, - }) - } - - /// Return true if a match is negated. - pub fn is_negative(&self) -> bool { - self.mode.contains(Mode::NEGATIVE) - } - - /// Match the given `path` which takes slashes (and only slashes) literally. - /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play. - /// Lastly, case insensitive matches are supported as well. - pub fn matches_path( - &self, - _path: &BStr, - _basename_start_pos: Option, - _is_dir: bool, - case: Case, - ) -> bool { - let _flags = MatchOptions::SLASH_IS_LITERAL - | match case { - Case::Fold => MatchOptions::IGNORE_CASE, - Case::Sensitive => MatchOptions::empty(), - }; - false - } - - pub fn matches(&self, _value: &BStr, _options: MatchOptions) -> bool { - todo!() - } - } -} +pub mod pattern; mod parse; pub use parse::pattern as parse; diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs new file mode 100644 index 00000000000..8afaa3c0ea8 --- /dev/null +++ b/git-glob/src/pattern.rs @@ -0,0 +1,104 @@ +use crate::{pattern, Pattern}; +use bitflags::bitflags; +use bstr::BStr; + +bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Mode: u32 { + /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. + const NO_SUB_DIR = 1 << 0; + /// A pattern that is '*literal', meaning that it ends with what's given here + const ENDS_WITH = 1 << 1; + /// The pattern must match a directory, and not a file. + const MUST_BE_DIR = 1 << 2; + /// The pattern matches, but should be negated. Note that this mode has to be checked and applied by the caller. + const NEGATIVE = 1 << 3; + } +} +bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct MatchOptions: u8 { + /// Let globs not match the slash `/` literal. + const SLASH_IS_LITERAL = 1 << 0; + /// Match case insensitively for ascii characters only. + const IGNORE_CASE = 1 << 1; + } +} + +pub enum Case { + /// The case affects the match + Sensitive, + /// Ignore the case of ascii characters. + Fold, +} + +impl Pattern { + pub fn from_bytes(text: &[u8]) -> Option { + crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { + text, + mode, + first_wildcard_pos, + }) + } + + /// Return true if a match is negated. + pub fn is_negative(&self) -> bool { + self.mode.contains(Mode::NEGATIVE) + } + + /// Match the given `path` which takes slashes (and only slashes) literally. + /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play. + /// Lastly, case insensitive matches are supported as well. + pub fn matches_path<'a>( + &self, + path: impl Into<&'a BStr>, + basename_start_pos: Option, + is_dir: bool, + case: Case, + ) -> bool { + if !is_dir && self.mode.contains(pattern::Mode::MUST_BE_DIR) { + return false; + } + + let flags = MatchOptions::SLASH_IS_LITERAL + | match case { + Case::Fold => MatchOptions::IGNORE_CASE, + Case::Sensitive => MatchOptions::empty(), + }; + let path = path.into(); + + if self.mode.contains(pattern::Mode::NO_SUB_DIR) { + let basename = &path[basename_start_pos.unwrap_or_default()..]; + self.matches(basename, flags) + } else { + // TODO + false + } + } + + pub fn matches(&self, value: &BStr, options: MatchOptions) -> bool { + match self.first_wildcard_pos { + // "*literal" case + Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) => { + let text = &self.text[pos + 1..]; + if options.contains(MatchOptions::IGNORE_CASE) { + value + .len() + .checked_sub(text.len()) + .map(|start| text.eq_ignore_ascii_case(&value[start..])) + .unwrap_or(false) + } else { + value.ends_with(text.as_ref()) + } + } + Some(_pos) => todo!("actual wildcards for basename: {}", _pos), + None => { + if options.contains(MatchOptions::IGNORE_CASE) { + self.text.eq_ignore_ascii_case(value) + } else { + self.text == value + } + } + } + } +} diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index c4420098afc..ee2001a1b8a 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -1,5 +1,6 @@ use bstr::{BStr, ByteSlice}; use git_glob::pattern; +use git_glob::pattern::Case; use std::collections::BTreeSet; #[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)] @@ -62,11 +63,11 @@ fn compare_baseline_with_ours() { "baseline for matches must indeed be {} - check baseline and git version: {:?}", expected_matches, m ); - let pattern = git_glob::Pattern::from_bytes(pattern).expect("parsing works"); + let pattern = pat(pattern); let actual_match = pattern.matches_path( value, - value.rfind_byte(b'/').map(|pos| pos + 1), - false, // TODO: does it make sense to pretent it is a dir and see what happens? + basename_start_pos(value), + false, // TODO: does it make sense to pretend it is a dir and see what happens? pattern::Case::Sensitive, ); if actual_match == is_match { @@ -79,9 +80,54 @@ fn compare_baseline_with_ours() { } #[test] -#[ignore] -fn check_case_insensitive() {} +fn non_dirs_for_must_be_dir_patterns_are_ignored() { + let pattern = pat("hello/"); + + assert!(pattern.mode.contains(pattern::Mode::MUST_BE_DIR)); + assert_eq!( + pattern.text, "hello", + "a dir pattern doesn't actually end with the trailing slash" + ); + let path = "hello"; + assert!( + !pattern.matches_path(path, None, false /* is-dir */, Case::Sensitive), + "non-dirs never match a dir pattern" + ); + assert!( + pattern.matches_path(path, None, true /* is-dir */, Case::Sensitive), + "dirs can match a dir pattern with the normal rules" + ); +} + +#[test] +fn case_insensitive() { + let pattern = pat("foo"); + assert!(pattern.matches_path("FoO", None, false, Case::Fold)); + assert!(!pattern.matches_path("FoOo", None, false, Case::Fold)); + assert!(!pattern.matches_path("Foo", None, false, Case::Sensitive)); + assert!(pattern.matches_path("foo", None, false, Case::Sensitive)); +} + +#[test] +fn glob_and_literal_is_ends_with() { + let pattern = pat("*foo"); + assert!(pattern.matches_path("FoO", None, false, Case::Fold)); + assert!(pattern.matches_path("BarFoO", None, false, Case::Fold)); + assert!(!pattern.matches_path("BarFoOo", None, false, Case::Fold)); + assert!(!pattern.matches_path("Foo", None, false, Case::Sensitive)); + assert!(!pattern.matches_path("BarFoo", None, false, Case::Sensitive)); + assert!(pattern.matches_path("barfoo", None, false, Case::Sensitive)); + assert!(!pattern.matches_path("barfooo", None, false, Case::Sensitive)); +} #[test] #[ignore] fn negated_patterns() {} + +fn pat<'a>(pattern: impl Into<&'a BStr>) -> git_glob::Pattern { + git_glob::Pattern::from_bytes(pattern.into()).expect("parsing works") +} + +fn basename_start_pos(value: &BStr) -> Option { + value.rfind_byte(b'/').map(|pos| pos + 1) +} From 3956480e6fb5f4766a67ebf2860cae2f48125594 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 08:29:47 +0800 Subject: [PATCH 19/56] Keep track of absolute patterns, those that have to start with it (#301) --- git-attributes/tests/parse/attribute.rs | 6 +++++- git-attributes/tests/parse/ignore.rs | 8 ++++---- git-glob/src/parse.rs | 4 ++++ git-glob/src/pattern.rs | 10 ++++++++-- git-glob/tests/matching/mod.rs | 13 +++++++++++-- git-glob/tests/parse/mod.rs | 13 +++++++++++++ 6 files changed, 45 insertions(+), 9 deletions(-) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 3fc7be72d7b..292b51fd624 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -29,7 +29,11 @@ fn line_numbers_are_counted_correctly() { ), (pattern(r"!foo.html", Mode::NO_SUB_DIR, None), vec![set("x")], 8), (pattern(r"#a/path", Mode::empty(), None), vec![unset("a")], 10), - (pattern(r"/*", Mode::empty(), Some(1)), vec![unspecified("b")], 11), + ( + pattern(r"*", Mode::ABSOLUTE | Mode::NO_SUB_DIR | Mode::ENDS_WITH, Some(0)), + vec![unspecified("b")], + 11 + ), ] ); } diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index fd6a2b9200d..b02db516697 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -21,10 +21,10 @@ fn line_numbers_are_counted_correctly() { ("*.[oa]".into(), Mode::NO_SUB_DIR, 2), ("*.html".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH, 5), ("foo.html".into(), Mode::NO_SUB_DIR | Mode::NEGATIVE, 8), - ("/*".into(), Mode::empty(), 11), - ("/foo".into(), Mode::NEGATIVE, 12), - ("/foo/*".into(), Mode::empty(), 13), - ("/foo/bar".into(), Mode::NEGATIVE, 14) + ("*".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH | Mode::ABSOLUTE, 11), + ("foo".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR | Mode::ABSOLUTE, 12), + ("foo/*".into(), Mode::ABSOLUTE, 13), + ("foo/bar".into(), Mode::ABSOLUTE | Mode::NEGATIVE, 14) ] ); } diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index 294867938d5..150e9c4e748 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -24,6 +24,10 @@ pub fn pattern(mut pat: &[u8]) -> Option<(BString, pattern::Mode, Option) if pat.iter().all(|b| b.is_ascii_whitespace()) { return None; } + if pat.first() == Some(&b'/') { + mode |= Mode::ABSOLUTE; + pat = &pat[1..]; + } let mut line = truncate_non_escaped_trailing_spaces(pat); if line.last() == Some(&b'/') { mode |= Mode::MUST_BE_DIR; diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 8afaa3c0ea8..29452b76af7 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -13,6 +13,8 @@ bitflags! { const MUST_BE_DIR = 1 << 2; /// The pattern matches, but should be negated. Note that this mode has to be checked and applied by the caller. const NEGATIVE = 1 << 3; + /// The pattern starts with a slash and thus matches only from the beginning. + const ABSOLUTE = 1 << 4; } } bitflags! { @@ -46,9 +48,13 @@ impl Pattern { self.mode.contains(Mode::NEGATIVE) } - /// Match the given `path` which takes slashes (and only slashes) literally. + /// Match the given `path` which takes slashes (and only slashes) literally, and is relative to the repository root. + /// Note that `path` is assumed to be sharing the base with the pattern already so they can be reasonably compared. + /// /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play. - /// Lastly, case insensitive matches are supported as well. + /// `basename_start_pos` is the index at which the `path`'s basename starts. + /// + /// Lastly, `case` folding can be configured as well. pub fn matches_path<'a>( &self, path: impl Into<&'a BStr>, diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index ee2001a1b8a..8be55b3938c 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -100,7 +100,7 @@ fn non_dirs_for_must_be_dir_patterns_are_ignored() { } #[test] -fn case_insensitive() { +fn basename_case_insensitive() { let pattern = pat("foo"); assert!(pattern.matches_path("FoO", None, false, Case::Fold)); assert!(!pattern.matches_path("FoOo", None, false, Case::Fold)); @@ -109,7 +109,16 @@ fn case_insensitive() { } #[test] -fn glob_and_literal_is_ends_with() { +fn absolute_basename_matches_only_from_beginning() { + let pattern = pat("/foo"); + assert!(pattern.matches_path("FoO", None, false, Case::Fold)); + assert!(!pattern.matches_path("bar/Foo", None, false, Case::Fold)); + assert!(pattern.matches_path("foo", None, false, Case::Sensitive)); + assert!(!pattern.matches_path("bar/foo", None, false, Case::Sensitive)); +} + +#[test] +fn basename_glob_and_literal_is_ends_with() { let pattern = pat("*foo"); assert!(pattern.matches_path("FoO", None, false, Case::Fold)); assert!(pattern.matches_path("BarFoO", None, false, Case::Fold)); diff --git a/git-glob/tests/parse/mod.rs b/git-glob/tests/parse/mod.rs index 00adf786c19..ee4f233a759 100644 --- a/git-glob/tests/parse/mod.rs +++ b/git-glob/tests/parse/mod.rs @@ -71,6 +71,19 @@ fn leading_exclamation_marks_can_be_escaped_with_backslash() { ); } +#[test] +fn leading_slashes_mark_patterns_as_absolute() { + assert_eq!( + git_glob::parse(br"/absolute"), + Some(("absolute".into(), Mode::NO_SUB_DIR | Mode::ABSOLUTE, None)) + ); + + assert_eq!( + git_glob::parse(br"/absolute/path"), + Some(("absolute/path".into(), Mode::ABSOLUTE, None)) + ); +} + #[test] fn absence_of_sub_directories_are_marked() { assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty(), None))); From df9778b924610f6a82d93cdf12cfddda60e61789 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 08:36:11 +0800 Subject: [PATCH 20/56] prepare for handling absolute patterns (#301) It's unclear if this is the way as it deviates from what git does, but it's probably OK to change the logic into something more intuitive as long as results are the same. --- git-glob/src/pattern.rs | 1 + git-glob/tests/matching/mod.rs | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 29452b76af7..23a1845cb52 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -24,6 +24,7 @@ bitflags! { const SLASH_IS_LITERAL = 1 << 0; /// Match case insensitively for ascii characters only. const IGNORE_CASE = 1 << 1; + // TODO: Patterns match from the beginning only. } } diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 8be55b3938c..ce808eb9a99 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -109,12 +109,13 @@ fn basename_case_insensitive() { } #[test] +#[ignore] fn absolute_basename_matches_only_from_beginning() { - let pattern = pat("/foo"); - assert!(pattern.matches_path("FoO", None, false, Case::Fold)); - assert!(!pattern.matches_path("bar/Foo", None, false, Case::Fold)); - assert!(pattern.matches_path("foo", None, false, Case::Sensitive)); - assert!(!pattern.matches_path("bar/foo", None, false, Case::Sensitive)); + let pattern = "/foo"; + assert!(match_path(pattern, "FoO", false, Case::Fold)); + assert!(!match_path(pattern, "bar/Foo", false, Case::Fold)); + assert!(match_path(pattern, "foo", false, Case::Sensitive)); + assert!(!match_path(pattern, "bar/foo", false, Case::Sensitive)); } #[test] @@ -129,6 +130,12 @@ fn basename_glob_and_literal_is_ends_with() { assert!(!pattern.matches_path("barfooo", None, false, Case::Sensitive)); } +#[test] +#[ignore] +fn absolute_basename_glob_and_literal_is_ends_with() { + let _pattern = pat("/*foo"); +} + #[test] #[ignore] fn negated_patterns() {} @@ -137,6 +144,12 @@ fn pat<'a>(pattern: impl Into<&'a BStr>) -> git_glob::Pattern { git_glob::Pattern::from_bytes(pattern.into()).expect("parsing works") } +fn match_path<'a>(pattern: impl Into<&'a BStr>, path: impl Into<&'a BStr>, is_dir: bool, case: Case) -> bool { + let pattern = pat(pattern.into()); + let path = path.into(); + pattern.matches_path(path, basename_start_pos(path), is_dir, case) +} + fn basename_start_pos(value: &BStr) -> Option { value.rfind_byte(b'/').map(|pos| pos + 1) } From f2f3f53574b4c0b5ba85780b134825f9128fa64f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 09:37:46 +0800 Subject: [PATCH 21/56] refactor (#301) --- git-glob/tests/fixtures/make_baseline.sh | 2 ++ git-glob/tests/matching/mod.rs | 46 +++++++++++++++--------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 8ea3991475c..1a61b6c2533 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -74,6 +74,8 @@ done <>git-baseline.match *hello.txt hello.txt *hello.txt gareth_says_hello.txt *hello.txt some/path/to/hello.txt +/*foo.txt foo.txt +/*foo.txt hello/foo.txt *hello.txt some\path\to\hello.txt *hello.txt an/absolute/path/to/hello.txt *some/path/to/hello.txt some/path/to/hello.txt diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index ce808eb9a99..ff723565069 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -101,39 +101,47 @@ fn non_dirs_for_must_be_dir_patterns_are_ignored() { #[test] fn basename_case_insensitive() { - let pattern = pat("foo"); - assert!(pattern.matches_path("FoO", None, false, Case::Fold)); - assert!(!pattern.matches_path("FoOo", None, false, Case::Fold)); - assert!(!pattern.matches_path("Foo", None, false, Case::Sensitive)); - assert!(pattern.matches_path("foo", None, false, Case::Sensitive)); + let pattern = "foo"; + assert!(match_file(pattern, "FoO", Case::Fold)); + assert!(!match_file(pattern, "FoOo", Case::Fold)); + assert!(!match_file(pattern, "Foo", Case::Sensitive)); + assert!(match_file(pattern, "foo", Case::Sensitive)); } #[test] #[ignore] fn absolute_basename_matches_only_from_beginning() { let pattern = "/foo"; - assert!(match_path(pattern, "FoO", false, Case::Fold)); - assert!(!match_path(pattern, "bar/Foo", false, Case::Fold)); - assert!(match_path(pattern, "foo", false, Case::Sensitive)); - assert!(!match_path(pattern, "bar/foo", false, Case::Sensitive)); + assert!(match_file(pattern, "FoO", Case::Fold)); + assert!(!match_file(pattern, "bar/Foo", Case::Fold)); + assert!(match_file(pattern, "foo", Case::Sensitive)); + assert!(!match_file(pattern, "bar/foo", Case::Sensitive)); } #[test] fn basename_glob_and_literal_is_ends_with() { - let pattern = pat("*foo"); - assert!(pattern.matches_path("FoO", None, false, Case::Fold)); - assert!(pattern.matches_path("BarFoO", None, false, Case::Fold)); - assert!(!pattern.matches_path("BarFoOo", None, false, Case::Fold)); - assert!(!pattern.matches_path("Foo", None, false, Case::Sensitive)); - assert!(!pattern.matches_path("BarFoo", None, false, Case::Sensitive)); - assert!(pattern.matches_path("barfoo", None, false, Case::Sensitive)); - assert!(!pattern.matches_path("barfooo", None, false, Case::Sensitive)); + let pattern = "*foo"; + assert!(match_file(pattern, "FoO", Case::Fold)); + assert!(match_file(pattern, "BarFoO", Case::Fold)); + assert!(!match_file(pattern, "BarFoOo", Case::Fold)); + assert!(!match_file(pattern, "Foo", Case::Sensitive)); + assert!(!match_file(pattern, "BarFoo", Case::Sensitive)); + assert!(match_file(pattern, "barfoo", Case::Sensitive)); + assert!(!match_file(pattern, "barfooo", Case::Sensitive)); } #[test] #[ignore] fn absolute_basename_glob_and_literal_is_ends_with() { let _pattern = pat("/*foo"); + + assert!(match_file(pattern, "FoO", Case::Fold)); + assert!(match_file(pattern, "BarFoO", Case::Fold)); + assert!(!match_file(pattern, "BarFoOo", Case::Fold)); + assert!(!match_file(pattern, "Foo", Case::Sensitive)); + assert!(!match_file(pattern, "BarFoo", Case::Sensitive)); + assert!(match_file(pattern, "barfoo", Case::Sensitive)); + assert!(!match_file(pattern, "barfooo", Case::Sensitive)); } #[test] @@ -144,6 +152,10 @@ fn pat<'a>(pattern: impl Into<&'a BStr>) -> git_glob::Pattern { git_glob::Pattern::from_bytes(pattern.into()).expect("parsing works") } +fn match_file<'a>(pattern: impl Into<&'a BStr>, path: impl Into<&'a BStr>, case: Case) -> bool { + match_path(pattern, path, false, case) +} + fn match_path<'a>(pattern: impl Into<&'a BStr>, path: impl Into<&'a BStr>, is_dir: bool, case: Case) -> bool { let pattern = pat(pattern.into()); let path = path.into(); From 263298876d1b10b12011c2a221b67126d6d8202d Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 09:54:28 +0800 Subject: [PATCH 22/56] get to the point where globs probably should have a base (#301) That way, we can assume to get repo-relative paths all the time and skip stripping the base unless it's truly required. --- git-glob/src/pattern.rs | 9 ++++++--- git-glob/tests/matching/mod.rs | 20 +++++++++++++++++--- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 23a1845cb52..b8ef1622ea3 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -24,7 +24,6 @@ bitflags! { const SLASH_IS_LITERAL = 1 << 0; /// Match case insensitively for ascii characters only. const IGNORE_CASE = 1 << 1; - // TODO: Patterns match from the beginning only. } } @@ -75,7 +74,11 @@ impl Pattern { let path = path.into(); if self.mode.contains(pattern::Mode::NO_SUB_DIR) { - let basename = &path[basename_start_pos.unwrap_or_default()..]; + let basename = if self.mode.contains(pattern::Mode::ABSOLUTE) { + path + } else { + &path[basename_start_pos.unwrap_or_default()..] + }; self.matches(basename, flags) } else { // TODO @@ -85,7 +88,7 @@ impl Pattern { pub fn matches(&self, value: &BStr, options: MatchOptions) -> bool { match self.first_wildcard_pos { - // "*literal" case + // "*literal" case, overrides starts-with Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) => { let text = &self.text[pos + 1..]; if options.contains(MatchOptions::IGNORE_CASE) { diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index ff723565069..510dd036d3f 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -109,7 +109,6 @@ fn basename_case_insensitive() { } #[test] -#[ignore] fn absolute_basename_matches_only_from_beginning() { let pattern = "/foo"; assert!(match_file(pattern, "FoO", Case::Fold)); @@ -118,6 +117,16 @@ fn absolute_basename_matches_only_from_beginning() { assert!(!match_file(pattern, "bar/foo", Case::Sensitive)); } +#[test] +#[ignore] +fn absolute_path_matches_only_from_beginning() { + let pattern = "/bar/foo"; + assert!(!match_file(pattern, "FoO", Case::Fold)); + assert!(match_file(pattern, "bar/Foo", Case::Fold)); + assert!(!match_file(pattern, "foo", Case::Sensitive)); + assert!(match_file(pattern, "bar/foo", Case::Sensitive)); +} + #[test] fn basename_glob_and_literal_is_ends_with() { let pattern = "*foo"; @@ -128,12 +137,14 @@ fn basename_glob_and_literal_is_ends_with() { assert!(!match_file(pattern, "BarFoo", Case::Sensitive)); assert!(match_file(pattern, "barfoo", Case::Sensitive)); assert!(!match_file(pattern, "barfooo", Case::Sensitive)); + + assert!(match_file(pattern, "bar/foo", Case::Sensitive)); + assert!(match_file(pattern, "bar/bazfoo", Case::Sensitive)); } #[test] -#[ignore] fn absolute_basename_glob_and_literal_is_ends_with() { - let _pattern = pat("/*foo"); + let pattern = "/*foo"; assert!(match_file(pattern, "FoO", Case::Fold)); assert!(match_file(pattern, "BarFoO", Case::Fold)); @@ -142,6 +153,9 @@ fn absolute_basename_glob_and_literal_is_ends_with() { assert!(!match_file(pattern, "BarFoo", Case::Sensitive)); assert!(match_file(pattern, "barfoo", Case::Sensitive)); assert!(!match_file(pattern, "barfooo", Case::Sensitive)); + + assert!(match_file(pattern, "bar/foo", Case::Sensitive)); + assert!(match_file(pattern, "bar/bazfoo", Case::Sensitive)); } #[test] From 3d58db8a9abfb91600216b8fc6f4109f5289d776 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 10:32:51 +0800 Subject: [PATCH 23/56] a way to set a globs base path (#301) --- git-attributes/tests/parse/attribute.rs | 1 + git-glob/src/lib.rs | 6 +++ git-glob/src/pattern.rs | 20 +++++++- git-glob/tests/fixtures/make_baseline.sh | 4 +- git-glob/tests/matching/mod.rs | 62 +++++++++++++++++++----- 5 files changed, 78 insertions(+), 15 deletions(-) diff --git a/git-attributes/tests/parse/attribute.rs b/git-attributes/tests/parse/attribute.rs index 292b51fd624..abc77196239 100644 --- a/git-attributes/tests/parse/attribute.rs +++ b/git-attributes/tests/parse/attribute.rs @@ -272,6 +272,7 @@ fn pattern(name: &str, flags: git_glob::pattern::Mode, first_wildcard_pos: Optio text: name.into(), mode: flags, first_wildcard_pos, + base_path: None, }) } diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index a7f023ba636..300a7986390 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -3,6 +3,9 @@ use bstr::BString; +/// A glob pattern at a particular base path. +/// +/// This closely models how patterns appear in a directory hierarchy of include or attribute files. #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Pattern { @@ -12,6 +15,9 @@ pub struct Pattern { pub mode: pattern::Mode, /// The position in `text` with the first wildcard character, or `None` if there is no wildcard at all. pub first_wildcard_pos: Option, + /// The relative base at which this pattern resides, with trailing slash, using slashes as path separator. + /// If `None`, the pattern is considered to be at the root of the repository. + pub base_path: Option, } pub mod pattern; diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index b8ef1622ea3..6c12032ad3f 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -1,6 +1,6 @@ use crate::{pattern, Pattern}; use bitflags::bitflags; -use bstr::BStr; +use bstr::{BStr, BString, ByteSlice}; bitflags! { #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] @@ -40,6 +40,7 @@ impl Pattern { text, mode, first_wildcard_pos, + base_path: None, }) } @@ -48,6 +49,18 @@ impl Pattern { self.mode.contains(Mode::NEGATIVE) } + /// Set the base path of the pattern. + /// Must be a slash-separated relative path with a trailing slash. + /// + /// Use this upon creation of the pattern when the source file is known. + pub fn with_base(mut self, path: impl Into) -> Self { + let path = path.into(); + debug_assert!(path.ends_with(b"/"), "base must end with a trailing slash"); + debug_assert!(!path.starts_with(b"/"), "base must be relative"); + self.base_path = Some(path); + self + } + /// Match the given `path` which takes slashes (and only slashes) literally, and is relative to the repository root. /// Note that `path` is assumed to be sharing the base with the pattern already so they can be reasonably compared. /// @@ -72,6 +85,11 @@ impl Pattern { Case::Sensitive => MatchOptions::empty(), }; let path = path.into(); + debug_assert_eq!( + basename_start_pos, + path.rfind_byte(b'/').map(|p| p + 1), + "BUG: invalid cached basename_start_pos provided" + ); if self.mode.contains(pattern::Mode::NO_SUB_DIR) { let basename = if self.mode.contains(pattern::Mode::ABSOLUTE) { diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 1a61b6c2533..38948ed4e55 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -12,6 +12,8 @@ while read -r pattern nomatch; do echo "$pattern" > .gitignore git check-ignore -vn "$nomatch" 2>&1 || : done <>git-baseline.nmatch +/*foo.txt hello/foo.txt +bar/foo baz/bar/foo *hello.txt hello.txt-and-then-some *hello.txt goodbye.txt *some/path/to/hello.txt some/path/to/hello.txt-and-then-some @@ -69,13 +71,13 @@ while read -r pattern match; do echo "$pattern" > .gitignore git check-ignore -vn "$match" 2>&1 || : done <>git-baseline.match +/*foo.txt barfoo.txt *.c mozilla-sha1/sha1.c *.rs .rs *hello.txt hello.txt *hello.txt gareth_says_hello.txt *hello.txt some/path/to/hello.txt /*foo.txt foo.txt -/*foo.txt hello/foo.txt *hello.txt some\path\to\hello.txt *hello.txt an/absolute/path/to/hello.txt *some/path/to/hello.txt some/path/to/hello.txt diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 510dd036d3f..41f33d45222 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -45,7 +45,8 @@ impl<'a> Baseline<'a> { #[ignore] fn compare_baseline_with_ours() { let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); - let (mut total_matches, mut total_correct) = (0, 0); + let (mut total_matches, mut total_correct, mut panics) = (0, 0, 0); + let mut mismatches = Vec::new(); for (input_file, expected_matches) in &[("git-baseline.match", true), ("git-baseline.nmatch", false)] { let input = std::fs::read(dir.join(*input_file)).unwrap(); let mut seen = BTreeSet::default(); @@ -63,20 +64,38 @@ fn compare_baseline_with_ours() { "baseline for matches must indeed be {} - check baseline and git version: {:?}", expected_matches, m ); - let pattern = pat(pattern); - let actual_match = pattern.matches_path( - value, - basename_start_pos(value), - false, // TODO: does it make sense to pretend it is a dir and see what happens? - pattern::Case::Sensitive, - ); - if actual_match == is_match { - total_correct += 1; - } + match std::panic::catch_unwind(|| { + let pattern = pat(pattern); + pattern.matches_path( + value, + basename_start_pos(value), + false, // TODO: does it make sense to pretend it is a dir and see what happens? + pattern::Case::Sensitive, + ) + }) { + Ok(actual_match) => { + if actual_match == is_match { + total_correct += 1; + } else { + mismatches.push((pattern.to_owned(), value.to_owned(), is_match)); + } + } + Err(_) => { + panics += 1; + continue; + } + }; } } - assert_eq!(total_correct, total_matches, "We perfectly agree with git here"); + dbg!(panics); + dbg!(mismatches); + assert_eq!( + total_correct, + total_matches - panics, + "We perfectly agree with git here" + ); + assert_eq!(panics, 0); } #[test] @@ -100,12 +119,14 @@ fn non_dirs_for_must_be_dir_patterns_are_ignored() { } #[test] -fn basename_case_insensitive() { +fn basename_matches_from_end() { let pattern = "foo"; assert!(match_file(pattern, "FoO", Case::Fold)); assert!(!match_file(pattern, "FoOo", Case::Fold)); assert!(!match_file(pattern, "Foo", Case::Sensitive)); assert!(match_file(pattern, "foo", Case::Sensitive)); + assert!(!match_file(pattern, "Foo", Case::Sensitive)); + assert!(!match_file(pattern, "barfoo", Case::Sensitive)); } #[test] @@ -114,6 +135,7 @@ fn absolute_basename_matches_only_from_beginning() { assert!(match_file(pattern, "FoO", Case::Fold)); assert!(!match_file(pattern, "bar/Foo", Case::Fold)); assert!(match_file(pattern, "foo", Case::Sensitive)); + assert!(!match_file(pattern, "Foo", Case::Sensitive)); assert!(!match_file(pattern, "bar/foo", Case::Sensitive)); } @@ -125,6 +147,20 @@ fn absolute_path_matches_only_from_beginning() { assert!(match_file(pattern, "bar/Foo", Case::Fold)); assert!(!match_file(pattern, "foo", Case::Sensitive)); assert!(match_file(pattern, "bar/foo", Case::Sensitive)); + assert!(!match_file(pattern, "bar/Foo", Case::Sensitive)); +} + +#[test] +#[ignore] +fn relative_path_does_not_match_from_end() { + let pattern = "bar/foo"; + assert!(!match_file(pattern, "FoO", Case::Fold)); + assert!(match_file(pattern, "bar/Foo", Case::Fold)); + assert!(!match_file(pattern, "baz/bar/Foo", Case::Fold)); + assert!(!match_file(pattern, "foo", Case::Sensitive)); + assert!(match_file(pattern, "bar/foo", Case::Sensitive)); + assert!(!match_file(pattern, "baz/bar/foo", Case::Sensitive)); + assert!(!match_file(pattern, "Baz/bar/Foo", Case::Sensitive)); } #[test] From 056b3683eb2d4d4c478ae2655e6ef067d4d0d1e7 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 10:35:03 +0800 Subject: [PATCH 24/56] refactor (#301) --- git-glob/tests/matching/mod.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 41f33d45222..2c11de087a2 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -120,7 +120,7 @@ fn non_dirs_for_must_be_dir_patterns_are_ignored() { #[test] fn basename_matches_from_end() { - let pattern = "foo"; + let pattern = &pat("foo"); assert!(match_file(pattern, "FoO", Case::Fold)); assert!(!match_file(pattern, "FoOo", Case::Fold)); assert!(!match_file(pattern, "Foo", Case::Sensitive)); @@ -131,7 +131,7 @@ fn basename_matches_from_end() { #[test] fn absolute_basename_matches_only_from_beginning() { - let pattern = "/foo"; + let pattern = &pat("/foo"); assert!(match_file(pattern, "FoO", Case::Fold)); assert!(!match_file(pattern, "bar/Foo", Case::Fold)); assert!(match_file(pattern, "foo", Case::Sensitive)); @@ -142,7 +142,7 @@ fn absolute_basename_matches_only_from_beginning() { #[test] #[ignore] fn absolute_path_matches_only_from_beginning() { - let pattern = "/bar/foo"; + let pattern = &pat("/bar/foo"); assert!(!match_file(pattern, "FoO", Case::Fold)); assert!(match_file(pattern, "bar/Foo", Case::Fold)); assert!(!match_file(pattern, "foo", Case::Sensitive)); @@ -153,7 +153,7 @@ fn absolute_path_matches_only_from_beginning() { #[test] #[ignore] fn relative_path_does_not_match_from_end() { - let pattern = "bar/foo"; + let pattern = &pat("bar/foo"); assert!(!match_file(pattern, "FoO", Case::Fold)); assert!(match_file(pattern, "bar/Foo", Case::Fold)); assert!(!match_file(pattern, "baz/bar/Foo", Case::Fold)); @@ -165,7 +165,7 @@ fn relative_path_does_not_match_from_end() { #[test] fn basename_glob_and_literal_is_ends_with() { - let pattern = "*foo"; + let pattern = &pat("*foo"); assert!(match_file(pattern, "FoO", Case::Fold)); assert!(match_file(pattern, "BarFoO", Case::Fold)); assert!(!match_file(pattern, "BarFoOo", Case::Fold)); @@ -180,7 +180,7 @@ fn basename_glob_and_literal_is_ends_with() { #[test] fn absolute_basename_glob_and_literal_is_ends_with() { - let pattern = "/*foo"; + let pattern = &pat("/*foo"); assert!(match_file(pattern, "FoO", Case::Fold)); assert!(match_file(pattern, "BarFoO", Case::Fold)); @@ -202,12 +202,11 @@ fn pat<'a>(pattern: impl Into<&'a BStr>) -> git_glob::Pattern { git_glob::Pattern::from_bytes(pattern.into()).expect("parsing works") } -fn match_file<'a>(pattern: impl Into<&'a BStr>, path: impl Into<&'a BStr>, case: Case) -> bool { +fn match_file<'a>(pattern: &git_glob::Pattern, path: impl Into<&'a BStr>, case: Case) -> bool { match_path(pattern, path, false, case) } -fn match_path<'a>(pattern: impl Into<&'a BStr>, path: impl Into<&'a BStr>, is_dir: bool, case: Case) -> bool { - let pattern = pat(pattern.into()); +fn match_path<'a>(pattern: &git_glob::Pattern, path: impl Into<&'a BStr>, is_dir: bool, case: Case) -> bool { let path = path.into(); pattern.matches_path(path, basename_start_pos(path), is_dir, case) } From 1b2684892419f234e6006b0f3820341f162dc28b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 10:45:47 +0800 Subject: [PATCH 25/56] test that bases are ignored for basenames (#301) --- git-glob/src/pattern.rs | 7 +++++- git-glob/tests/matching/mod.rs | 46 ++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 6c12032ad3f..decd60a2653 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -54,11 +54,16 @@ impl Pattern { /// /// Use this upon creation of the pattern when the source file is known. pub fn with_base(mut self, path: impl Into) -> Self { + self.set_base(path); + self + } + + /// Similar to [`with_base()`][Self::with_base()] but suitable borrowed patterns. + pub fn set_base(&mut self, path: impl Into) { let path = path.into(); debug_assert!(path.ends_with(b"/"), "base must end with a trailing slash"); debug_assert!(!path.starts_with(b"/"), "base must be relative"); self.base_path = Some(path); - self } /// Match the given `path` which takes slashes (and only slashes) literally, and is relative to the repository root. diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 2c11de087a2..d1578d7a4b3 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -120,34 +120,42 @@ fn non_dirs_for_must_be_dir_patterns_are_ignored() { #[test] fn basename_matches_from_end() { - let pattern = &pat("foo"); - assert!(match_file(pattern, "FoO", Case::Fold)); - assert!(!match_file(pattern, "FoOo", Case::Fold)); - assert!(!match_file(pattern, "Foo", Case::Sensitive)); - assert!(match_file(pattern, "foo", Case::Sensitive)); - assert!(!match_file(pattern, "Foo", Case::Sensitive)); - assert!(!match_file(pattern, "barfoo", Case::Sensitive)); + let mut pattern = pat("foo"); + let pat = &pattern; + assert!(match_file(pat, "FoO", Case::Fold)); + assert!(!match_file(pat, "FoOo", Case::Fold)); + assert!(!match_file(pat, "Foo", Case::Sensitive)); + assert!(match_file(pat, "foo", Case::Sensitive)); + assert!(!match_file(pat, "Foo", Case::Sensitive)); + assert!(!match_file(pat, "barfoo", Case::Sensitive)); + + pattern.set_base("base/"); + let pat = &pattern; + assert!( + match_file(pat, "other/FoO", Case::Fold), + "base is ignored for basename-only patterns" + ); } #[test] fn absolute_basename_matches_only_from_beginning() { - let pattern = &pat("/foo"); - assert!(match_file(pattern, "FoO", Case::Fold)); - assert!(!match_file(pattern, "bar/Foo", Case::Fold)); - assert!(match_file(pattern, "foo", Case::Sensitive)); - assert!(!match_file(pattern, "Foo", Case::Sensitive)); - assert!(!match_file(pattern, "bar/foo", Case::Sensitive)); + let pat = &pat("/foo"); + assert!(match_file(pat, "FoO", Case::Fold)); + assert!(!match_file(pat, "bar/Foo", Case::Fold)); + assert!(match_file(pat, "foo", Case::Sensitive)); + assert!(!match_file(pat, "Foo", Case::Sensitive)); + assert!(!match_file(pat, "bar/foo", Case::Sensitive)); } #[test] #[ignore] fn absolute_path_matches_only_from_beginning() { - let pattern = &pat("/bar/foo"); - assert!(!match_file(pattern, "FoO", Case::Fold)); - assert!(match_file(pattern, "bar/Foo", Case::Fold)); - assert!(!match_file(pattern, "foo", Case::Sensitive)); - assert!(match_file(pattern, "bar/foo", Case::Sensitive)); - assert!(!match_file(pattern, "bar/Foo", Case::Sensitive)); + let pat = &pat("/bar/foo"); + assert!(!match_file(pat, "FoO", Case::Fold)); + assert!(match_file(pat, "bar/Foo", Case::Fold)); + assert!(!match_file(pat, "foo", Case::Sensitive)); + assert!(match_file(pat, "bar/foo", Case::Sensitive)); + assert!(!match_file(pat, "bar/Foo", Case::Sensitive)); } #[test] From 5bf503af86ce0dd4d0a79c9b1a451cf89b494a6e Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 10:58:56 +0800 Subject: [PATCH 26/56] make much clearer how base-path works and put in safe-guards (#301) --- git-glob/src/pattern.rs | 19 +++++++++++-------- git-glob/tests/matching/mod.rs | 23 +++++++++++------------ 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index decd60a2653..656901fd79f 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -54,26 +54,22 @@ impl Pattern { /// /// Use this upon creation of the pattern when the source file is known. pub fn with_base(mut self, path: impl Into) -> Self { - self.set_base(path); - self - } - - /// Similar to [`with_base()`][Self::with_base()] but suitable borrowed patterns. - pub fn set_base(&mut self, path: impl Into) { let path = path.into(); debug_assert!(path.ends_with(b"/"), "base must end with a trailing slash"); debug_assert!(!path.starts_with(b"/"), "base must be relative"); self.base_path = Some(path); + self } /// Match the given `path` which takes slashes (and only slashes) literally, and is relative to the repository root. - /// Note that `path` is assumed to be sharing the base with the pattern already so they can be reasonably compared. + /// Note that `path` is assumed to be relative to the repository, and that our [`base_path`][Self::base_path] + /// is assumed to contain `path`. /// /// We may take various shortcuts which is when `basename_start_pos` and `is_dir` come into play. /// `basename_start_pos` is the index at which the `path`'s basename starts. /// /// Lastly, `case` folding can be configured as well. - pub fn matches_path<'a>( + pub fn matches_repo_relative_path<'a>( &self, path: impl Into<&'a BStr>, basename_start_pos: Option, @@ -95,6 +91,13 @@ impl Pattern { path.rfind_byte(b'/').map(|p| p + 1), "BUG: invalid cached basename_start_pos provided" ); + debug_assert!( + self.base_path + .as_ref() + .map(|base| path.starts_with(base)) + .unwrap_or(true), + "repo-relative paths must be pre-filtered to match our base." + ); if self.mode.contains(pattern::Mode::NO_SUB_DIR) { let basename = if self.mode.contains(pattern::Mode::ABSOLUTE) { diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index d1578d7a4b3..8919b372212 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -66,7 +66,7 @@ fn compare_baseline_with_ours() { ); match std::panic::catch_unwind(|| { let pattern = pat(pattern); - pattern.matches_path( + pattern.matches_repo_relative_path( value, basename_start_pos(value), false, // TODO: does it make sense to pretend it is a dir and see what happens? @@ -109,32 +109,31 @@ fn non_dirs_for_must_be_dir_patterns_are_ignored() { ); let path = "hello"; assert!( - !pattern.matches_path(path, None, false /* is-dir */, Case::Sensitive), + !pattern.matches_repo_relative_path(path, None, false /* is-dir */, Case::Sensitive), "non-dirs never match a dir pattern" ); assert!( - pattern.matches_path(path, None, true /* is-dir */, Case::Sensitive), + pattern.matches_repo_relative_path(path, None, true /* is-dir */, Case::Sensitive), "dirs can match a dir pattern with the normal rules" ); } #[test] fn basename_matches_from_end() { - let mut pattern = pat("foo"); - let pat = &pattern; + let pat = &pat("foo"); assert!(match_file(pat, "FoO", Case::Fold)); assert!(!match_file(pat, "FoOo", Case::Fold)); assert!(!match_file(pat, "Foo", Case::Sensitive)); assert!(match_file(pat, "foo", Case::Sensitive)); assert!(!match_file(pat, "Foo", Case::Sensitive)); assert!(!match_file(pat, "barfoo", Case::Sensitive)); +} - pattern.set_base("base/"); - let pat = &pattern; - assert!( - match_file(pat, "other/FoO", Case::Fold), - "base is ignored for basename-only patterns" - ); +#[test] +#[should_panic] +fn base_path_must_match_or_panic_occours_in_debug_mode() { + let pat = pat("foo").with_base("base/"); + match_file(&pat, "other/FoO", Case::Fold); } #[test] @@ -216,7 +215,7 @@ fn match_file<'a>(pattern: &git_glob::Pattern, path: impl Into<&'a BStr>, case: fn match_path<'a>(pattern: &git_glob::Pattern, path: impl Into<&'a BStr>, is_dir: bool, case: Case) -> bool { let path = path.into(); - pattern.matches_path(path, basename_start_pos(path), is_dir, case) + pattern.matches_repo_relative_path(path, basename_start_pos(path), is_dir, case) } fn basename_start_pos(value: &BStr) -> Option { From 45c62597b50c3c4bac34e20cd2040b08833584cc Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 11:48:22 +0800 Subject: [PATCH 27/56] more non-basename shortcuts, and only wildcard matches left (#301) --- git-glob/src/pattern.rs | 19 +++++-- git-glob/tests/fixtures/make_baseline.sh | 1 + git-glob/tests/matching/mod.rs | 64 +++++++++++++++++++++--- 3 files changed, 71 insertions(+), 13 deletions(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 656901fd79f..721577f9227 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -101,21 +101,30 @@ impl Pattern { if self.mode.contains(pattern::Mode::NO_SUB_DIR) { let basename = if self.mode.contains(pattern::Mode::ABSOLUTE) { - path + self.base_path + .as_ref() + .and_then(|base| path.strip_prefix(base.as_slice()).map(|b| b.as_bstr())) + .unwrap_or(path) } else { &path[basename_start_pos.unwrap_or_default()..] }; self.matches(basename, flags) } else { - // TODO - false + let path = match self.base_path.as_ref() { + Some(base) => match path.strip_prefix(base.as_slice()) { + Some(path) => path.as_bstr(), + None => return false, + }, + None => path, + }; + self.matches(path, flags) } } - pub fn matches(&self, value: &BStr, options: MatchOptions) -> bool { + fn matches(&self, value: &BStr, options: MatchOptions) -> bool { match self.first_wildcard_pos { // "*literal" case, overrides starts-with - Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) => { + Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) && !value.contains(&b'/') => { let text = &self.text[pos + 1..]; if options.contains(MatchOptions::IGNORE_CASE) { value diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 38948ed4e55..946ebf0bbe4 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -72,6 +72,7 @@ while read -r pattern match; do git check-ignore -vn "$match" 2>&1 || : done <>git-baseline.match /*foo.txt barfoo.txt +*foo.txt bar/foo.txt *.c mozilla-sha1/sha1.c *.rs .rs *hello.txt hello.txt diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 8919b372212..5dac0d653d4 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -138,27 +138,43 @@ fn base_path_must_match_or_panic_occours_in_debug_mode() { #[test] fn absolute_basename_matches_only_from_beginning() { - let pat = &pat("/foo"); + let mut pattern = pat("/foo"); + let pat = &pattern; assert!(match_file(pat, "FoO", Case::Fold)); assert!(!match_file(pat, "bar/Foo", Case::Fold)); assert!(match_file(pat, "foo", Case::Sensitive)); assert!(!match_file(pat, "Foo", Case::Sensitive)); assert!(!match_file(pat, "bar/foo", Case::Sensitive)); + + pattern = pattern.with_base("base/"); + let pat = &pattern; + assert!(match_file(pat, "base/FoO", Case::Fold)); + assert!(!match_file(pat, "base/bar/Foo", Case::Fold)); + assert!(match_file(pat, "base/foo", Case::Sensitive)); + assert!(!match_file(pat, "base/Foo", Case::Sensitive)); + assert!(!match_file(pat, "base/bar/foo", Case::Sensitive)); } #[test] -#[ignore] fn absolute_path_matches_only_from_beginning() { - let pat = &pat("/bar/foo"); + let mut pattern = pat("/bar/foo"); + let pat = &pattern; assert!(!match_file(pat, "FoO", Case::Fold)); assert!(match_file(pat, "bar/Foo", Case::Fold)); assert!(!match_file(pat, "foo", Case::Sensitive)); assert!(match_file(pat, "bar/foo", Case::Sensitive)); assert!(!match_file(pat, "bar/Foo", Case::Sensitive)); + + pattern = pattern.with_base("base/"); + let pat = &pattern; + assert!(!match_file(pat, "base/FoO", Case::Fold)); + assert!(match_file(pat, "base/bar/Foo", Case::Fold)); + assert!(!match_file(pat, "base/foo", Case::Sensitive)); + assert!(match_file(pat, "base/bar/foo", Case::Sensitive)); + assert!(!match_file(pat, "base/bar/Foo", Case::Sensitive)); } #[test] -#[ignore] fn relative_path_does_not_match_from_end() { let pattern = &pat("bar/foo"); assert!(!match_file(pattern, "FoO", Case::Fold)); @@ -186,7 +202,24 @@ fn basename_glob_and_literal_is_ends_with() { } #[test] -fn absolute_basename_glob_and_literal_is_ends_with() { +#[ignore] +fn special_cases_from_corpus() { + let pattern = &pat("*some/path/to/hello.txt"); + assert!( + !match_file(pattern, "a/bigger/some/path/to/hello.txt", Case::Sensitive), + "asterisk doesn't match path separators" + ); + + let pattern = &pat("/*foo.txt"); + assert!(match_file(pattern, "hello-foo.txt", Case::Sensitive)); + assert!( + !match_file(pattern, "hello/foo.txt", Case::Sensitive), + "absolute single asterisk doesn't match paths" + ); +} + +#[test] +fn absolute_basename_glob_and_literal_is_ends_with_in_basenames() { let pattern = &pat("/*foo"); assert!(match_file(pattern, "FoO", Case::Fold)); @@ -196,14 +229,29 @@ fn absolute_basename_glob_and_literal_is_ends_with() { assert!(!match_file(pattern, "BarFoo", Case::Sensitive)); assert!(match_file(pattern, "barfoo", Case::Sensitive)); assert!(!match_file(pattern, "barfooo", Case::Sensitive)); +} - assert!(match_file(pattern, "bar/foo", Case::Sensitive)); +#[test] +#[ignore] +fn absolute_basename_glob_and_literal_is_glob_in_paths() { + let pattern = &pat("/*foo"); + + assert!(match_file(pattern, "bar/foo", Case::Sensitive), "* does not match /"); assert!(match_file(pattern, "bar/bazfoo", Case::Sensitive)); } #[test] -#[ignore] -fn negated_patterns() {} +fn negated_patterns_are_handled_by_caller() { + let pattern = &pat("!foo"); + assert!( + match_file(pattern, "foo", Case::Sensitive), + "negative patterns match like any other" + ); + assert!( + pattern.is_negative(), + "the caller checks for the negative flag and acts accordingly" + ); +} fn pat<'a>(pattern: impl Into<&'a BStr>) -> git_glob::Pattern { git_glob::Pattern::from_bytes(pattern.into()).expect("parsing works") From 1ff348c4f09839569dcd8bb93699e7004fa59d4a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 12:04:29 +0800 Subject: [PATCH 28/56] more tests for early exit in case no-wildcard prefix doesn't match (#301) This also allows means 70 out of 150ish test cases still fail. --- git-glob/src/pattern.rs | 16 +++++++++++++++- git-glob/tests/matching/mod.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 721577f9227..2946087e826 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -136,7 +136,21 @@ impl Pattern { value.ends_with(text.as_ref()) } } - Some(_pos) => todo!("actual wildcards for basename: {}", _pos), + Some(pos) => { + if options.contains(MatchOptions::IGNORE_CASE) { + if !value + .get(..pos) + .map_or(false, |value| value.eq_ignore_ascii_case(&self.text[..pos])) + { + return false; + } + } else { + if !value.starts_with(&self.text[..pos]) { + return false; + } + } + todo!("actual wildcard match for '{}' ~= '{}'", self.text, value) + } None => { if options.contains(MatchOptions::IGNORE_CASE) { self.text.eq_ignore_ascii_case(value) diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 5dac0d653d4..fe705e9c0c3 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -174,6 +174,35 @@ fn absolute_path_matches_only_from_beginning() { assert!(!match_file(pat, "base/bar/Foo", Case::Sensitive)); } +#[test] +fn absolute_path_with_recursive_glob_detects_mismatches_quickly() { + let mut pattern = pat("/bar/foo/**"); + let pat = &pattern; + assert!(!match_file(pat, "FoO", Case::Fold)); + assert!(!match_file(pat, "bar/Fooo", Case::Fold)); + assert!(!match_file(pat, "baz/bar/Foo", Case::Fold)); + + pattern = pattern.with_base("base/"); + let pat = &pattern; + assert!(!match_file(pat, "base/FoO", Case::Fold)); + assert!(!match_file(pat, "base/bar/Fooo", Case::Fold)); + assert!(!match_file(pat, "base/baz/bar/foo", Case::Sensitive)); +} + +#[test] +#[ignore] +fn absolute_path_with_recursive_glob_can_do_case_insensitive_prefix_search() { + let mut pattern = pat("/bar/foo/**"); + let pat = &pattern; + assert!(!match_file(pat, "bar/Foo/match", Case::Sensitive)); + assert!(match_file(pat, "bar/Foo/match", Case::Fold)); + + pattern = pattern.with_base("base/"); + let pat = &pattern; + assert!(!match_file(pat, "base/bar/Foo/match", Case::Sensitive)); + assert!(match_file(pat, "base/bar/Foo/match", Case::Fold)); +} + #[test] fn relative_path_does_not_match_from_end() { let pattern = &pat("bar/foo"); From 683233e86dab36cc438bed0f8b0338eb767f57a0 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 12:06:44 +0800 Subject: [PATCH 29/56] thanks clippy --- git-glob/src/pattern.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 2946087e826..5969355c273 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -144,10 +144,8 @@ impl Pattern { { return false; } - } else { - if !value.starts_with(&self.text[..pos]) { - return false; - } + } else if !value.starts_with(&self.text[..pos]) { + return false; } todo!("actual wildcard match for '{}' ~= '{}'", self.text, value) } From 04ca8349e326f7b7505a9ea49a401565259f21dc Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 13:18:00 +0800 Subject: [PATCH 30/56] frame for wildmatch function and its tests (#301) --- git-glob/src/lib.rs | 3 +++ git-glob/src/pattern.rs | 27 +++++++++------------------ git-glob/src/wildmatch.rs | 19 +++++++++++++++++++ git-glob/tests/glob.rs | 1 + git-glob/tests/wildmatch/mod.rs | 0 5 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 git-glob/src/wildmatch.rs create mode 100644 git-glob/tests/wildmatch/mod.rs diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index 300a7986390..dfc22868ffb 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -22,5 +22,8 @@ pub struct Pattern { pub mod pattern; +pub mod wildmatch; +pub use wildmatch::function::wildmatch; + mod parse; pub use parse::pattern as parse; diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 5969355c273..3da7b62afe8 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -1,4 +1,4 @@ -use crate::{pattern, Pattern}; +use crate::{pattern, wildmatch, Pattern}; use bitflags::bitflags; use bstr::{BStr, BString, ByteSlice}; @@ -17,15 +17,6 @@ bitflags! { const ABSOLUTE = 1 << 4; } } -bitflags! { - #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] - pub struct MatchOptions: u8 { - /// Let globs not match the slash `/` literal. - const SLASH_IS_LITERAL = 1 << 0; - /// Match case insensitively for ascii characters only. - const IGNORE_CASE = 1 << 1; - } -} pub enum Case { /// The case affects the match @@ -80,10 +71,10 @@ impl Pattern { return false; } - let flags = MatchOptions::SLASH_IS_LITERAL + let flags = wildmatch::Mode::SLASH_IS_LITERAL | match case { - Case::Fold => MatchOptions::IGNORE_CASE, - Case::Sensitive => MatchOptions::empty(), + Case::Fold => wildmatch::Mode::IGNORE_CASE, + Case::Sensitive => wildmatch::Mode::empty(), }; let path = path.into(); debug_assert_eq!( @@ -121,12 +112,12 @@ impl Pattern { } } - fn matches(&self, value: &BStr, options: MatchOptions) -> bool { + fn matches(&self, value: &BStr, mode: wildmatch::Mode) -> bool { match self.first_wildcard_pos { // "*literal" case, overrides starts-with Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) && !value.contains(&b'/') => { let text = &self.text[pos + 1..]; - if options.contains(MatchOptions::IGNORE_CASE) { + if mode.contains(wildmatch::Mode::IGNORE_CASE) { value .len() .checked_sub(text.len()) @@ -137,7 +128,7 @@ impl Pattern { } } Some(pos) => { - if options.contains(MatchOptions::IGNORE_CASE) { + if mode.contains(wildmatch::Mode::IGNORE_CASE) { if !value .get(..pos) .map_or(false, |value| value.eq_ignore_ascii_case(&self.text[..pos])) @@ -147,10 +138,10 @@ impl Pattern { } else if !value.starts_with(&self.text[..pos]) { return false; } - todo!("actual wildcard match for '{}' ~= '{}'", self.text, value) + crate::wildmatch(self.text.as_bstr(), value, mode) } None => { - if options.contains(MatchOptions::IGNORE_CASE) { + if mode.contains(wildmatch::Mode::IGNORE_CASE) { self.text.eq_ignore_ascii_case(value) } else { self.text == value diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs new file mode 100644 index 00000000000..eaed1663c4f --- /dev/null +++ b/git-glob/src/wildmatch.rs @@ -0,0 +1,19 @@ +use bitflags::bitflags; +bitflags! { + #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] + pub struct Mode: u8 { + /// Let globs not match the slash `/` literal. + const SLASH_IS_LITERAL = 1 << 0; + /// Match case insensitively for ascii characters only. + const IGNORE_CASE = 1 << 1; + } +} + +pub(crate) mod function { + use crate::wildmatch::Mode; + use bstr::BStr; + + pub fn wildmatch(pattern: &BStr, value: &BStr, _mode: Mode) -> bool { + todo!("actual wildcard match for '{}' ~= '{}'", pattern, value) + } +} diff --git a/git-glob/tests/glob.rs b/git-glob/tests/glob.rs index f87044113a7..c7452ed5f32 100644 --- a/git-glob/tests/glob.rs +++ b/git-glob/tests/glob.rs @@ -1,2 +1,3 @@ mod matching; mod parse; +mod wildmatch; diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs new file mode 100644 index 00000000000..e69de29bb2d From 50ff7aa7fa86e5e2a94fb15aab86470532ac3f51 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 13:39:58 +0800 Subject: [PATCH 31/56] make fmt (#301) --- git-actor/src/signature/mod.rs | 3 +- git-attributes/tests/parse/ignore.rs | 3 +- git-config/src/values.rs | 2 +- .../git_config_integration_tests.rs | 58 ++++++++++--------- git-odb/tests/odb/store/compound.rs | 3 +- git-odb/tests/odb/store/dynamic.rs | 2 +- git-odb/tests/odb/store/linked.rs | 3 +- git-ref/tests/fullname/mod.rs | 3 +- git-repository/src/commit.rs | 8 +-- git-repository/src/config.rs | 13 +++-- git-repository/src/object/commit.rs | 4 +- git-repository/src/object/tag.rs | 3 +- git-revision/src/describe.rs | 12 ++-- gitoxide-core/src/repository/commit.rs | 3 +- src/plumbing/main.rs | 1 - 15 files changed, 64 insertions(+), 57 deletions(-) diff --git a/git-actor/src/signature/mod.rs b/git-actor/src/signature/mod.rs index 5295c4dca6c..b20083a9b25 100644 --- a/git-actor/src/signature/mod.rs +++ b/git-actor/src/signature/mod.rs @@ -1,7 +1,8 @@ mod _ref { - use crate::{signature::decode, Signature, SignatureRef}; use bstr::ByteSlice; + use crate::{signature::decode, Signature, SignatureRef}; + impl<'a> SignatureRef<'a> { /// Deserialize a signature from the given `data`. pub fn from_bytes(data: &'a [u8]) -> Result, nom::Err> diff --git a/git-attributes/tests/parse/ignore.rs b/git-attributes/tests/parse/ignore.rs index b02db516697..f3c94e059ac 100644 --- a/git-attributes/tests/parse/ignore.rs +++ b/git-attributes/tests/parse/ignore.rs @@ -1,6 +1,5 @@ use bstr::BString; -use git_glob::pattern::Mode; -use git_glob::Pattern; +use git_glob::{pattern::Mode, Pattern}; use git_testtools::fixture_bytes; #[test] diff --git a/git-config/src/values.rs b/git-config/src/values.rs index 29411c59adf..871a444e3f2 100644 --- a/git-config/src/values.rs +++ b/git-config/src/values.rs @@ -1,8 +1,8 @@ //! Rust containers for valid `git-config` types. -use bstr::BStr; use std::{borrow::Cow, convert::TryFrom, fmt::Display, str::FromStr}; +use bstr::BStr; use quick_error::quick_error; #[cfg(feature = "serde")] use serde::{Serialize, Serializer}; diff --git a/git-config/tests/integration_tests/git_config_integration_tests.rs b/git-config/tests/integration_tests/git_config_integration_tests.rs index ce581d6de4c..904df0f6296 100644 --- a/git-config/tests/integration_tests/git_config_integration_tests.rs +++ b/git-config/tests/integration_tests/git_config_integration_tests.rs @@ -1,8 +1,9 @@ #[cfg(test)] mod mutable_value { - use git_config::file::GitConfig; use std::convert::TryFrom; + use git_config::file::GitConfig; + fn init_config() -> GitConfig<'static> { GitConfig::try_from( r#"[core] @@ -137,9 +138,10 @@ b #[cfg(test)] mod mutable_multi_value { - use git_config::file::GitConfig; use std::{borrow::Cow, convert::TryFrom}; + use git_config::file::GitConfig; + fn init_config() -> GitConfig<'static> { GitConfig::try_from( r#"[core] @@ -277,13 +279,12 @@ a"#, #[cfg(test)] mod from_paths_tests { - use std::borrow::Cow; - use std::path::Path; - use std::{fs, io}; + use std::{borrow::Cow, fs, io, path::Path}; - use git_config::file::from_paths::Error; - use git_config::file::{from_paths, GitConfig}; - use git_config::parser::ParserOrIoError; + use git_config::{ + file::{from_paths, from_paths::Error, GitConfig}, + parser::ParserOrIoError, + }; use tempfile::tempdir; /// Escapes backslash when writing a path as string so that it is a valid windows path @@ -749,12 +750,9 @@ mod from_paths_tests { #[cfg(test)] mod from_env_tests { - use std::borrow::Cow; - use std::{env, fs}; + use std::{borrow::Cow, env, fs}; - use git_config::file::from_paths::Options; - use git_config::file::GitConfig; - use git_config::file::{from_env, from_paths}; + use git_config::file::{from_env, from_paths, from_paths::Options, GitConfig}; use serial_test::serial; use tempfile::tempdir; @@ -892,10 +890,12 @@ mod from_env_tests { #[cfg(test)] mod get_raw_value { - use git_config::file::{GitConfig, GitConfigError}; - use git_config::parser::SectionHeaderName; - use std::borrow::Cow; - use std::convert::TryFrom; + use std::{borrow::Cow, convert::TryFrom}; + + use git_config::{ + file::{GitConfig, GitConfigError}, + parser::SectionHeaderName, + }; #[test] fn single_section() { @@ -956,11 +956,12 @@ mod get_raw_value { #[cfg(test)] mod get_value { - use git_config::file::GitConfig; - use git_config::values::{Boolean, Bytes, TrueVariant}; - use std::borrow::Cow; - use std::convert::TryFrom; - use std::error::Error; + use std::{borrow::Cow, convert::TryFrom, error::Error}; + + use git_config::{ + file::GitConfig, + values::{Boolean, Bytes, TrueVariant}, + }; #[test] fn single_section() -> Result<(), Box> { @@ -1005,10 +1006,12 @@ mod get_value { #[cfg(test)] mod get_raw_multi_value { - use git_config::file::{GitConfig, GitConfigError}; - use git_config::parser::SectionHeaderName; - use std::borrow::Cow; - use std::convert::TryFrom; + use std::{borrow::Cow, convert::TryFrom}; + + use git_config::{ + file::{GitConfig, GitConfigError}, + parser::SectionHeaderName, + }; #[test] fn single_value_is_identical_to_single_value_query() { @@ -1089,9 +1092,10 @@ mod get_raw_multi_value { #[cfg(test)] mod display { - use git_config::file::GitConfig; use std::convert::TryFrom; + use git_config::file::GitConfig; + #[test] fn can_reconstruct_empty_config() { let config = r#" diff --git a/git-odb/tests/odb/store/compound.rs b/git-odb/tests/odb/store/compound.rs index e8bad044cfe..f5a5711f7b5 100644 --- a/git-odb/tests/odb/store/compound.rs +++ b/git-odb/tests/odb/store/compound.rs @@ -9,9 +9,10 @@ fn db() -> git_odb::Handle { } mod locate { - use crate::{hex_to_id, odb::store::compound::db}; use git_odb::Find; + use crate::{hex_to_id, odb::store::compound::db}; + fn can_locate(db: &git_odb::Handle, hex_id: &str) { let mut buf = vec![]; assert!(db diff --git a/git-odb/tests/odb/store/dynamic.rs b/git-odb/tests/odb/store/dynamic.rs index 1e760be9e8c..6d6b6df9a27 100644 --- a/git-odb/tests/odb/store/dynamic.rs +++ b/git-odb/tests/odb/store/dynamic.rs @@ -1,6 +1,6 @@ -use git_hash::ObjectId; use std::process::Command; +use git_hash::ObjectId; use git_odb::{store, Find, FindExt, Write}; use git_testtools::{fixture_path, hex_to_id}; diff --git a/git-odb/tests/odb/store/linked.rs b/git-odb/tests/odb/store/linked.rs index bc9d680483c..4171ad12d33 100644 --- a/git-odb/tests/odb/store/linked.rs +++ b/git-odb/tests/odb/store/linked.rs @@ -57,10 +57,11 @@ mod locate { } mod init { - use crate::odb::{alternate::alternate, store::linked::db}; use git_hash::ObjectId; use git_odb::Find; + use crate::odb::{alternate::alternate, store::linked::db}; + #[test] fn multiple_linked_repositories_via_alternates() -> crate::Result { let tmp = git_testtools::tempfile::TempDir::new()?; diff --git a/git-ref/tests/fullname/mod.rs b/git-ref/tests/fullname/mod.rs index fa09ebc6c6d..01fb8129a1e 100644 --- a/git-ref/tests/fullname/mod.rs +++ b/git-ref/tests/fullname/mod.rs @@ -1,6 +1,7 @@ -use git_ref::Category; use std::convert::TryInto; +use git_ref::Category; + #[test] fn file_name() { let name: git_ref::FullName = "refs/heads/main".try_into().unwrap(); diff --git a/git-repository/src/commit.rs b/git-repository/src/commit.rs index c1f18165f35..d3175990552 100644 --- a/git-repository/src/commit.rs +++ b/git-repository/src/commit.rs @@ -17,12 +17,12 @@ pub enum Error { /// pub mod describe { - use crate::bstr::BStr; - use crate::ext::ObjectIdExt; - use crate::Repository; + use std::borrow::Cow; + use git_hash::ObjectId; use git_odb::Find; - use std::borrow::Cow; + + use crate::{bstr::BStr, ext::ObjectIdExt, Repository}; /// The result of [try_resolve()][Platform::try_resolve()]. pub struct Resolution<'repo> { diff --git a/git-repository/src/config.rs b/git-repository/src/config.rs index f9f27b4949c..1d747ca6d32 100644 --- a/git-repository/src/config.rs +++ b/git-repository/src/config.rs @@ -30,13 +30,16 @@ pub(crate) struct Cache { } mod cache { + use std::{borrow::Cow, convert::TryFrom}; + + use git_config::{ + file::GitConfig, + values, + values::{Boolean, Integer}, + }; + use super::{Cache, Error}; use crate::bstr::ByteSlice; - use git_config::file::GitConfig; - use git_config::values; - use git_config::values::{Boolean, Integer}; - use std::borrow::Cow; - use std::convert::TryFrom; impl Cache { pub fn new(git_dir: &std::path::Path) -> Result { diff --git a/git-repository/src/object/commit.rs b/git-repository/src/object/commit.rs index 434cc6e1399..b4b56138fa7 100644 --- a/git-repository/src/object/commit.rs +++ b/git-repository/src/object/commit.rs @@ -18,10 +18,10 @@ mod error { } } -use crate::id::Ancestors; - pub use error::Error; +use crate::id::Ancestors; + impl<'repo> Commit<'repo> { /// Create an owned instance of this object, copying our data in the process. pub fn detached(&self) -> DetachedObject { diff --git a/git-repository/src/object/tag.rs b/git-repository/src/object/tag.rs index 419e88d2ce2..f7532aa1f27 100644 --- a/git-repository/src/object/tag.rs +++ b/git-repository/src/object/tag.rs @@ -1,5 +1,4 @@ -use crate::ext::ObjectIdExt; -use crate::Tag; +use crate::{ext::ObjectIdExt, Tag}; impl<'repo> Tag<'repo> { /// Decode this tag and return the id of its target. diff --git a/git-revision/src/describe.rs b/git-revision/src/describe.rs index eda3c66c778..6c355cc23c0 100644 --- a/git-revision/src/describe.rs +++ b/git-revision/src/describe.rs @@ -143,21 +143,19 @@ where } pub(crate) mod function { - use super::Error; - use hash_hasher::HashBuildHasher; - use std::cmp::Ordering; - use std::collections::HashMap; use std::{ borrow::Cow, - collections::{hash_map, VecDeque}, + cmp::Ordering, + collections::{hash_map, HashMap, VecDeque}, iter::FromIterator, }; - use crate::describe::{Flags, Options, MAX_CANDIDATES}; use git_hash::oid; use git_object::{bstr::BStr, CommitRefIter}; + use hash_hasher::HashBuildHasher; - use super::Outcome; + use super::{Error, Outcome}; + use crate::describe::{Flags, Options, MAX_CANDIDATES}; /// Given a `commit` id, traverse the commit graph and collect candidate names from the `name_by_oid` mapping to produce /// an `Outcome`, which converted [`into_format()`][Outcome::into_format()] will produce a typical `git describe` string. diff --git a/gitoxide-core/src/repository/commit.rs b/gitoxide-core/src/repository/commit.rs index f8dc75e181d..1e449121361 100644 --- a/gitoxide-core/src/repository/commit.rs +++ b/gitoxide-core/src/repository/commit.rs @@ -1,6 +1,7 @@ +use std::path::PathBuf; + use anyhow::{Context, Result}; use git_repository as git; -use std::path::PathBuf; pub fn describe( repo: impl Into, diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs index 00afd0a50e4..f8b391a56b4 100644 --- a/src/plumbing/main.rs +++ b/src/plumbing/main.rs @@ -9,7 +9,6 @@ use std::{ use anyhow::Result; use clap::Parser; - use gitoxide_core as core; use gitoxide_core::pack::verify; From bd8f95f757e45b3cf8523d3e11503f4571461abf Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 13:40:06 +0800 Subject: [PATCH 32/56] test corpus for wildcard matches (#301) --- git-glob/src/parse.rs | 4 +- git-glob/src/pattern.rs | 6 +- git-glob/src/wildmatch.rs | 3 +- git-glob/tests/matching/mod.rs | 6 +- git-glob/tests/wildmatch/mod.rs | 303 ++++++++++++++++++++++++++++++++ 5 files changed, 314 insertions(+), 8 deletions(-) diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index 150e9c4e748..e705421a052 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -1,7 +1,7 @@ -use crate::pattern; -use crate::pattern::Mode; use bstr::{BString, ByteSlice}; +use crate::{pattern, pattern::Mode}; + #[inline] /// A sloppy parser that performs only the most basic checks, providing additional information /// using `pattern::Mode` flags. diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 3da7b62afe8..2935f88eeca 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -1,7 +1,8 @@ -use crate::{pattern, wildmatch, Pattern}; use bitflags::bitflags; use bstr::{BStr, BString, ByteSlice}; +use crate::{pattern, wildmatch, Pattern}; + bitflags! { #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Mode: u32 { @@ -112,7 +113,8 @@ impl Pattern { } } - fn matches(&self, value: &BStr, mode: wildmatch::Mode) -> bool { + pub fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { + let value = value.into(); match self.first_wildcard_pos { // "*literal" case, overrides starts-with Some(pos) if self.mode.contains(pattern::Mode::ENDS_WITH) && !value.contains(&b'/') => { diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index eaed1663c4f..0955571187d 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -10,9 +10,10 @@ bitflags! { } pub(crate) mod function { - use crate::wildmatch::Mode; use bstr::BStr; + use crate::wildmatch::Mode; + pub fn wildmatch(pattern: &BStr, value: &BStr, _mode: Mode) -> bool { todo!("actual wildcard match for '{}' ~= '{}'", pattern, value) } diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index fe705e9c0c3..a12df1d4c5d 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -1,8 +1,8 @@ -use bstr::{BStr, ByteSlice}; -use git_glob::pattern; -use git_glob::pattern::Case; use std::collections::BTreeSet; +use bstr::{BStr, ByteSlice}; +use git_glob::{pattern, pattern::Case}; + #[derive(Debug, Ord, PartialOrd, Eq, PartialEq, Copy, Clone)] pub struct GitMatch<'a> { pattern: &'a BStr, diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index e69de29bb2d..526800dc23c 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -0,0 +1,303 @@ +use git_glob::pattern::Case; +use git_glob::wildmatch; +use std::fmt::{Debug, Formatter}; +use std::panic::catch_unwind; + +#[test] +#[ignore] +fn corpus() { + // based on git/t/t3070.sh + let tests = [ + (1u8,1u8,1u8,1u8, "foo", "foo"), + (0,0,0,0, "foo", "bar"), + (1,1,1,1, "foo", "???"), + (0,0,0,0, "foo", "??"), + (1,1,1,1, "foo", "*"), + (1,1,1,1, "foo", "f*"), + (0,0,0,0, "foo", "*f"), + (1,1,1,1, "foo", "*foo*"), + (1,1,1,1, "foobar", "*ob*a*r*"), + (1,1,1,1, "aaaaaaabababab", "*ab"), + (1,1,1,1, "foo*", r"foo\*"), + (0,0,0,0, "foobar", r"foo\*bar"), + (1,1,1,1, r"f\oo", r"f\\oo"), + (1,1,1,1, "ball", "*[al]?"), + (0,0,0,0, "ten", "[ten]"), + (1,1,1,1, "ten", "**[!te]"), + (0,0,0,0, "ten", "**[!ten]"), + (1,1,1,1, "ten", "t[a-g]n"), + (0,0,0,0, "ten", "t[!a-g]n"), + (1,1,1,1, "ton", "t[!a-g]n"), + (1,1,1,1, "ton", "t[^a-g]n"), + (1,1,1,1, "a]b", "a[]]b"), + (1,1,1,1, "a-b", "a[]-]b"), + (1,1,1,1, "a]b", "a[]-]b"), + (0,0,0,0, "aab", "a[]-]b"), + (1,1,1,1, "aab", "a[]a-]b"), + (1,1,1,1, "]", "]"), + // Extended slash-matching features + (0,0,1,1, "foo/baz/bar", "foo*bar"), + (0,0,1,1, "foo/baz/bar", "foo**bar"), + (1,1,1,1, "foobazbar", "foo**bar"), + (1,1,1,1, "foo/baz/bar", "foo/**/bar"), + (1,1,0,0, "foo/baz/bar", "foo/**/**/bar"), + (1,1,1,1, "foo/b/a/z/bar", "foo/**/bar"), + (1,1,1,1, "foo/b/a/z/bar", "foo/**/**/bar"), + (1,1,0,0, "foo/bar", "foo/**/bar"), + (1,1,0,0, "foo/bar", "foo/**/**/bar"), + (0,0,1,1, "foo/bar", "foo?bar"), + (0,0,1,1, "foo/bar", "foo[/]bar"), + (0,0,1,1, "foo/bar", "foo[^a-z]bar"), + (0,0,1,1, "foo/bar", "f[^eiu][^eiu][^eiu][^eiu][^eiu]r"), + (1,1,1,1, "foo-bar", "f[^eiu][^eiu][^eiu][^eiu][^eiu]r"), + (1,1,0,0, "foo", "**/foo"), + (1,1,1,1, "XXX/foo", "**/foo"), + (1,1,1,1, "bar/baz/foo", "**/foo"), + (0,0,1,1, "bar/baz/foo", "*/foo"), + (0,0,1,1, "foo/bar/baz", "**/bar*"), + (1,1,1,1, "deep/foo/bar/baz", "**/bar/*"), + (0,0,1,1, "deep/foo/bar/baz/", "**/bar/*"), + (1,1,1,1, "deep/foo/bar/baz/", "**/bar/**"), + (0,0,0,0, "deep/foo/bar", "**/bar/*"), + (1,1,1,1, "deep/foo/bar/", "**/bar/**"), + (0,0,1,1, "foo/bar/baz", "**/bar**"), + (1,1,1,1, "foo/bar/baz/x", "*/bar/**"), + (0,0,1,1, "deep/foo/bar/baz/x", "*/bar/**"), + (1,1,1,1, "deep/foo/bar/baz/x", "**/bar/*/*"), + + // Various additional tests + (0,0,0,0, "acrt", "a[c-c]st"), + (1,1,1,1, "acrt", "a[c-c]rt"), + (0,0,0,0, "]", "[!]-]"), + (1,1,1,1, "a", "[!]-]"), + (0,0,0,0, "", r"\"), + (0,0,0,0, r"XXX/\", r"*/\"), + (1,1,1,1, r"XXX/\", r"*/\\"), + (1,1,1,1, "foo", "foo"), + (1,1,1,1, "@foo", "@foo"), + (0,0,0,0, "foo", "@foo"), + (1,1,1,1, "[ab]", r"\[ab]"), + (1,1,1,1, "[ab]", "[[]ab]"), + (1,1,1,1, "[ab]", "[[:]ab]"), + (0,0,0,0, "[ab]", "[[::]ab]"), + (1,1,1,1, "[ab]", "[[:digit]ab]"), + (1,1,1,1, "[ab]", r"[\[:]ab]"), + (1,1,1,1, "?a?b", r"\??\?b"), + (1,1,1,1, "abc", r"\a\b\c"), + (1,1,1,1, "foo/bar/baz/to", "**/t[o]"), + + // Character class tests + (1,1,1,1, "a1B", "[[:alpha:]][[:digit:]][[:upper:]]"), + (0,1,0,1, "a", "[[:digit:][:upper:][:space:]]"), + (1,1,1,1, "A", "[[:digit:][:upper:][:space:]]"), + (1,1,1,1, "1", "[[:digit:][:upper:][:space:]]"), + (0,0,0,0, "1", "[[:digit:][:upper:][:spaci:]]"), + (1,1,1,1, " ", "[[:digit:][:upper:][:space:]]"), + (0,0,0,0, ".", "[[:digit:][:upper:][:space:]]"), + (1,1,1,1, ".", "[[:digit:][:punct:][:space:]]"), + (1,1,1,1, "5", "[[:xdigit:]]"), + (1,1,1,1, "f", "[[:xdigit:]]"), + (1,1,1,1, "D", "[[:xdigit:]]"), + (1,1,1,1, "_", "[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]"), + (1,1,1,1, ".", "[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]"), + (1,1,1,1, "5", "[a-c[:digit:]x-z]"), + (1,1,1,1, "b", "[a-c[:digit:]x-z]"), + (1,1,1,1, "y", "[a-c[:digit:]x-z]"), + (0,0,0,0, "q", "[a-c[:digit:]x-z]"), + + // Additional tests, including some malformed wild(patterns + (1,1,1,1, "]", "[\\-^]"), + (0,0,0,0, "[", "[\\-^]"), + (1,1,1,1, "-", r"[\-_]"), + (1,1,1,1, "]", r"[\]]"), + (0,0,0,0, r"\]", r"[\]]"), + (0,0,0,0, r"\", r"[\]]"), + (0,0,0,0, "ab", "a[]b"), + (0,0,0,0, "ab", "[!"), + (0,0,0,0, "ab", "[-"), + (1,1,1,1, "-", "[-]"), + (0,0,0,0, "-", "[a-"), + (0,0,0,0, "-", "[!a-"), + (1,1,1,1, "-", "[--A]"), + (1,1,1,1, "5", "[--A]"), + (1,1,1,1, " ", "[ --]"), + (1,1,1,1, "$", "[ --]"), + (1,1,1,1, "-", "[ --]"), + (0,0,0,0, "0", "[ --]"), + (1,1,1,1, "-", "[---]"), + (1,1,1,1, "-", "[------]"), + (0,0,0,0, "j", "[a-e-n]"), + (1,1,1,1, "-", "[a-e-n]"), + (1,1,1,1, "a", "[!------]"), + (0,0,0,0, "[", "[]-a]"), + (1,1,1,1, "^", "[]-a]"), + (0,0,0,0, "^", "[!]-a]"), + (1,1,1,1, "[", "[!]-a]"), + (1,1,1,1, "^", "[a^bc]"), + (1,1,1,1, "-b]", "[a-]b]"), + (0,0,0,0, r"\", r"[\]"), + (1,1,1,1, r"\", r"[\\]"), + (0,0,0,0, r"\", r"[!\\]"), + (1,1,1,1, "G", r"[A-\\]"), + (0,0,0,0, "aaabbb", "b*a"), + (0,0,0,0, "aabcaa", "*ba*"), + (1,1,1,1, ",", "[,]"), + (1,1,1,1, ",", r"[\\,]"), + (1,1,1,1, r"\", r"[\\,]"), + (1,1,1,1, "-", "[,-.]"), + (0,0,0,0, "+", "[,-.]"), + (0,0,0,0, "-.]", "[,-.]"), + (1,1,1,1, "2", r"[\1-\3]"), + (1,1,1,1, "3", r"[\1-\3]"), + (0,0,0,0, "4", r"[\1-\3]"), + (1,1,1,1, r"\", r"[[-\]]"), + (1,1,1,1, "[", r"[[-\]]"), + (1,1,1,1, "]", r"[[-\]]"), + (0,0,0,0, "-", r"[[-\]]"), + + // Test recursion + (1,1,1,1, "-adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1", "-*-*-*-*-*-*-12-*-*-*-m-*-*-*"), + (0,0,0,0, "-adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1", "-*-*-*-*-*-*-12-*-*-*-m-*-*-*"), + (0,0,0,0, "-adobe-courier-bold-o-normal--12-120-75-75-/-70-iso8859-1", "-*-*-*-*-*-*-12-*-*-*-m-*-*-*"), + (1,1,1,1, "XXX/adobe/courier/bold/o/normal//12/120/75/75/m/70/iso8859/1", "XXX/*/*/*/*/*/*/12/*/*/*/m/*/*/*"), + (0,0,0,0, "XXX/adobe/courier/bold/o/normal//12/120/75/75/X/70/iso8859/1", "XXX/*/*/*/*/*/*/12/*/*/*/m/*/*/*"), + (1,1,1,1, "abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt", "**/*a*b*g*n*t"), + (0,0,0,0, "abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz", "**/*a*b*g*n*t"), + (0,0,0,0, "foo", "*/*/*"), + (0,0,0,0, "foo/bar", "*/*/*"), + (1,1,1,1, "foo/bba/arr", "*/*/*"), + (0,0,1,1, "foo/bb/aa/rr", "*/*/*"), + (1,1,1,1, "foo/bb/aa/rr", "**/**/**"), + (1,1,1,1, "abcXdefXghi", "*X*i"), + (0,0,1,1, "ab/cXd/efXg/hi", "*X*i"), + (1,1,1,1, "ab/cXd/efXg/hi", "*/*X*/*/*i"), + (1,1,1,1, "ab/cXd/efXg/hi", "**/*X*/**/*i"), + + // Extra path(tests + (0,0,0,0, "foo", "fo"), + (1,1,1,1,"foo/bar", "foo/bar"), + (1,1,1,1, "foo/bar", "foo/*"), + (0,0,1,1, "foo/bba/arr", "foo/*"), + (1,1,1,1, "foo/bba/arr", "foo/**"), + (0,0,1,1, "foo/bba/arr", "foo*"), + (0,0,1,1, "foo/bba/arr", "foo/*arr"), + (0,0,1,1, "foo/bba/arr", "foo/**arr"), + (0,0,0,0, "foo/bba/arr", "foo/*z"), + (0,0,0,0, "foo/bba/arr", "foo/**z"), + (0,0,1,1, "foo/bar", "foo?bar"), + (0,0,1,1, "foo/bar", "foo[/]bar"), + (0,0,1,1, "foo/bar", "foo[^a-z]bar"), + (0,0,1,1, "ab/cXd/efXg/hi", "*Xg*i"), + + // Extra case-sensitivity tests + (0,1,0,1, "a", "[A-Z]"), + (1,1,1,1, "A", "[A-Z]"), + (0,1,0,1, "A", "[a-z]"), + (1,1,1,1, "a", "[a-z]"), + (0,1,0,1, "a", "[[:upper:]]"), + (1,1,1,1, "A", "[[:upper:]]"), + (0,1,0,1, "A", "[[:lower:]]"), + (1,1,1,1, "a", "[[:lower:]]"), + (0,1,0,1, "A", "[B-Za]"), + (1,1,1,1, "a", "[B-Za]"), + (0,1,0,1, "A", "[B-a]"), + (1,1,1,1, "a", "[B-a]"), + (0,1,0,1, "z", "[Z-y]"), + (1,1,1,1, "Z", "[Z-y]"), + ]; + + let mut failures = Vec::new(); + let mut panics = 0; + for (glob_match, glob_imatch, path_match, path_imatch, text, pattern_text) in tests { + let pattern = git_glob::Pattern::from_bytes(pattern_text.as_bytes()).expect("valid (enough) pattern"); + let actual_path_match: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Sensitive)).into(); + let actual_path_imatch: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Fold)).into(); + let actual_glob_match: MatchResult = catch_unwind(|| pattern.matches(text, wildmatch::Mode::empty())).into(); + let actual_glob_imatch: MatchResult = + catch_unwind(|| pattern.matches(text, wildmatch::Mode::IGNORE_CASE)).into(); + + let expected: (MatchResult, MatchResult, MatchResult, MatchResult) = ( + glob_match.into(), + glob_imatch.into(), + path_match.into(), + path_imatch.into(), + ); + let actual = ( + actual_glob_match, + actual_glob_imatch, + actual_path_match, + actual_path_imatch, + ); + if actual != expected { + if let (MatchResult::Panic, MatchResult::Panic, MatchResult::Panic, MatchResult::Panic) = actual { + panics += 1; + } else { + failures.push((pattern_text, text, actual, expected)); + } + } + } + + dbg!(&failures); + dbg!(panics); + dbg!(tests.len() - panics); + assert_eq!(failures.len(), 0); + + // TODO: reproduce these + // (0 0 0 0 \ + // 1 1 1 1 '\' '\' + // (0 0 0 0 \ + // E E E E 'foo' '' + // (0 0 0 0 \ + // 1 1 1 1 'a[]b' 'a[]b' + // (0 0 0 0 \ + // 1 1 1 1 'ab[' 'ab[' + // (0 0 1 1 \ + // 1 1 1 1 foo/bba/arr 'foo**' +} + +#[derive(Eq, PartialEq)] +enum MatchResult { + Match, + NoMatch, + Panic, +} + +impl From> for MatchResult { + fn from(v: std::thread::Result) -> Self { + use MatchResult::*; + match v { + Ok(v) if v => Match, + Ok(_) => NoMatch, + Err(_) => Panic, + } + } +} + +impl From for MatchResult { + fn from(v: u8) -> Self { + use MatchResult::*; + match v { + 1 => Match, + 0 => NoMatch, + _ => unreachable!("BUG: only use 0 or 1 for expected values"), + } + } +} + +impl Debug for MatchResult { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + use MatchResult::*; + f.write_str(match self { + Match => "=", + NoMatch => "≠", + Panic => "E", + }) + } +} + +fn match_file_path(pattern: &git_glob::Pattern, path: &str, case: Case) -> bool { + pattern.matches_repo_relative_path(path, basename_of(path), false /* is_dir */, case) +} +fn basename_of(path: &str) -> Option { + path.rfind('/').map(|pos| pos + 1) +} From 1336bc938cc43e3a2f9e47af64f2c9933c9fc961 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 15:54:08 +0800 Subject: [PATCH 33/56] fix logic in wildmatch tests; validate feasibility of all test cases (#301) --- git-glob/tests/fixtures/make_baseline.sh | 3 ++ git-glob/tests/matching/mod.rs | 5 ++ git-glob/tests/wildmatch/mod.rs | 58 +++++++++++++++++------- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index 946ebf0bbe4..f33fb7082c2 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -12,6 +12,9 @@ while read -r pattern nomatch; do echo "$pattern" > .gitignore git check-ignore -vn "$nomatch" 2>&1 || : done <>git-baseline.nmatch +*/\ XXX/\ +*/\\ XXX/\ +foo*bar foo/baz/bar /*foo.txt hello/foo.txt bar/foo baz/bar/foo *hello.txt hello.txt-and-then-some diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index a12df1d4c5d..e8b9e4ba2b2 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -233,6 +233,11 @@ fn basename_glob_and_literal_is_ends_with() { #[test] #[ignore] fn special_cases_from_corpus() { + let pattern = &pat("foo*bar"); + assert!( + !match_file(pattern, "foo/baz/bar", Case::Sensitive), + "asterisk does not match path separators" + ); let pattern = &pat("*some/path/to/hello.txt"); assert!( !match_file(pattern, "a/bigger/some/path/to/hello.txt", Case::Sensitive), diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 526800dc23c..f0232c4f5fe 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -72,7 +72,7 @@ fn corpus() { (1,1,1,1, "a", "[!]-]"), (0,0,0,0, "", r"\"), (0,0,0,0, r"XXX/\", r"*/\"), - (1,1,1,1, r"XXX/\", r"*/\\"), + (0,0,0,0, r"XXX/\", r"*/\\"), (1,1,1,1, "foo", "foo"), (1,1,1,1, "@foo", "@foo"), (0,0,0,0, "foo", "@foo"), @@ -207,8 +207,9 @@ fn corpus() { ]; let mut failures = Vec::new(); - let mut panics = 0; - for (glob_match, glob_imatch, path_match, path_imatch, text, pattern_text) in tests { + let mut all_panic = 0; + let mut at_least_one_panic = 0; + for (path_match, path_imatch, glob_match, glob_imatch, text, pattern_text) in tests { let pattern = git_glob::Pattern::from_bytes(pattern_text.as_bytes()).expect("valid (enough) pattern"); let actual_path_match: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Sensitive)).into(); let actual_path_imatch: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Fold)).into(); @@ -217,30 +218,43 @@ fn corpus() { catch_unwind(|| pattern.matches(text, wildmatch::Mode::IGNORE_CASE)).into(); let expected: (MatchResult, MatchResult, MatchResult, MatchResult) = ( - glob_match.into(), - glob_imatch.into(), path_match.into(), path_imatch.into(), + glob_match.into(), + glob_imatch.into(), ); let actual = ( - actual_glob_match, - actual_glob_imatch, actual_path_match, actual_path_imatch, + actual_glob_match, + actual_glob_imatch, ); - if actual != expected { - if let (MatchResult::Panic, MatchResult::Panic, MatchResult::Panic, MatchResult::Panic) = actual { - panics += 1; + + use MatchResult::Panic; + if let (Panic, Panic, Panic, Panic) = actual { + all_panic += 1; + at_least_one_panic += 1; + } else { + if actual != expected { + failures.push((pattern, pattern_text, text, actual, expected)); } else { - failures.push((pattern_text, text, actual, expected)); + at_least_one_panic += match actual { + (Panic, _, _, _) => 1, + (_, Panic, _, _) => 1, + (_, _, Panic, _) => 1, + (_, _, _, Panic) => 1, + _ => 0, + } } } } dbg!(&failures); - dbg!(panics); - dbg!(tests.len() - panics); + dbg!(all_panic, at_least_one_panic); + dbg!(tests.len() - at_least_one_panic); + dbg!(failures.len()); assert_eq!(failures.len(), 0); + assert_eq!(at_least_one_panic, 0, "not a single panic in any invocation"); // TODO: reproduce these // (0 0 0 0 \ @@ -255,13 +269,25 @@ fn corpus() { // 1 1 1 1 foo/bba/arr 'foo**' } -#[derive(Eq, PartialEq)] enum MatchResult { Match, NoMatch, Panic, } +impl PartialEq for MatchResult { + fn eq(&self, other: &Self) -> bool { + use MatchResult::*; + match (self, other) { + (Panic, _) | (_, Panic) => true, + (Match, NoMatch) | (NoMatch, Match) => false, + (Match, Match) | (NoMatch, NoMatch) => true, + } + } +} + +impl std::cmp::Eq for MatchResult {} + impl From> for MatchResult { fn from(v: std::thread::Result) -> Self { use MatchResult::*; @@ -288,8 +314,8 @@ impl Debug for MatchResult { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { use MatchResult::*; f.write_str(match self { - Match => "=", - NoMatch => "≠", + Match => "✔️", + NoMatch => "⨯", Panic => "E", }) } From 334c62459dbb6763a46647a64129f89e27b5781b Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 16:37:41 +0800 Subject: [PATCH 34/56] very basic beginnings of wildmatch (#301) --- git-glob/src/wildmatch.rs | 52 +++++++++++++++++++++++- git-glob/tests/fixtures/make_baseline.sh | 2 + git-glob/tests/matching/mod.rs | 6 +-- 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 0955571187d..453abcad266 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -14,7 +14,55 @@ pub(crate) mod function { use crate::wildmatch::Mode; - pub fn wildmatch(pattern: &BStr, value: &BStr, _mode: Mode) -> bool { - todo!("actual wildcard match for '{}' ~= '{}'", pattern, value) + #[derive(Eq, PartialEq)] + enum Result { + Match, + NoMatch, + AbortAll, + // AbortToStarStar, + } + + const STAR: u8 = b'*'; + + fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result { + use self::Result::*; + let possibly_lowercase = |c: &u8| { + if mode.contains(Mode::IGNORE_CASE) { + c.to_ascii_lowercase() + } else { + *c + } + }; + let mut p = pattern.iter().map(possibly_lowercase); + let mut t = text.iter().map(possibly_lowercase); + + while let Some(mut p_ch) = p.next() { + let t_ch = match t.next() { + Some(c) => c, + None if p_ch != STAR => return AbortAll, + None => 0, + }; + + if p_ch == b'\\' { + p_ch = match p.next() { + Some(c) => c, + None => return NoMatch, + }; + } + match p_ch { + non_glob_ch => { + if non_glob_ch != t_ch { + return NoMatch; + } else { + continue; + } + } + } + } + t.next().map(|_| NoMatch).unwrap_or(Match) + } + + pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool { + match_recursive(pattern, value, mode) == Result::Match } } diff --git a/git-glob/tests/fixtures/make_baseline.sh b/git-glob/tests/fixtures/make_baseline.sh index f33fb7082c2..763c4cd3245 100644 --- a/git-glob/tests/fixtures/make_baseline.sh +++ b/git-glob/tests/fixtures/make_baseline.sh @@ -14,6 +14,8 @@ while read -r pattern nomatch; do done <>git-baseline.nmatch */\ XXX/\ */\\ XXX/\ +/*foo bar/foo +/*foo bar/bazfoo foo*bar foo/baz/bar /*foo.txt hello/foo.txt bar/foo baz/bar/foo diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index e8b9e4ba2b2..d3b541ae5ad 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -231,7 +231,6 @@ fn basename_glob_and_literal_is_ends_with() { } #[test] -#[ignore] fn special_cases_from_corpus() { let pattern = &pat("foo*bar"); assert!( @@ -266,12 +265,11 @@ fn absolute_basename_glob_and_literal_is_ends_with_in_basenames() { } #[test] -#[ignore] fn absolute_basename_glob_and_literal_is_glob_in_paths() { let pattern = &pat("/*foo"); - assert!(match_file(pattern, "bar/foo", Case::Sensitive), "* does not match /"); - assert!(match_file(pattern, "bar/bazfoo", Case::Sensitive)); + assert!(!match_file(pattern, "bar/foo", Case::Sensitive), "* does not match /"); + assert!(!match_file(pattern, "bar/bazfoo", Case::Sensitive)); } #[test] From e83c8df03e801e00571f5934331e004af9774c7f Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 16:43:25 +0800 Subject: [PATCH 35/56] question mark support (#301) It's the easy one as it applies to single-characters only --- git-glob/src/wildmatch.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 453abcad266..ad246a11a7e 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -23,6 +23,8 @@ pub(crate) mod function { } const STAR: u8 = b'*'; + const BACKSLASH: u8 = b'\\'; + const SLASH: u8 = b'/'; fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result { use self::Result::*; @@ -43,13 +45,20 @@ pub(crate) mod function { None => 0, }; - if p_ch == b'\\' { + if p_ch == BACKSLASH { p_ch = match p.next() { Some(c) => c, None => return NoMatch, }; } match p_ch { + b'?' => { + if mode.contains(Mode::SLASH_IS_LITERAL) && t_ch == SLASH { + return NoMatch; + } else { + continue; + } + } non_glob_ch => { if non_glob_ch != t_ch { return NoMatch; From 4efd21560c754062f09870d253b6a2809cb0efb1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 17:25:37 +0800 Subject: [PATCH 36/56] slowly move towards star/double-star (#301) But the logic is hard to replicate in the way it's currently setup. A break certainly helps. --- git-glob/src/wildmatch.rs | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index ad246a11a7e..8259e6d2189 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -35,18 +35,18 @@ pub(crate) mod function { *c } }; - let mut p = pattern.iter().map(possibly_lowercase); - let mut t = text.iter().map(possibly_lowercase); + let mut p = pattern.iter().map(possibly_lowercase).enumerate(); + let mut t = text.iter().map(possibly_lowercase).enumerate(); - while let Some(mut p_ch) = p.next() { - let t_ch = match t.next() { + while let Some((mut _p_idx, mut p_ch)) = p.next() { + let (t_idx, t_ch) = match t.next() { Some(c) => c, None if p_ch != STAR => return AbortAll, - None => 0, + None => (text.len(), 0), // out of bounds, like in C, can we do better? }; if p_ch == BACKSLASH { - p_ch = match p.next() { + (_p_idx, p_ch) = match p.next() { Some(c) => c, None => return NoMatch, }; @@ -59,6 +59,24 @@ pub(crate) mod function { continue; } } + STAR => { + let match_slash = mode.contains(Mode::SLASH_IS_LITERAL).then(|| false).unwrap_or(true); + match p.next() { + Some((_next_p_idx, next_p_ch)) => { + if next_p_ch == STAR { + // check for '/**/' or '/**' + todo!("double star") + } + } + None => { + return if !match_slash && text[t_idx..].contains(&SLASH) { + NoMatch + } else { + Match + } + } + } + } non_glob_ch => { if non_glob_ch != t_ch { return NoMatch; From 48c57ff3299928fd427bfae3e4eeadf5a9ca8109 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 19:13:51 +0800 Subject: [PATCH 37/56] maybe even working double-star handling (#301) --- git-glob/src/wildmatch.rs | 59 ++++++++++++++++++++++++++++++---- git-glob/tests/matching/mod.rs | 2 ++ 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 8259e6d2189..123ac9fc24b 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -10,7 +10,7 @@ bitflags! { } pub(crate) mod function { - use bstr::BStr; + use bstr::{BStr, ByteSlice}; use crate::wildmatch::Mode; @@ -35,10 +35,10 @@ pub(crate) mod function { *c } }; - let mut p = pattern.iter().map(possibly_lowercase).enumerate(); + let mut p = pattern.iter().map(possibly_lowercase).enumerate().peekable(); let mut t = text.iter().map(possibly_lowercase).enumerate(); - while let Some((mut _p_idx, mut p_ch)) = p.next() { + while let Some((mut p_idx, mut p_ch)) = p.next() { let (t_idx, t_ch) = match t.next() { Some(c) => c, None if p_ch != STAR => return AbortAll, @@ -46,7 +46,7 @@ pub(crate) mod function { }; if p_ch == BACKSLASH { - (_p_idx, p_ch) = match p.next() { + (p_idx, p_ch) = match p.next() { Some(c) => c, None => return NoMatch, }; @@ -62,10 +62,54 @@ pub(crate) mod function { STAR => { let match_slash = mode.contains(Mode::SLASH_IS_LITERAL).then(|| false).unwrap_or(true); match p.next() { - Some((_next_p_idx, next_p_ch)) => { + Some((next_p_idx, next_p_ch)) => { + let next; if next_p_ch == STAR { - // check for '/**/' or '/**' - todo!("double star") + let leading_slash_idx = p_idx.checked_sub(1); + while let Some(_) = p.next_if(|(_, c)| *c == STAR) {} + next = p.next(); + if !match_slash { + // check for '/**/' or '/**' or '**' + if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH) + && next.map_or(true, |(_, c)| { + c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH)) + }) + { + if next.map_or(false, |t| { + t.1 == SLASH + && match_recursive( + &pattern[t.0 + 1..].as_bstr(), + &text[t_idx..].as_bstr(), + mode, + ) == Match + }) { + return Match; + } + } + } + } else { + next = Some((next_p_idx, next_p_ch)); + } + + match next { + None => { + return if !match_slash && text[t_idx..].contains(&SLASH) { + NoMatch + } else { + Match + } + } + Some((_, p_ch)) => { + if !match_slash && p_ch == SLASH { + match text[t_idx..].find_byte(SLASH) { + Some(distance_to_slash) => { + for _ in t.by_ref().take(distance_to_slash) {} + break; + } + None => return NoMatch, + } + } + } } } None => { @@ -76,6 +120,7 @@ pub(crate) mod function { } } } + todo!("star handling"); } non_glob_ch => { if non_glob_ch != t_ch { diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index d3b541ae5ad..54858323e6e 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -231,6 +231,7 @@ fn basename_glob_and_literal_is_ends_with() { } #[test] +#[ignore] fn special_cases_from_corpus() { let pattern = &pat("foo*bar"); assert!( @@ -265,6 +266,7 @@ fn absolute_basename_glob_and_literal_is_ends_with_in_basenames() { } #[test] +#[ignore] fn absolute_basename_glob_and_literal_is_glob_in_paths() { let pattern = &pat("/*foo"); From 1393403b826cf4a2fbaf6ef58d505c5c62fd5e0a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 19:19:52 +0800 Subject: [PATCH 38/56] thanks clippy --- git-glob/src/wildmatch.rs | 15 +++++++-------- git-glob/tests/wildmatch/mod.rs | 18 ++++++++---------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 123ac9fc24b..c132f6efe47 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -66,7 +66,7 @@ pub(crate) mod function { let next; if next_p_ch == STAR { let leading_slash_idx = p_idx.checked_sub(1); - while let Some(_) = p.next_if(|(_, c)| *c == STAR) {} + while p.next_if(|(_, c)| *c == STAR).is_some() {} next = p.next(); if !match_slash { // check for '/**/' or '/**' or '**' @@ -74,17 +74,16 @@ pub(crate) mod function { && next.map_or(true, |(_, c)| { c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH)) }) - { - if next.map_or(false, |t| { + && next.map_or(false, |t| { t.1 == SLASH && match_recursive( - &pattern[t.0 + 1..].as_bstr(), - &text[t_idx..].as_bstr(), + pattern[t.0 + 1..].as_bstr(), + text[t_idx..].as_bstr(), mode, ) == Match - }) { - return Match; - } + }) + { + return Match; } } } else { diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index f0232c4f5fe..ccf6bd08d29 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -234,17 +234,15 @@ fn corpus() { if let (Panic, Panic, Panic, Panic) = actual { all_panic += 1; at_least_one_panic += 1; + } else if actual != expected { + failures.push((pattern, pattern_text, text, actual, expected)); } else { - if actual != expected { - failures.push((pattern, pattern_text, text, actual, expected)); - } else { - at_least_one_panic += match actual { - (Panic, _, _, _) => 1, - (_, Panic, _, _) => 1, - (_, _, Panic, _) => 1, - (_, _, _, Panic) => 1, - _ => 0, - } + at_least_one_panic += match actual { + (Panic, _, _, _) => 1, + (_, Panic, _, _) => 1, + (_, _, Panic, _) => 1, + (_, _, _, Panic) => 1, + _ => 0, } } } From 321c4d2011617f2b13e29109cafe4566e53bfde3 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Tue, 12 Apr 2022 20:35:07 +0800 Subject: [PATCH 39/56] All our simple wildmatches are working, a good start (#301) Also, 75% of all ignore tests seem to work, and about 45% of the actual wild card match corpus. Probably worth pulling some cases out to make them work and get everything related to * and ? right before contiuning. --- git-glob/src/parse.rs | 2 +- git-glob/src/wildmatch.rs | 39 +++++++++++++++++++++++++++++----- git-glob/tests/matching/mod.rs | 3 --- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/git-glob/src/parse.rs b/git-glob/src/parse.rs index e705421a052..545686ed6c3 100644 --- a/git-glob/src/parse.rs +++ b/git-glob/src/parse.rs @@ -47,7 +47,7 @@ fn first_wildcard_pos(pat: &[u8]) -> Option { pat.find_byteset(GLOB_CHARACTERS) } -const GLOB_CHARACTERS: &[u8] = br"*?[\"; +pub(crate) const GLOB_CHARACTERS: &[u8] = br"*?[\"; /// We always copy just because that's ultimately needed anyway, not because we always have to. fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index c132f6efe47..e90fa5bf61f 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -19,7 +19,7 @@ pub(crate) mod function { Match, NoMatch, AbortAll, - // AbortToStarStar, + AbortToStarStar, } const STAR: u8 = b'*'; @@ -39,10 +39,10 @@ pub(crate) mod function { let mut t = text.iter().map(possibly_lowercase).enumerate(); while let Some((mut p_idx, mut p_ch)) = p.next() { - let (t_idx, t_ch) = match t.next() { + let (mut t_idx, mut t_ch) = match t.next() { Some(c) => c, None if p_ch != STAR => return AbortAll, - None => (text.len(), 0), // out of bounds, like in C, can we do better? + None => return NoMatch, }; if p_ch == BACKSLASH { @@ -98,7 +98,8 @@ pub(crate) mod function { Match } } - Some((_, p_ch)) => { + Some((next_p_idx, next_p_ch)) => { + (p_idx, p_ch) = (next_p_idx, next_p_ch); if !match_slash && p_ch == SLASH { match text[t_idx..].find_byte(SLASH) { Some(distance_to_slash) => { @@ -119,7 +120,35 @@ pub(crate) mod function { } } } - todo!("star handling"); + + return loop { + if !crate::parse::GLOB_CHARACTERS.contains(&p_ch) { + loop { + if (!match_slash && t_ch == SLASH) || t_ch == p_ch { + break; + } + (t_idx, t_ch) = match t.next() { + Some(t) => (t.0, t.1), + None => break, + }; + } + if t_ch != p_ch { + return NoMatch; + } + } + let res = match_recursive(pattern[p_idx..].as_bstr(), text[t_idx..].as_bstr(), mode); + if res != NoMatch { + if !match_slash || res != AbortToStarStar { + return res; + } + } else if !match_slash && t_ch == SLASH { + return AbortToStarStar; + } + (t_idx, t_ch) = match t.next() { + Some(t) => (t.0, t.1), + None => break AbortAll, + }; + }; } non_glob_ch => { if non_glob_ch != t_ch { diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 54858323e6e..fabe234335a 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -190,7 +190,6 @@ fn absolute_path_with_recursive_glob_detects_mismatches_quickly() { } #[test] -#[ignore] fn absolute_path_with_recursive_glob_can_do_case_insensitive_prefix_search() { let mut pattern = pat("/bar/foo/**"); let pat = &pattern; @@ -231,7 +230,6 @@ fn basename_glob_and_literal_is_ends_with() { } #[test] -#[ignore] fn special_cases_from_corpus() { let pattern = &pat("foo*bar"); assert!( @@ -266,7 +264,6 @@ fn absolute_basename_glob_and_literal_is_ends_with_in_basenames() { } #[test] -#[ignore] fn absolute_basename_glob_and_literal_is_glob_in_paths() { let pattern = &pat("/*foo"); From d21c6541959b0fe34a3882ffcb9e657d6c685734 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 09:41:21 +0800 Subject: [PATCH 40/56] new wildcard tests to help fix star matching (#301) There are simple cases that still don't work, so that should be fixed before adding more complexity. --- git-glob/tests/matching/mod.rs | 5 +- git-glob/tests/wildmatch/mod.rs | 121 +++++++++++++++++++++++--------- 2 files changed, 93 insertions(+), 33 deletions(-) diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index fabe234335a..def9bed666d 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -89,7 +89,10 @@ fn compare_baseline_with_ours() { } dbg!(panics); - dbg!(mismatches); + dbg!(mismatches + .iter() + .filter(|e| e.0.contains(&b'*') && !e.0.contains(&b'[')) + .next()); assert_eq!( total_correct, total_matches - panics, diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index ccf6bd08d29..e5cf1aea55c 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -1,6 +1,6 @@ use git_glob::pattern::Case; -use git_glob::wildmatch; -use std::fmt::{Debug, Formatter}; +use git_glob::{wildmatch, Pattern}; +use std::fmt::{Debug, Display, Formatter}; use std::panic::catch_unwind; #[test] @@ -210,44 +210,20 @@ fn corpus() { let mut all_panic = 0; let mut at_least_one_panic = 0; for (path_match, path_imatch, glob_match, glob_imatch, text, pattern_text) in tests { - let pattern = git_glob::Pattern::from_bytes(pattern_text.as_bytes()).expect("valid (enough) pattern"); - let actual_path_match: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Sensitive)).into(); - let actual_path_imatch: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Fold)).into(); - let actual_glob_match: MatchResult = catch_unwind(|| pattern.matches(text, wildmatch::Mode::empty())).into(); - let actual_glob_imatch: MatchResult = - catch_unwind(|| pattern.matches(text, wildmatch::Mode::IGNORE_CASE)).into(); + let (pattern, actual) = multi_match(pattern_text, text); + let expected = expect_multi(path_match, path_imatch, glob_match, glob_imatch); - let expected: (MatchResult, MatchResult, MatchResult, MatchResult) = ( - path_match.into(), - path_imatch.into(), - glob_match.into(), - glob_imatch.into(), - ); - let actual = ( - actual_path_match, - actual_path_imatch, - actual_glob_match, - actual_glob_imatch, - ); - - use MatchResult::Panic; - if let (Panic, Panic, Panic, Panic) = actual { + if actual.all_panicked() { all_panic += 1; at_least_one_panic += 1; } else if actual != expected { failures.push((pattern, pattern_text, text, actual, expected)); } else { - at_least_one_panic += match actual { - (Panic, _, _, _) => 1, - (_, Panic, _, _) => 1, - (_, _, Panic, _) => 1, - (_, _, _, Panic) => 1, - _ => 0, - } + at_least_one_panic += if actual.any_panicked() { 1 } else { 0 }; } } - dbg!(&failures); + dbg!(failures.first()); dbg!(all_panic, at_least_one_panic); dbg!(tests.len() - at_least_one_panic); dbg!(failures.len()); @@ -267,6 +243,87 @@ fn corpus() { // 1 1 1 1 foo/bba/arr 'foo**' } +#[test] +#[ignore] +fn simple_star_boundary() { + let (_pattern, actual) = multi_match("*foo*", "foo"); + assert!(!actual.any_panicked()); + assert_eq!(actual, expect_multi(1, 1, 1, 1)); +} + +#[test] +#[ignore] +fn simple_double_star() { + let (_pattern, actual) = multi_match("some/**/needle.txt", "some/one/Needle.txt"); + assert!(!actual.any_panicked()); + assert_eq!(actual, expect_multi(0, 1, 0, 1)); +} + +fn multi_match(pattern_text: &str, text: &str) -> (Pattern, MultiMatch) { + let pattern = git_glob::Pattern::from_bytes(pattern_text.as_bytes()).expect("valid (enough) pattern"); + let actual_path_match: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Sensitive)).into(); + let actual_path_imatch: MatchResult = catch_unwind(|| match_file_path(&pattern, text, Case::Fold)).into(); + let actual_glob_match: MatchResult = catch_unwind(|| pattern.matches(text, wildmatch::Mode::empty())).into(); + let actual_glob_imatch: MatchResult = catch_unwind(|| pattern.matches(text, wildmatch::Mode::IGNORE_CASE)).into(); + let actual = MultiMatch { + path_match: actual_path_match, + path_imatch: actual_path_imatch, + glob_match: actual_glob_match, + glob_imatch: actual_glob_imatch, + }; + (pattern, actual) +} + +fn expect_multi(path_match: u8, path_imatch: u8, glob_match: u8, glob_imatch: u8) -> MultiMatch { + (path_match, path_imatch, glob_match, glob_imatch).into() +} + +#[derive(Eq, PartialEq)] +struct MultiMatch { + path_match: MatchResult, + path_imatch: MatchResult, + glob_match: MatchResult, + glob_imatch: MatchResult, +} + +impl MultiMatch { + fn all_panicked(&self) -> bool { + use MatchResult::Panic; + matches!(self.path_match, Panic) + && matches!(self.path_imatch, Panic) + && matches!(self.glob_match, Panic) + && matches!(self.glob_imatch, Panic) + } + fn any_panicked(&self) -> bool { + use MatchResult::Panic; + matches!(self.path_match, Panic) + || matches!(self.path_imatch, Panic) + || matches!(self.glob_match, Panic) + || matches!(self.glob_imatch, Panic) + } +} + +impl From<(u8, u8, u8, u8)> for MultiMatch { + fn from(t: (u8, u8, u8, u8)) -> Self { + MultiMatch { + path_match: t.0.into(), + path_imatch: t.1.into(), + glob_match: t.2.into(), + glob_imatch: t.3.into(), + } + } +} + +impl Debug for MultiMatch { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!( + f, + "({} {} {} {})", + self.path_match, self.path_imatch, self.glob_match, self.glob_imatch + ) + } +} + enum MatchResult { Match, NoMatch, @@ -308,7 +365,7 @@ impl From for MatchResult { } } -impl Debug for MatchResult { +impl Display for MatchResult { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { use MatchResult::*; f.write_str(match self { From 7907cb4e12b56bdbea6abdc59f1022a508a83c87 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 09:48:49 +0800 Subject: [PATCH 41/56] fix backslash handling; improve star handling (#301) There are still standard double star cases not working, but it is getting there one by one. --- git-glob/src/wildmatch.rs | 12 +++++++++--- git-glob/tests/wildmatch/mod.rs | 3 +-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index e90fa5bf61f..8b7c06fd473 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -42,12 +42,18 @@ pub(crate) mod function { let (mut t_idx, mut t_ch) = match t.next() { Some(c) => c, None if p_ch != STAR => return AbortAll, - None => return NoMatch, + None => (text.len(), 0), }; if p_ch == BACKSLASH { - (p_idx, p_ch) = match p.next() { - Some(c) => c, + match p.next() { + Some((_p_idx, p_ch)) => { + if p_ch != t_ch { + return NoMatch; + } else { + continue; + } + } None => return NoMatch, }; } diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index e5cf1aea55c..80eca8645cf 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -223,7 +223,7 @@ fn corpus() { } } - dbg!(failures.first()); + dbg!(failures.iter().filter(|e| !e.1.contains('[')).next()); dbg!(all_panic, at_least_one_panic); dbg!(tests.len() - at_least_one_panic); dbg!(failures.len()); @@ -244,7 +244,6 @@ fn corpus() { } #[test] -#[ignore] fn simple_star_boundary() { let (_pattern, actual) = multi_match("*foo*", "foo"); assert!(!actual.any_panicked()); From e5a79951dc32d336ae5b6c4230b3058ed80456d6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 10:23:16 +0800 Subject: [PATCH 42/56] fix single-level double-star (#301) --- git-glob/src/wildmatch.rs | 2 +- git-glob/tests/wildmatch/mod.rs | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 8b7c06fd473..ee672708e2b 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -110,7 +110,7 @@ pub(crate) mod function { match text[t_idx..].find_byte(SLASH) { Some(distance_to_slash) => { for _ in t.by_ref().take(distance_to_slash) {} - break; + continue; } None => return NoMatch, } diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 80eca8645cf..ee332fdab35 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -251,11 +251,10 @@ fn simple_star_boundary() { } #[test] -#[ignore] fn simple_double_star() { - let (_pattern, actual) = multi_match("some/**/needle.txt", "some/one/Needle.txt"); + let (_pattern, actual) = multi_match("some/**/needle.txt", "some/one/needle.txt"); assert!(!actual.any_panicked()); - assert_eq!(actual, expect_multi(0, 1, 0, 1)); + assert_eq!(actual, expect_multi(1, 1, 1, 1)); } fn multi_match(pattern_text: &str, text: &str) -> (Pattern, MultiMatch) { From 43371b6fa0d6e62d9cde0399f1c9dd3e76b95d99 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 10:45:55 +0800 Subject: [PATCH 43/56] fix double-star matches (#301) --- git-glob/src/wildmatch.rs | 5 +++-- git-glob/tests/wildmatch/mod.rs | 9 +-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index ee672708e2b..b76323742ba 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -66,7 +66,7 @@ pub(crate) mod function { } } STAR => { - let match_slash = mode.contains(Mode::SLASH_IS_LITERAL).then(|| false).unwrap_or(true); + let mut match_slash = mode.contains(Mode::SLASH_IS_LITERAL).then(|| false).unwrap_or(true); match p.next() { Some((next_p_idx, next_p_ch)) => { let next; @@ -91,6 +91,7 @@ pub(crate) mod function { { return Match; } + match_slash = true; } } else { next = Some((next_p_idx, next_p_ch)); @@ -102,7 +103,7 @@ pub(crate) mod function { NoMatch } else { Match - } + }; } Some((next_p_idx, next_p_ch)) => { (p_idx, p_ch) = (next_p_idx, next_p_ch); diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index ee332fdab35..4861fdc8f1e 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -243,16 +243,9 @@ fn corpus() { // 1 1 1 1 foo/bba/arr 'foo**' } -#[test] -fn simple_star_boundary() { - let (_pattern, actual) = multi_match("*foo*", "foo"); - assert!(!actual.any_panicked()); - assert_eq!(actual, expect_multi(1, 1, 1, 1)); -} - #[test] fn simple_double_star() { - let (_pattern, actual) = multi_match("some/**/needle.txt", "some/one/needle.txt"); + let (_pattern, actual) = multi_match("foo/**/bar", "foo/a/b/bar"); assert!(!actual.any_panicked()); assert_eq!(actual, expect_multi(1, 1, 1, 1)); } From 09095dfb123f419a3df715d48e60e1f8ec62d060 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 11:00:52 +0800 Subject: [PATCH 44/56] fix another special case (#301) It's all about manipulating match_slash precisely. --- git-glob/src/wildmatch.rs | 29 +++++++++++++++-------------- git-glob/tests/wildmatch/mod.rs | 4 ++-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index b76323742ba..e0e62abb3dd 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -74,24 +74,25 @@ pub(crate) mod function { let leading_slash_idx = p_idx.checked_sub(1); while p.next_if(|(_, c)| *c == STAR).is_some() {} next = p.next(); - if !match_slash { - // check for '/**/' or '/**' or '**' - if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH) - && next.map_or(true, |(_, c)| { - c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH)) - }) - && next.map_or(false, |t| { - t.1 == SLASH - && match_recursive( - pattern[t.0 + 1..].as_bstr(), - text[t_idx..].as_bstr(), - mode, - ) == Match - }) + if !mode.contains(Mode::SLASH_IS_LITERAL) { + match_slash = true; + } else if leading_slash_idx.map_or(true, |idx| pattern[idx] == SLASH) + && next.map_or(true, |(_, c)| { + c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH)) + }) + && next.map_or(false, |t| t.1 == SLASH) + { + if match_recursive( + pattern[next.expect("checked prior").0 + 1..].as_bstr(), + text[t_idx..].as_bstr(), + mode, + ) == Match { return Match; } match_slash = true; + } else { + match_slash = false; } } else { next = Some((next_p_idx, next_p_ch)); diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 4861fdc8f1e..9de10224ce5 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -245,9 +245,9 @@ fn corpus() { #[test] fn simple_double_star() { - let (_pattern, actual) = multi_match("foo/**/bar", "foo/a/b/bar"); + let (_pattern, actual) = multi_match("**/bar**", "foo/bar/baz"); assert!(!actual.any_panicked()); - assert_eq!(actual, expect_multi(1, 1, 1, 1)); + assert_eq!(actual, expect_multi(0, 0, 1, 1)); } fn multi_match(pattern_text: &str, text: &str) -> (Pattern, MultiMatch) { From d15c2fb0119edc7635efc174a703101e100c0b4c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 11:13:40 +0800 Subject: [PATCH 45/56] fix another issue around double-star (#301) --- git-glob/src/wildmatch.rs | 9 +++------ git-glob/tests/wildmatch/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index e0e62abb3dd..420ae562df9 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -80,13 +80,10 @@ pub(crate) mod function { && next.map_or(true, |(_, c)| { c == SLASH || (c == BACKSLASH && p.peek().map(|t| t.1) == Some(SLASH)) }) - && next.map_or(false, |t| t.1 == SLASH) { - if match_recursive( - pattern[next.expect("checked prior").0 + 1..].as_bstr(), - text[t_idx..].as_bstr(), - mode, - ) == Match + if next.map_or(NoMatch, |(idx, _)| { + match_recursive(pattern[idx + 1..].as_bstr(), text[t_idx..].as_bstr(), mode) + }) == Match { return Match; } diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 9de10224ce5..413f894dd56 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -245,9 +245,9 @@ fn corpus() { #[test] fn simple_double_star() { - let (_pattern, actual) = multi_match("**/bar**", "foo/bar/baz"); + let (_pattern, actual) = multi_match("test/**", "test/one/two"); assert!(!actual.any_panicked()); - assert_eq!(actual, expect_multi(0, 0, 1, 1)); + assert_eq!(actual, expect_multi(1, 1, 1, 1)); } fn multi_match(pattern_text: &str, text: &str) -> (Pattern, MultiMatch) { From 48990af81110a411ad07e199916005a8885db920 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 11:19:17 +0800 Subject: [PATCH 46/56] adjust wildmatch corpus expectations as it won't match our preprocessor (#301) It appears that we do truncate trailing backslashes differently at least in our test setup, so the actual pattern used for matching is different and produces different (but correct results. Hence we update the expectation accordingly, instead of adjusting the pattern parsing, as it seems unnecessary to match such patterns or to adjust the parser to support them. --- git-glob/tests/matching/mod.rs | 5 +---- git-glob/tests/wildmatch/mod.rs | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index def9bed666d..b7b38f0b46c 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -89,10 +89,7 @@ fn compare_baseline_with_ours() { } dbg!(panics); - dbg!(mismatches - .iter() - .filter(|e| e.0.contains(&b'*') && !e.0.contains(&b'[')) - .next()); + dbg!(mismatches.iter().filter(|e| !e.0.contains(&b'[')).next()); assert_eq!( total_correct, total_matches - panics, diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 413f894dd56..99b8fcc179f 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -71,8 +71,8 @@ fn corpus() { (0,0,0,0, "]", "[!]-]"), (1,1,1,1, "a", "[!]-]"), (0,0,0,0, "", r"\"), - (0,0,0,0, r"XXX/\", r"*/\"), - (0,0,0,0, r"XXX/\", r"*/\\"), + (0,0,1,1, r"XXX/\", r"*/\"), + (0,0,1,1, r"XXX/\", r"*/\\"), (1,1,1,1, "foo", "foo"), (1,1,1,1, "@foo", "@foo"), (0,0,0,0, "foo", "@foo"), From b1a610029e1b40600f90194ce986155238f58101 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 11:21:50 +0800 Subject: [PATCH 47/56] thanks clippy --- git-glob/tests/matching/mod.rs | 2 +- git-glob/tests/wildmatch/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index b7b38f0b46c..4e7cd4ea3e5 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -89,7 +89,7 @@ fn compare_baseline_with_ours() { } dbg!(panics); - dbg!(mismatches.iter().filter(|e| !e.0.contains(&b'[')).next()); + dbg!(mismatches.iter().find(|e| !e.0.contains(&b'['))); assert_eq!( total_correct, total_matches - panics, diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 99b8fcc179f..01f7cb0deb4 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -223,7 +223,7 @@ fn corpus() { } } - dbg!(failures.iter().filter(|e| !e.1.contains('[')).next()); + dbg!(failures.iter().find(|e| !e.1.contains('['))); dbg!(all_panic, at_least_one_panic); dbg!(tests.len() - at_least_one_panic); dbg!(failures.len()); From 54fe0294e36e6ae9a025ef8661d5e21fd488dc87 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 16:04:36 +0800 Subject: [PATCH 48/56] first steps towards bracket matching (#301) --- git-glob/src/wildmatch.rs | 59 +++++++++++++++++++++++++++++++++ git-glob/tests/matching/mod.rs | 2 +- git-glob/tests/wildmatch/mod.rs | 2 +- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 420ae562df9..5f00ba0dc27 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -25,6 +25,10 @@ pub(crate) mod function { const STAR: u8 = b'*'; const BACKSLASH: u8 = b'\\'; const SLASH: u8 = b'/'; + const BRACKET_OPEN: u8 = b'['; + const BRACKET_CLOSE: u8 = b']'; + + const NEGATE_CLASS: u8 = b'!'; fn match_recursive(pattern: &BStr, text: &BStr, mode: Mode) -> Result { use self::Result::*; @@ -155,6 +159,61 @@ pub(crate) mod function { }; }; } + BRACKET_OPEN => { + (p_idx, p_ch) = match p.next() { + Some(t) => t, + None => return AbortAll, + }; + + if p_ch == b'^' { + p_ch = NEGATE_CLASS; + } + let negated = p_ch == NEGATE_CLASS; + let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) }; + let mut prev_p = None::<(usize, u8)>; + let mut matched = false; + loop { + match next { + None => return AbortAll, + Some((_p_idx, mut p_ch)) => match p_ch { + BRACKET_CLOSE => break, + BACKSLASH => match p.next() { + Some((_, p_ch)) => { + if p_ch == t_ch { + matched = true + } + } + None => return AbortAll, + }, + b'-' => { + if prev_p.is_some() && matches!(p.peek(), Some((_, c)) if *c != BRACKET_CLOSE) { + (_, p_ch) = p.next().expect("peeked"); + if p_ch == BACKSLASH { + (_, p_ch) = match p.next() { + Some(t) => t, + None => return AbortAll, + }; + } + if t_ch <= p_ch && t_ch >= prev_p.expect("matched prior").1 { + matched = true; + } + } + } + _ => { + if p_ch == t_ch { + matched = true; + } + } + }, + }; + prev_p = next; + next = p.next(); + } + if matched == negated || mode.contains(Mode::SLASH_IS_LITERAL) && t_ch == SLASH { + return NoMatch; + } + todo!("bracket support post loop"); + } non_glob_ch => { if non_glob_ch != t_ch { return NoMatch; diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 4e7cd4ea3e5..607718bbec9 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -89,7 +89,7 @@ fn compare_baseline_with_ours() { } dbg!(panics); - dbg!(mismatches.iter().find(|e| !e.0.contains(&b'['))); + dbg!(mismatches.first()); assert_eq!( total_correct, total_matches - panics, diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 01f7cb0deb4..080d3e66fdf 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -223,7 +223,7 @@ fn corpus() { } } - dbg!(failures.iter().find(|e| !e.1.contains('['))); + dbg!(failures.first()); dbg!(all_panic, at_least_one_panic); dbg!(tests.len() - at_least_one_panic); dbg!(failures.len()); From fa0440fb3c80f8052e08526cf260e929722ccf02 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 16:29:07 +0800 Subject: [PATCH 49/56] refactor (#301) --- git-glob/src/wildmatch.rs | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 5f00ba0dc27..8a95db482cf 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -170,7 +170,7 @@ pub(crate) mod function { } let negated = p_ch == NEGATE_CLASS; let mut next = if negated { p.next() } else { Some((p_idx, p_ch)) }; - let mut prev_p = None::<(usize, u8)>; + let mut prev_p_ch = 0; let mut matched = false; loop { match next { @@ -181,32 +181,37 @@ pub(crate) mod function { Some((_, p_ch)) => { if p_ch == t_ch { matched = true + } else { + prev_p_ch = p_ch; } } None => return AbortAll, }, - b'-' => { - if prev_p.is_some() && matches!(p.peek(), Some((_, c)) if *c != BRACKET_CLOSE) { - (_, p_ch) = p.next().expect("peeked"); - if p_ch == BACKSLASH { - (_, p_ch) = match p.next() { - Some(t) => t, - None => return AbortAll, - }; - } - if t_ch <= p_ch && t_ch >= prev_p.expect("matched prior").1 { - matched = true; - } + b'-' if prev_p_ch != 0 + && p.peek().is_some() + && p.peek().map(|t| t.1) != Some(BRACKET_CLOSE) => + { + p_ch = p.next().expect("peeked").1; + if p_ch == BACKSLASH { + p_ch = match p.next() { + Some(t) => t.1, + None => return AbortAll, + }; + } + if t_ch <= p_ch && t_ch >= prev_p_ch { + matched = true; } + prev_p_ch = 0; } + BRACKET_OPEN => todo!("class open"), _ => { + prev_p_ch = p_ch; if p_ch == t_ch { matched = true; } } }, }; - prev_p = next; next = p.next(); } if matched == negated || mode.contains(Mode::SLASH_IS_LITERAL) && t_ch == SLASH { From 97aa9ed22ccb927147a1e456ee6e3510ecc9f90a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 16:47:08 +0800 Subject: [PATCH 50/56] make bracket matching work better (#301) --- git-glob/src/wildmatch.rs | 6 ++++-- git-glob/tests/wildmatch/mod.rs | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 8a95db482cf..a4b6298cb10 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -176,7 +176,6 @@ pub(crate) mod function { match next { None => return AbortAll, Some((_p_idx, mut p_ch)) => match p_ch { - BRACKET_CLOSE => break, BACKSLASH => match p.next() { Some((_, p_ch)) => { if p_ch == t_ch { @@ -213,11 +212,14 @@ pub(crate) mod function { }, }; next = p.next(); + if let Some((_, BRACKET_CLOSE)) = next { + break; + } } if matched == negated || mode.contains(Mode::SLASH_IS_LITERAL) && t_ch == SLASH { return NoMatch; } - todo!("bracket support post loop"); + continue; } non_glob_ch => { if non_glob_ch != t_ch { diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 080d3e66fdf..7c0560d7582 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -244,8 +244,8 @@ fn corpus() { } #[test] -fn simple_double_star() { - let (_pattern, actual) = multi_match("test/**", "test/one/two"); +fn empty_brackets() { + let (_pattern, actual) = multi_match("a[]]b", "a]b"); assert!(!actual.any_panicked()); assert_eq!(actual, expect_multi(1, 1, 1, 1)); } From c64f71c38ff404e9c9f150e3e6d3e02ca11e9235 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 17:01:23 +0800 Subject: [PATCH 51/56] more bracket-range tests succeed (#301) --- git-glob/src/wildmatch.rs | 7 +++++++ git-glob/tests/wildmatch/mod.rs | 6 +++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index a4b6298cb10..c552102a9d8 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -199,6 +199,13 @@ pub(crate) mod function { } if t_ch <= p_ch && t_ch >= prev_p_ch { matched = true; + } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() { + let t_ch_upper = t_ch.to_ascii_uppercase(); + if t_ch_upper <= p_ch.to_ascii_uppercase() + && t_ch_upper >= prev_p_ch.to_ascii_uppercase() + { + matched = true; + } } prev_p_ch = 0; } diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 7c0560d7582..2dc1ca3599b 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -106,8 +106,8 @@ fn corpus() { (0,0,0,0, "q", "[a-c[:digit:]x-z]"), // Additional tests, including some malformed wild(patterns - (1,1,1,1, "]", "[\\-^]"), - (0,0,0,0, "[", "[\\-^]"), + (1,1,1,1, "]", r"[\\-^]"), + (0,0,0,0, "[", r"[\\-^]"), (1,1,1,1, "-", r"[\-_]"), (1,1,1,1, "]", r"[\]]"), (0,0,0,0, r"\]", r"[\]]"), @@ -245,7 +245,7 @@ fn corpus() { #[test] fn empty_brackets() { - let (_pattern, actual) = multi_match("a[]]b", "a]b"); + let (_pattern, actual) = multi_match(r"[A-\\]", "G"); assert!(!actual.any_panicked()); assert_eq!(actual, expect_multi(1, 1, 1, 1)); } From 3afe2d2b862c9a22b90cbfbf75da6c84ca91ebf4 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 17:08:23 +0800 Subject: [PATCH 52/56] =?UTF-8?q?fix=20all=20remaining=20bracket=20tests?= =?UTF-8?q?=E2=80=A6=20(#301)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …that don't involve character classes. --- git-glob/src/wildmatch.rs | 6 ++++-- git-glob/tests/wildmatch/mod.rs | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index c552102a9d8..07321a2e5c9 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -201,8 +201,10 @@ pub(crate) mod function { matched = true; } else if mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() { let t_ch_upper = t_ch.to_ascii_uppercase(); - if t_ch_upper <= p_ch.to_ascii_uppercase() - && t_ch_upper >= prev_p_ch.to_ascii_uppercase() + if (t_ch_upper <= p_ch.to_ascii_uppercase() + && t_ch_upper >= prev_p_ch.to_ascii_uppercase()) + || (t_ch_upper <= prev_p_ch.to_ascii_uppercase() + && t_ch_upper >= p_ch.to_ascii_uppercase()) { matched = true; } diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 2dc1ca3599b..362a668dcf3 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -244,10 +244,10 @@ fn corpus() { } #[test] -fn empty_brackets() { - let (_pattern, actual) = multi_match(r"[A-\\]", "G"); +fn brackets() { + let (_pattern, actual) = multi_match(r"[B-a]", "A"); assert!(!actual.any_panicked()); - assert_eq!(actual, expect_multi(1, 1, 1, 1)); + assert_eq!(actual, expect_multi(0, 1, 0, 1)); } fn multi_match(pattern_text: &str, text: &str) -> (Pattern, MultiMatch) { From 6b8d0d20b449f6adffd403d0555596041a6c1903 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 19:24:47 +0800 Subject: [PATCH 53/56] frame for character classes (#301) --- git-glob/src/wildmatch.rs | 45 ++++++++++++++++++++++++++++++++-- git-glob/tests/matching/mod.rs | 1 - 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 07321a2e5c9..41cb94ddc41 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -27,6 +27,7 @@ pub(crate) mod function { const SLASH: u8 = b'/'; const BRACKET_OPEN: u8 = b'['; const BRACKET_CLOSE: u8 = b']'; + const COLON: u8 = b':'; const NEGATE_CLASS: u8 = b'!'; @@ -175,7 +176,7 @@ pub(crate) mod function { loop { match next { None => return AbortAll, - Some((_p_idx, mut p_ch)) => match p_ch { + Some((p_idx, mut p_ch)) => match p_ch { BACKSLASH => match p.next() { Some((_, p_ch)) => { if p_ch == t_ch { @@ -211,7 +212,47 @@ pub(crate) mod function { } prev_p_ch = 0; } - BRACKET_OPEN => todo!("class open"), + BRACKET_OPEN if matches!(p.peek(), Some((_, COLON))) => { + p.next(); + while p.peek().map_or(false, |t| t.1 != BRACKET_CLOSE) { + p.next(); + } + let closing_bracket_idx = match p.next() { + Some((idx, _)) => idx, + None => return AbortAll, + }; + const BRACKET__COLON__BRACKET: usize = 3; + if closing_bracket_idx - p_idx < BRACKET__COLON__BRACKET + || pattern[closing_bracket_idx - 1] != COLON + { + if t_ch == BRACKET_OPEN { + matched = true + } + p = pattern[p_idx + 1..] + .iter() + .map(possibly_lowercase) + .enumerate() + .peekable(); + } else { + let class = &pattern.as_ref()[p_idx + 2..closing_bracket_idx - 1]; + match class { + b"alnum" => todo!("alnum"), + b"alpha" => todo!("alpha"), + b"blank" => todo!("blank"), + b"cntrl" => todo!("cntrl"), + b"digit" => todo!("digit"), + b"graph" => todo!("graph"), + b"lower" => todo!("lower"), + b"print" => todo!("print"), + b"punct" => todo!("punct"), + b"space" => todo!("space"), + b"upper" => todo!("upper"), + b"xdigit" => todo!("xdigit"), + _ => return AbortAll, + }; + prev_p_ch = 0; + } + } _ => { prev_p_ch = p_ch; if p_ch == t_ch { diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index 607718bbec9..f874a339f63 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -42,7 +42,6 @@ impl<'a> Baseline<'a> { } #[test] -#[ignore] fn compare_baseline_with_ours() { let dir = git_testtools::scripted_fixture_repo_read_only("make_baseline.sh").unwrap(); let (mut total_matches, mut total_correct, mut panics) = (0, 0, 0); From 538d41d51d7cdc472b2a712823a5a69810f75015 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 19:43:22 +0800 Subject: [PATCH 54/56] add all character classes sans some of the more obscure ones (#301) --- git-glob/src/wildmatch.rs | 50 ++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index 41cb94ddc41..e193a32fb39 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -236,18 +236,52 @@ pub(crate) mod function { } else { let class = &pattern.as_ref()[p_idx + 2..closing_bracket_idx - 1]; match class { - b"alnum" => todo!("alnum"), - b"alpha" => todo!("alpha"), - b"blank" => todo!("blank"), + b"alnum" => { + if t_ch.is_ascii_alphanumeric() { + matched = true; + } + } + b"alpha" => { + if t_ch.is_ascii_alphabetic() { + matched = true; + } + } + b"blank" => { + if t_ch.is_ascii_whitespace() { + matched = true; + } + } b"cntrl" => todo!("cntrl"), - b"digit" => todo!("digit"), + b"digit" => { + if t_ch.is_ascii_digit() { + matched = true; + } + } b"graph" => todo!("graph"), - b"lower" => todo!("lower"), + b"lower" => { + if t_ch.is_ascii_lowercase() { + matched = true; + } + } b"print" => todo!("print"), b"punct" => todo!("punct"), - b"space" => todo!("space"), - b"upper" => todo!("upper"), - b"xdigit" => todo!("xdigit"), + b"space" => { + if t_ch == b' ' { + matched = true; + } + } + b"upper" => { + if t_ch.is_ascii_uppercase() + || mode.contains(Mode::IGNORE_CASE) && t_ch.is_ascii_lowercase() + { + matched = true; + } + } + b"xdigit" => { + if t_ch.is_ascii_hexdigit() { + matched = true; + } + } _ => return AbortAll, }; prev_p_ch = 0; From d3a7349b707911670f17a92a0f82681544ebc769 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 19:53:49 +0800 Subject: [PATCH 55/56] all wildmatch tests succeed (#301) --- git-glob/src/wildmatch.rs | 25 +++++++++++++++++++++---- git-glob/tests/matching/mod.rs | 2 -- git-glob/tests/wildmatch/mod.rs | 7 ------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index e193a32fb39..ddfe574cc8e 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -251,20 +251,37 @@ pub(crate) mod function { matched = true; } } - b"cntrl" => todo!("cntrl"), + b"cntrl" => { + if t_ch.is_ascii_control() { + matched = true; + } + } b"digit" => { if t_ch.is_ascii_digit() { matched = true; } } - b"graph" => todo!("graph"), + + b"graph" => { + if t_ch.is_ascii_graphic() { + matched = true; + } + } b"lower" => { if t_ch.is_ascii_lowercase() { matched = true; } } - b"print" => todo!("print"), - b"punct" => todo!("punct"), + b"print" => { + if (0x20u8..=0x7e).contains(&t_ch) { + matched = true; + } + } + b"punct" => { + if t_ch.is_ascii_punctuation() { + matched = true; + } + } b"space" => { if t_ch == b' ' { matched = true; diff --git a/git-glob/tests/matching/mod.rs b/git-glob/tests/matching/mod.rs index f874a339f63..1990c3d2ad6 100644 --- a/git-glob/tests/matching/mod.rs +++ b/git-glob/tests/matching/mod.rs @@ -87,8 +87,6 @@ fn compare_baseline_with_ours() { } } - dbg!(panics); - dbg!(mismatches.first()); assert_eq!( total_correct, total_matches - panics, diff --git a/git-glob/tests/wildmatch/mod.rs b/git-glob/tests/wildmatch/mod.rs index 362a668dcf3..433bf902998 100644 --- a/git-glob/tests/wildmatch/mod.rs +++ b/git-glob/tests/wildmatch/mod.rs @@ -4,7 +4,6 @@ use std::fmt::{Debug, Display, Formatter}; use std::panic::catch_unwind; #[test] -#[ignore] fn corpus() { // based on git/t/t3070.sh let tests = [ @@ -207,14 +206,12 @@ fn corpus() { ]; let mut failures = Vec::new(); - let mut all_panic = 0; let mut at_least_one_panic = 0; for (path_match, path_imatch, glob_match, glob_imatch, text, pattern_text) in tests { let (pattern, actual) = multi_match(pattern_text, text); let expected = expect_multi(path_match, path_imatch, glob_match, glob_imatch); if actual.all_panicked() { - all_panic += 1; at_least_one_panic += 1; } else if actual != expected { failures.push((pattern, pattern_text, text, actual, expected)); @@ -223,10 +220,6 @@ fn corpus() { } } - dbg!(failures.first()); - dbg!(all_panic, at_least_one_panic); - dbg!(tests.len() - at_least_one_panic); - dbg!(failures.len()); assert_eq!(failures.len(), 0); assert_eq!(at_least_one_panic, 0, "not a single panic in any invocation"); From 8f4969fe7c2e3f3bb38275d5e4ccb08d0bde02bb Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 13 Apr 2022 20:32:24 +0800 Subject: [PATCH 56/56] docs for git-glob (#301) --- Cargo.lock | 1 + git-glob/src/lib.rs | 5 ++++- git-glob/src/pattern.rs | 18 ++++++++++++++++++ git-glob/src/wildmatch.rs | 4 ++++ git-repository/Cargo.toml | 3 ++- git-repository/src/lib.rs | 3 +++ 6 files changed, 32 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8db0a026354..ab5da2832b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1400,6 +1400,7 @@ dependencies = [ "git-config", "git-diff", "git-features", + "git-glob", "git-hash", "git-index", "git-lock", diff --git a/git-glob/src/lib.rs b/git-glob/src/lib.rs index dfc22868ffb..c839523addb 100644 --- a/git-glob/src/lib.rs +++ b/git-glob/src/lib.rs @@ -1,5 +1,6 @@ #![forbid(unsafe_code)] -#![deny(rust_2018_idioms)] +#![deny(rust_2018_idioms, missing_docs)] +//! Provide glob [`Patterns`][Pattern] for matching against paths or anything else. use bstr::BString; @@ -20,8 +21,10 @@ pub struct Pattern { pub base_path: Option, } +/// pub mod pattern; +/// pub mod wildmatch; pub use wildmatch::function::wildmatch; diff --git a/git-glob/src/pattern.rs b/git-glob/src/pattern.rs index 2935f88eeca..28ddb700231 100644 --- a/git-glob/src/pattern.rs +++ b/git-glob/src/pattern.rs @@ -4,6 +4,12 @@ use bstr::{BStr, BString, ByteSlice}; use crate::{pattern, wildmatch, Pattern}; bitflags! { + /// Information about a [`Pattern`]. + /// + /// Its main purpose is to accelerate pattern matching, or to negate the match result or to + /// keep special rules only applicable when matching paths. + /// + /// The mode is typically created when parsing the pattern by inspecting it and isn't typically handled by the user. #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Mode: u32 { /// The pattern does not contain a sub-directory and - it doesn't contain slashes after removing the trailing one. @@ -19,6 +25,9 @@ bitflags! { } } +/// Describes whether to match a path case sensitively or not. +/// +/// Used in [Pattern::matches_repo_relative_path()]. pub enum Case { /// The case affects the match Sensitive, @@ -27,6 +36,7 @@ pub enum Case { } impl Pattern { + /// Parse the given `text` as pattern, or return `None` if `text` was empty. pub fn from_bytes(text: &[u8]) -> Option { crate::parse::pattern(text).map(|(text, mode, first_wildcard_pos)| Pattern { text, @@ -61,6 +71,8 @@ impl Pattern { /// `basename_start_pos` is the index at which the `path`'s basename starts. /// /// Lastly, `case` folding can be configured as well. + /// + /// Note that this method uses shortcuts to accelerate simple patterns. pub fn matches_repo_relative_path<'a>( &self, path: impl Into<&'a BStr>, @@ -113,6 +125,12 @@ impl Pattern { } } + /// See if `value` matches this pattern in the given `mode`. + /// + /// `mode` can identify `value` as path which won't match the slash character, and can match + /// strings with cases ignored as well. Note that the case folding performed here is ASCII only. + /// + /// Note that this method uses some shortcuts to accelerate simple patterns. pub fn matches<'a>(&self, value: impl Into<&'a BStr>, mode: wildmatch::Mode) -> bool { let value = value.into(); match self.first_wildcard_pos { diff --git a/git-glob/src/wildmatch.rs b/git-glob/src/wildmatch.rs index ddfe574cc8e..9fbd5e9b613 100644 --- a/git-glob/src/wildmatch.rs +++ b/git-glob/src/wildmatch.rs @@ -1,5 +1,6 @@ use bitflags::bitflags; bitflags! { + /// The match mode employed in [`Pattern::matches()`][crate::Pattern::matches()]. #[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] pub struct Mode: u8 { /// Let globs not match the slash `/` literal. @@ -334,6 +335,9 @@ pub(crate) mod function { t.next().map(|_| NoMatch).unwrap_or(Match) } + /// Employ pattern matching to see if `value` matches `pattern`. + /// + /// `mode` can be used to adjust the way the matching is performed. pub fn wildmatch(pattern: &BStr, value: &BStr, mode: Mode) -> bool { match_recursive(pattern, value, mode) == Result::Match } diff --git a/git-repository/Cargo.toml b/git-repository/Cargo.toml index 18183327434..5d00ef8dba0 100644 --- a/git-repository/Cargo.toml +++ b/git-repository/Cargo.toml @@ -49,7 +49,7 @@ max-performance = ["git-features/parallel", "git-features/zlib-ng-compat", "git- local-time-support = ["git-actor/local-time-support"] ## Re-export stability tier 2 crates for convenience and make `Repository` struct fields with types from these crates publicly accessible. ## Doing so is less stable than the stability tier 1 that `git-repository` is a member of. -unstable = ["git-index", "git-worktree", "git-mailmap"] +unstable = ["git-index", "git-worktree", "git-mailmap", "git-glob"] ## Print debugging information about usage of object database caches, useful for tuning cache sizes. cache-efficiency-debug = ["git-features/cache-efficiency-debug"] @@ -78,6 +78,7 @@ git-mailmap = { version = "^0.1.0", path = "../git-mailmap", optional = true } git-features = { version = "^0.20.0", path = "../git-features", features = ["progress"] } # unstable only +git-glob = { version = "^0.1.0", path = "../git-glob", optional = true } git-index = { version = "^0.2.0", path = "../git-index", optional = true } git-worktree = { version = "^0.1.0", path = "../git-worktree", optional = true } diff --git a/git-repository/src/lib.rs b/git-repository/src/lib.rs index 31e44598e0e..54c29e8260e 100644 --- a/git-repository/src/lib.rs +++ b/git-repository/src/lib.rs @@ -88,6 +88,7 @@ //! * [`actor`] //! * [`bstr`][bstr] //! * [`index`] +//! * [`glob`] //! * [`worktree`] //! * [`mailmap`] //! * [`objs`] @@ -126,6 +127,8 @@ pub use git_diff as diff; use git_features::threading::OwnShared; #[cfg(feature = "unstable")] pub use git_features::{parallel, progress, progress::Progress, threading}; +#[cfg(all(feature = "unstable", feature = "git-glob"))] +pub use git_glob as glob; pub use git_hash as hash; #[doc(inline)] #[cfg(all(feature = "unstable", feature = "git-index"))]