-
-
Notifications
You must be signed in to change notification settings - Fork 311
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add git-glob crate with pattern matching parsing from git-attributes:…
…:ignore (#298)
- Loading branch information
Showing
11 changed files
with
278 additions
and
8 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Changelog | ||
|
||
All notable changes to this project will be documented in this file. | ||
|
||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), | ||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[package] | ||
name = "git-glob" | ||
version = "0.1.0" | ||
repository = "https://github.com/Byron/gitoxide" | ||
license = "MIT/Apache-2.0" | ||
description = "A WIP crate of the gitoxide project dealing with pattern matching" | ||
authors = ["Sebastian Thiel <sebastian.thiel@icloud.com>"] | ||
edition = "2018" | ||
|
||
[lib] | ||
doctest = false | ||
|
||
[features] | ||
## Data structures implement `serde::Serialize` and `serde::Deserialize`. | ||
serde1 = ["serde", "bstr/serde1"] | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
git-quote = { version = "^0.2.0", path = "../git-quote" } | ||
|
||
bstr = { version = "0.2.13", default-features = false, features = ["std"]} | ||
bitflags = "1.3.2" | ||
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} | ||
|
||
[dev-dependencies] | ||
git-testtools = { path = "../tests/tools"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#![forbid(unsafe_code)] | ||
#![deny(rust_2018_idioms)] | ||
|
||
/// Types describing properties of a parsed pattern. | ||
pub mod pattern { | ||
use bitflags::bitflags; | ||
|
||
bitflags! { | ||
/// Flags recorded while parsing a pattern line; any combination of them may be set, or none at all. | ||
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))] | ||
pub struct Mode: u32 { | ||
/// The pattern does not contain a sub-directory, i.e. it contains no slash once a single trailing slash was removed. | ||
const NO_SUB_DIR = 1 << 0; | ||
/// A pattern of the form `*literal`: a leading `*` followed by text without any of `*`, `?`, `[` or `\`, | ||
/// meaning that it ends with what's given here. | ||
const ENDS_WITH = 1 << 1; | ||
/// The pattern must match a directory, and not a file. | ||
const MUST_BE_DIR = 1 << 2; | ||
/// The pattern line started with an unescaped `!`, which is removed from the pattern text itself. | ||
const NEGATIVE = 1 << 3; | ||
} | ||
} | ||
} | ||
|
||
mod parse; | ||
pub use parse::parse_line as parse; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
use crate::pattern; | ||
use crate::pattern::Mode; | ||
use bstr::{BString, ByteSlice}; | ||
|
||
/// Parse a single `line` into the pattern text and the `Mode` flags describing it. | ||
/// | ||
/// Returns `None` if `line` is empty, or if nothing but ASCII whitespace remains after a leading `!` | ||
/// or an escaping backslash was removed. A leading `#` receives no special treatment here. | ||
#[inline] | ||
pub fn parse_line(mut line: &[u8]) -> Option<(BString, pattern::Mode)> { | ||
let mut mode = Mode::empty(); | ||
if line.is_empty() { | ||
return None; | ||
}; | ||
// A leading `!` marks the pattern as negative and is not part of the pattern text. | ||
if line.first() == Some(&b'!') { | ||
mode |= Mode::NEGATIVE; | ||
line = &line[1..]; | ||
} else if line.first() == Some(&b'\\') { | ||
// `\!` and `\#` keep `!` or `#` as literal first byte, only the backslash is dropped. | ||
// A backslash in front of anything else stays in the pattern. | ||
let second = line.get(1); | ||
if second == Some(&b'!') || second == Some(&b'#') { | ||
line = &line[1..]; | ||
} | ||
} | ||
// This also rejects a line consisting only of `!`, as `all()` is true for an empty slice. | ||
if line.iter().all(|b| b.is_ascii_whitespace()) { | ||
return None; | ||
} | ||
// From here on `line` is an owned copy which can be modified in place. | ||
let mut line = truncate_non_escaped_trailing_spaces(line); | ||
// Only a single trailing slash is removed, so `dir///` turns into `dir//`. | ||
if line.last() == Some(&b'/') { | ||
mode |= Mode::MUST_BE_DIR; | ||
line.pop(); | ||
} | ||
// Checked after the trailing slash is gone, which is why `dir/` still counts as having no sub-directory. | ||
if !line.contains(&b'/') { | ||
mode |= Mode::NO_SUB_DIR; | ||
} | ||
// A leading `*` followed by bytes free of `*`, `?`, `[` and `\`. | ||
// `first()` being `Some` guarantees that slicing with `1..` is in bounds. | ||
if line.first() == Some(&b'*') && line[1..].find_byteset(br"*?[\").is_none() { | ||
mode |= Mode::ENDS_WITH; | ||
} | ||
Some((line, mode)) | ||
} | ||
|
||
/// Return a copy of `buf` with trailing spaces removed unless they are protected by a backslash. | ||
/// | ||
/// Only the run of spaces and backslashes at the very end of `buf` is rewritten, everything before | ||
/// it is copied verbatim. | ||
/// | ||
/// We always copy just because that's ultimately needed anyway, not because we always have to. | ||
fn truncate_non_escaped_trailing_spaces(buf: &[u8]) -> BString { | ||
// Position of the last byte that is neither a backslash nor a space, if there is one. | ||
match buf.rfind_not_byteset(br"\ ") { | ||
Some(pos) if pos + 1 == buf.len() => buf.into(), // does not end in (escaped) whitespace | ||
// `buf` consists of nothing but backslashes and spaces and is copied unchanged. | ||
None => buf.into(), | ||
Some(start_of_non_space) => { | ||
// This seems a bit strange but attempts to recreate the git implementation while | ||
// actually removing the escape characters before spaces. We leave other backslashes | ||
// for escapes to be handled by `glob/globset`. | ||
// Everything up to and including the last byte that is neither space nor backslash is kept as is. | ||
let mut res: BString = buf[..start_of_non_space + 1].into(); | ||
|
||
let mut trailing_bytes = buf[start_of_non_space + 1..].iter(); | ||
// Spaces seen since the last backslash. They are only written if another backslash follows, | ||
// and are dropped if the input ends first. | ||
let mut bare_spaces = 0; | ||
// `while let` rather than `for`, as the backslash arm advances the iterator on its own. | ||
while let Some(b) = trailing_bytes.next() { | ||
match b { | ||
b' ' => { | ||
bare_spaces += 1; | ||
} | ||
b'\\' => { | ||
// Spaces in front of a backslash are not trailing anymore, so they are kept. | ||
res.extend(std::iter::repeat(b' ').take(bare_spaces)); | ||
bare_spaces = 0; | ||
// Skip what follows, like git does, but keep spaces if possible. | ||
if trailing_bytes.next() == Some(&b' ') { | ||
res.push(b' '); | ||
} | ||
} | ||
// The tail starts right after the last byte outside of the backslash/space set. | ||
_ => unreachable!("BUG: this must be either backslash or space"), | ||
} | ||
} | ||
res | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
mod parse; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
use git_glob::pattern::Mode; | ||
|
||
// `ENDS_WITH` is only set for a single leading `*` followed by text free of `*`, `?`, `[` and `\`. | ||
#[test] | ||
fn mark_ends_with_pattern_specifically() { | ||
assert_eq!( | ||
git_glob::parse(br"*literal"), | ||
Some((r"*literal".into(), Mode::NO_SUB_DIR | Mode::ENDS_WITH)) | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"**literal"), | ||
Some((r"**literal".into(), Mode::NO_SUB_DIR)), | ||
"double-asterisk won't allow for fast comparisons" | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"*litera[l]"), | ||
Some((r"*litera[l]".into(), Mode::NO_SUB_DIR)) | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"*litera?"), | ||
Some((r"*litera?".into(), Mode::NO_SUB_DIR)) | ||
); | ||
// The backslash alone prevents `ENDS_WITH`, even though it escapes the `?`. | ||
assert_eq!( | ||
git_glob::parse(br"*litera\?"), | ||
Some((r"*litera\?".into(), Mode::NO_SUB_DIR)), | ||
"for now we don't handle escapes properly like git seems to do" | ||
); | ||
} | ||
|
||
// A line made up of only newlines, carriage returns, tabs and spaces yields no pattern. | ||
#[test] | ||
fn whitespace_only_is_ignored() { | ||
assert!(git_glob::parse(b"\n\r\n\t\t \n").is_none()); | ||
} | ||
|
||
// A leading `#` is kept as part of the pattern, the parser does not treat such lines as comments. | ||
#[test] | ||
fn hash_symbols_are_not_special() { | ||
assert_eq!( | ||
git_glob::parse(b"# hello world"), | ||
Some(("# hello world".into(), Mode::NO_SUB_DIR)) | ||
); | ||
} | ||
|
||
// A leading `\#` yields a pattern starting with a literal `#`, with the backslash removed. | ||
#[test] | ||
fn backslashes_before_hashes_are_no_comments() { | ||
assert_eq!(git_glob::parse(br"\#hello"), Some((r"#hello".into(), Mode::NO_SUB_DIR))); | ||
} | ||
|
||
// Backslashes that neither escape a leading `!` or `#` nor sit among trailing spaces remain untouched. | ||
#[test] | ||
fn backslashes_are_part_of_the_pattern_if_not_in_specific_positions() { | ||
assert_eq!( | ||
git_glob::parse(br"\hello\world"), | ||
Some((r"\hello\world".into(), Mode::NO_SUB_DIR)) | ||
); | ||
} | ||
|
||
// A leading `!` sets `NEGATIVE` and is removed from the returned pattern. | ||
#[test] | ||
fn leading_exclamation_mark_negates_pattern() { | ||
assert_eq!( | ||
git_glob::parse(b"!hello"), | ||
Some(("hello".into(), Mode::NEGATIVE | Mode::NO_SUB_DIR)) | ||
); | ||
} | ||
|
||
// A leading `\!` yields a pattern starting with a literal `!` without setting `NEGATIVE`. | ||
#[test] | ||
fn leading_exclamation_marks_can_be_escaped_with_backslash() { | ||
assert_eq!(git_glob::parse(br"\!hello"), Some(("!hello".into(), Mode::NO_SUB_DIR))); | ||
} | ||
|
||
// `NO_SUB_DIR` is set exactly when the pattern contains no slash. | ||
#[test] | ||
fn absence_of_sub_directories_are_marked() { | ||
assert_eq!(git_glob::parse(br"a/b"), Some(("a/b".into(), Mode::empty()))); | ||
assert_eq!(git_glob::parse(br"ab"), Some(("ab".into(), Mode::NO_SUB_DIR))); | ||
} | ||
|
||
// A trailing slash sets `MUST_BE_DIR` and is removed before `NO_SUB_DIR` is determined. | ||
#[test] | ||
fn trailing_slashes_are_marked_and_removed() { | ||
assert_eq!( | ||
git_glob::parse(b"dir/"), | ||
Some(("dir".into(), Mode::MUST_BE_DIR | Mode::NO_SUB_DIR)) | ||
); | ||
// The remaining slashes count as sub-directory separators, hence no `NO_SUB_DIR`. | ||
assert_eq!( | ||
git_glob::parse(b"dir///"), | ||
Some(("dir//".into(), Mode::MUST_BE_DIR)), | ||
"but only the last slash is removed" | ||
); | ||
} | ||
|
||
// Unescaped trailing spaces are removed, while other trailing whitespace like tabs is kept. | ||
#[test] | ||
fn trailing_spaces_are_ignored() { | ||
assert_eq!(git_glob::parse(br"a "), Some(("a".into(), Mode::NO_SUB_DIR))); | ||
assert_eq!( | ||
git_glob::parse(b"a\t\t "), | ||
Some(("a\t\t".into(), Mode::NO_SUB_DIR)), | ||
"trailing tabs are not ignored" | ||
); | ||
} | ||
|
||
// Backslashes among the trailing spaces protect spaces from removal and are themselves dropped. | ||
// NOTE(review): runs of consecutive spaces in the literals below appear to have been collapsed in | ||
// this rendering, as several inputs would not produce the shown expectation - confirm against the | ||
// original file before relying on the exact byte strings. | ||
#[test] | ||
fn trailing_spaces_can_be_escaped_to_be_literal() { | ||
assert_eq!( | ||
git_glob::parse(br"a \ "), | ||
Some(("a ".into(), Mode::NO_SUB_DIR)), | ||
"a single escape in front of the last desired space is enough" | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"a b c "), | ||
Some(("a b c".into(), Mode::NO_SUB_DIR)), | ||
"spaces in the middle are fine" | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"a\ \ \ "), | ||
Some(("a ".into(), Mode::NO_SUB_DIR)), | ||
"one can also escape every single one" | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"a \ "), | ||
Some(("a ".into(), Mode::NO_SUB_DIR)), | ||
"or just the one in the middle, losing the last actual space" | ||
); | ||
// A backslash at the very end has nothing to escape, yet keeps the spaces in front of it. | ||
assert_eq!( | ||
git_glob::parse(br"a \"), | ||
Some(("a ".into(), Mode::NO_SUB_DIR)), | ||
"escaping nothing also works as a whitespace protection" | ||
); | ||
// The byte following a backslash is always consumed, which includes another backslash. | ||
assert_eq!( | ||
git_glob::parse(br"a \\\ "), | ||
Some((r"a ".into(), Mode::NO_SUB_DIR)), | ||
"strange things like these work too" | ||
); | ||
assert_eq!( | ||
git_glob::parse(br"a \\ "), | ||
Some((r"a ".into(), Mode::NO_SUB_DIR)), | ||
"strange things like these work as well" | ||
); | ||
} |