Skip to content

Commit

Permalink
globset: permit ** to appear anywhere
Browse files Browse the repository at this point in the history
Previously, `man gitignore` specified that `**` was invalid unless it
was used in one of a few specific circumstances, i.e., `**`, `a/**`,
`**/b` or `a/**/b`. That is, `**` always had to be surrounded by either
a path separator or the beginning/end of the pattern.

It turns out that git itself has treated `**` outside the above contexts
as valid for quite a while, so there was an inconsistency between the
spec `man gitignore` and the implementation, and it wasn't clear which
was actually correct.

@okdana filed a bug against git[1] and got this fixed. The spec was wrong,
which has now been fixed [2] and updated[2].

This commit brings ripgrep in line with git and treats `**` outside of
the above contexts as two consecutive `*` patterns. We deprecate the
`InvalidRecursive` error since it is no longer used.

Fixes #373, Fixes #1098

[1] - https://public-inbox.org/git/C16A9F17-0375-42F9-90A9-A92C9F3D8BBA@dana.is
[2] - git/git@627186d
[3] - https://git-scm.com/docs/gitignore
  • Loading branch information
BurntSushi committed Jan 24, 2019
1 parent 0a16702 commit 9c940b4
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 25 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Feature enhancements:

Bug fixes:

* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
`**` is now accepted as valid syntax anywhere in a glob.
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
`--files-with-matches` and `--files-without-match` work with one file.
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
Expand Down
49 changes: 31 additions & 18 deletions globset/src/glob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -837,40 +837,49 @@ impl<'a> Parser<'a> {

fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev;
if self.chars.peek() != Some(&'*') {
if self.peek() != Some('*') {
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
assert!(self.bump() == Some('*'));
if !self.have_tokens()? {
self.push_token(Token::RecursivePrefix)?;
let next = self.bump();
if !next.map(is_separator).unwrap_or(true) {
return Err(self.error(ErrorKind::InvalidRecursive));
if !self.peek().map_or(true, is_separator) {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
} else {
self.push_token(Token::RecursivePrefix)?;
assert!(self.bump().map_or(true, is_separator));
}
return Ok(());
}

if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1
|| (prev != Some(',') && prev != Some('{')) {
return Err(self.error(ErrorKind::InvalidRecursive));
|| (prev != Some(',') && prev != Some('{'))
{
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
}
let is_suffix =
match self.chars.peek() {
match self.peek() {
None => {
assert!(self.bump().is_none());
true
}
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
Some(',') | Some('}') if self.stack.len() >= 2 => {
true
}
Some(&c) if is_separator(c) => {
Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
}
_ => return Err(self.error(ErrorKind::InvalidRecursive)),
_ => {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
};
match self.pop_token()? {
Token::RecursivePrefix => {
Expand Down Expand Up @@ -976,6 +985,10 @@ impl<'a> Parser<'a> {
self.cur = self.chars.next();
self.cur
}

fn peek(&mut self) -> Option<char> {
self.chars.peek().map(|&ch| ch)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -1161,13 +1174,6 @@ mod tests {
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);

syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
Expand Down Expand Up @@ -1228,6 +1234,13 @@ mod tests {
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
toregex!(re28, "a**", r"^a.*.*$");
toregex!(re29, "**a", r"^.*.*a$");
toregex!(re30, "a**b", r"^a.*.*b$");
toregex!(re31, "***", r"^.*.*.*$");
toregex!(re32, "/a**", r"^/a.*.*$");
toregex!(re33, "/**a", r"^/.*.*a$");
toregex!(re34, "/a**b", r"^/a.*.*b$");

matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
Expand Down
9 changes: 7 additions & 2 deletions globset/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,8 +143,13 @@ pub struct Error {
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
/// adjacent to a path separator, or the beginning/end of a glob.
/// **DEPRECATED**.
///
/// This error used to occur for consistency with git's glob specification,
/// but the specification now accepts all uses of `**`. When `**` does not
/// appear adjacent to a path separator or at the beginning/end of a glob,
/// it is now treated as two consecutive `*` patterns. As such, this error
/// is no longer used.
InvalidRecursive,
/// Occurs when a character class (e.g., `[abc]`) is not closed.
UnclosedClass,
Expand Down
10 changes: 5 additions & 5 deletions ignore/src/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ mod tests {
#[test]
fn errored() {
let td = tmpdir("ignore-test-");
wfile(td.path().join(".gitignore"), "f**oo");
wfile(td.path().join(".gitignore"), "{foo");

let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
assert!(err.is_some());
Expand All @@ -878,8 +878,8 @@ mod tests {
#[test]
fn errored_both() {
let td = tmpdir("ignore-test-");
wfile(td.path().join(".gitignore"), "f**oo");
wfile(td.path().join(".ignore"), "fo**o");
wfile(td.path().join(".gitignore"), "{foo");
wfile(td.path().join(".ignore"), "{bar");

let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
assert_eq!(2, partial(err.expect("an error")).len());
Expand All @@ -889,7 +889,7 @@ mod tests {
fn errored_partial() {
let td = tmpdir("ignore-test-");
mkdirp(td.path().join(".git"));
wfile(td.path().join(".gitignore"), "f**oo\nbar");
wfile(td.path().join(".gitignore"), "{foo\nbar");

let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
assert!(err.is_some());
Expand All @@ -899,7 +899,7 @@ mod tests {
#[test]
fn errored_partial_and_ignore() {
let td = tmpdir("ignore-test-");
wfile(td.path().join(".gitignore"), "f**oo\nbar");
wfile(td.path().join(".gitignore"), "{foo\nbar");
wfile(td.path().join(".ignore"), "!bar");

let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
Expand Down
8 changes: 8 additions & 0 deletions tests/regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,14 @@ rgtest!(r1064, |dir: Dir, mut cmd: TestCommand| {
eqnice!("input:abc\n", cmd.arg("a(.*c)").stdout());
});

// See: https://github.com/BurntSushi/ripgrep/issues/1174
rgtest!(r1098, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir(".git");
dir.create(".gitignore", "a**b");
dir.create("afoob", "test");
cmd.arg("test").assert_err();
});

// See: https://github.com/BurntSushi/ripgrep/issues/1130
rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| {
dir.create("foo", "test");
Expand Down

0 comments on commit 9c940b4

Please sign in to comment.