Skip to content

Commit

Permalink
Add all tests imaginable for branch name sanitization
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Aug 9, 2024
1 parent 8077f3e commit 0401758
Show file tree
Hide file tree
Showing 4 changed files with 378 additions and 25 deletions.
67 changes: 57 additions & 10 deletions gix-validate/src/reference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,35 +27,82 @@ pub mod name {
}

use bstr::BStr;
use std::borrow::Cow;

/// Validate `path` as a complete reference name, running all the tests in the book.
///
/// This rejects lower-case names like `lower`, while accepting names like `HEAD` and `refs/lower`.
/// On success the name is returned as a borrow rather than a copy.
pub fn name(path: &BStr) -> Result<&BStr, name::Error> {
validate(path, Mode::Complete)
match validate(path, Mode::Complete)? {
Cow::Borrowed(inner) => Ok(inner),
// `Mode::Complete` never sanitizes, so an owned (altered) name is not expected here.
Cow::Owned(_) => {
unreachable!("Without sanitization, there is no chance a sanitized version is returned.")
}
}
}

/// Validate `path` as a partial reference name. As it is assumed to be partial, names like `some-name` are allowed
/// even though they would be rejected by [`name()`].
///
/// On success the name is returned as a borrow rather than a copy.
pub fn name_partial(path: &BStr) -> Result<&BStr, name::Error> {
validate(path, Mode::Partial)
match validate(path, Mode::Partial)? {
Cow::Borrowed(inner) => Ok(inner),
// `Mode::Partial` never sanitizes, so an owned (altered) name is not expected here.
Cow::Owned(_) => {
unreachable!("Without sanitization, there is no chance a sanitized version is returned.")
}
}
}

/// Turn `path` into a valid partial reference name instead of rejecting it.
///
/// This is the sanitizing counterpart of [`name_partial()`]: wherever that function would fail, `path` is
/// altered so that the returned name would pass [`name_partial()`] as well.
///
/// Note that an empty `path` is replaced with a `-` in order to be valid.
///
/// # Panics
///
/// If sanitization reports an error after all, which would be a bug.
pub fn name_partial_or_sanitize(path: &BStr) -> Cow<'_, BStr> {
    let sanitized = validate(path, Mode::PartialSanitize);
    sanitized.expect("BUG: errors cannot happen as any issue is fixed instantly")
}

/// Selects how `validate()` treats the name it is given.
enum Mode {
/// Validation as performed by [`name()`]; violations are returned as errors.
Complete,
/// Validation as performed by [`name_partial()`]; violations are returned as errors.
Partial,
/// Like `Partial`, but instead of failing, a sanitized version is returned.
PartialSanitize,
}

fn validate(path: &BStr, mode: Mode) -> Result<&BStr, name::Error> {
crate::tag::name(path)?;
if path[0] == b'/' {
return Err(name::Error::StartsWithSlash);
fn validate(path: &BStr, mode: Mode) -> Result<Cow<'_, BStr>, name::Error> {
let mut out = crate::tag::name_inner(
path,
match mode {
Mode::Complete | Mode::Partial => crate::tag::Mode::Validate,
Mode::PartialSanitize => crate::tag::Mode::Sanitize,
},
)?;
let sanitize = matches!(mode, Mode::PartialSanitize);
if path.get(0) == Some(&b'/') {
if sanitize {
out.to_mut()[0] = b'-';
} else {
return Err(name::Error::StartsWithSlash);
}
}
let mut previous = 0;
let mut saw_slash = false;
for byte in path.iter() {
let mut out_ofs = 0;
for (mut byte_pos, byte) in path.iter().enumerate() {
byte_pos -= out_ofs;
match *byte {
b'/' if previous == b'/' => return Err(name::Error::RepeatedSlash),
b'.' if previous == b'/' => return Err(name::Error::StartsWithDot),
b'/' if previous == b'/' => {
if sanitize {
out.to_mut().remove(byte_pos);
out_ofs += 1;
} else {
return Err(name::Error::RepeatedSlash);
}
}
b'.' if previous == b'/' => {
if sanitize {
out.to_mut()[byte_pos] = b'-';
} else {
return Err(name::Error::StartsWithDot);
}
}
_ => {}
}

Expand All @@ -70,5 +117,5 @@ fn validate(path: &BStr, mode: Mode) -> Result<&BStr, name::Error> {
return Err(name::Error::SomeLowercase);
}
}
Ok(path)
Ok(out)
}
103 changes: 89 additions & 14 deletions gix-validate/src/tag.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use bstr::BStr;
use bstr::{BStr, ByteSlice};
use std::borrow::Cow;

///
#[allow(clippy::empty_docs)]
Expand Down Expand Up @@ -33,36 +34,110 @@ pub mod name {
/// Assure the given `input` resembles a valid git tag name, which is returned unchanged on success.
/// Tag names are provided as names, like `v1.0` or `alpha-1`, without paths.
pub fn name(input: &BStr) -> Result<&BStr, name::Error> {
    let validated = name_inner(input, Mode::Validate)?;
    // Validation never alters the input, hence only the borrowed form is expected.
    if let Cow::Borrowed(unchanged) = validated {
        Ok(unchanged)
    } else {
        unreachable!("When validating, the input isn't changed")
    }
}

/// Selects whether [`name_inner()`] repairs or rejects an invalid `input`.
#[derive(Eq, PartialEq)]
pub(crate) enum Mode {
/// Replace or remove offending bytes instead of returning an error.
Sanitize,
/// Return an error as soon as a violation is found, leaving the input unchanged.
Validate,
}

/// Check `input` against the tag name rules, either rejecting or repairing it depending on `mode`.
///
/// With `Mode::Validate`, a violation is returned as error. With `Mode::Sanitize`, each violation is
/// fixed in `out`, which starts out borrowing `input` and becomes an owned copy on the first alteration.
pub(crate) fn name_inner(input: &BStr, mode: Mode) -> Result<Cow<'_, BStr>, name::Error> {
let mut out = Cow::Borrowed(input);
let sanitize = matches!(mode, Mode::Sanitize);
if input.is_empty() {
return Err(name::Error::Empty);
// An empty name is sanitized into a single `-`.
return if sanitize {
out.to_mut().push(b'-');
Ok(out)
} else {
Err(name::Error::Empty)
};
}
if *input.last().expect("non-empty") == b'/' {
return Err(name::Error::EndsWithSlash);
if sanitize {
// Drop every trailing slash.
// NOTE(review): for an input consisting solely of `/` this leaves `out` empty - confirm that
// code indexing into the result can cope with that.
while out.last() == Some(&b'/') {
out.to_mut().pop();
}
// NOTE(review): all trailing `/` were popped just above, so this loop appears to never
// replace anything - confirm whether it can be removed.
let bytes_from_end = out.to_mut().as_bytes_mut().iter_mut().rev();
for b in bytes_from_end.take_while(|b| **b == b'/') {
*b = b'-';
}
} else {
return Err(name::Error::EndsWithSlash);
}
}

let mut previous = 0;
for byte in input.iter() {
// Counts the bytes removed from `out` inside the loop; subtracting it maps a position in `input`
// to the corresponding position in `out`.
let mut out_ofs = 0;
for (mut byte_pos, byte) in input.iter().enumerate() {
byte_pos -= out_ofs;
match byte {
b'\\' | b'^' | b':' | b'[' | b'?' | b' ' | b'~' | b'\0'..=b'\x1F' | b'\x7F' => {
return Err(name::Error::InvalidByte {
byte: (&[*byte][..]).into(),
})
// Invalid bytes are replaced in place, keeping the length of `out` as is.
if sanitize {
out.to_mut()[byte_pos] = b'-';
} else {
return Err(name::Error::InvalidByte {
byte: (&[*byte][..]).into(),
});
}
}
b'*' => {
if sanitize {
out.to_mut()[byte_pos] = b'-';
} else {
return Err(name::Error::Asterisk);
}
}

b'.' if previous == b'.' => {
// The repeated dot is removed, which shortens `out` and is thus tracked in `out_ofs`.
if sanitize {
out.to_mut().remove(byte_pos);
out_ofs += 1;
} else {
return Err(name::Error::DoubleDot);
}
}
b'{' if previous == b'@' => {
// Only the `{` of `@{` is replaced.
if sanitize {
out.to_mut()[byte_pos] = b'-';
} else {
return Err(name::Error::ReflogPortion);
}
}
b'*' => return Err(name::Error::Asterisk),
b'.' if previous == b'.' => return Err(name::Error::DoubleDot),
b'{' if previous == b'@' => return Err(name::Error::ReflogPortion),
_ => {}
}
// Always the byte as seen in `input`, even if it was altered or removed in `out`.
previous = *byte;
}
if input[0] == b'.' {
return Err(name::Error::StartsWithDot);
if sanitize {
out.to_mut()[0] = b'-';
} else {
return Err(name::Error::StartsWithDot);
}
}
if input[input.len() - 1] == b'.' {
return Err(name::Error::EndsWithDot);
if sanitize {
let last = out.len() - 1;
out.to_mut()[last] = b'-';
} else {
return Err(name::Error::EndsWithDot);
}
}
if input.ends_with(b".lock") {
return Err(name::Error::LockFileSuffix);
if sanitize {
// Strip the suffix repeatedly to also handle names like `a.lock.lock`.
// NOTE(review): this is the last step, so whatever remains after truncation (e.g. a trailing
// `/` or `.`) isn't checked again here - confirm that this is acceptable.
while out.ends_with(b".lock") {
let len_without_suffix = out.len() - b".lock".len();
out.to_mut().truncate(len_without_suffix);
}
} else {
return Err(name::Error::LockFileSuffix);
}
}
Ok(input)
Ok(out)
}
Loading

0 comments on commit 0401758

Please sign in to comment.