Skip to content

Commit

Permalink
Merge branch 'basic-filtering'
Browse files Browse the repository at this point in the history
  • Loading branch information
Byron committed Jun 26, 2023
2 parents 3c8e3c1 + 1517cbc commit 3fd5e16
Show file tree
Hide file tree
Showing 18 changed files with 1,606 additions and 15 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions gix-filter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,9 @@ rust-version = "1.65"
doctest = false

[dependencies]
gix-hash = { version = "^0.11.3", path = "../gix-hash" }
gix-trace = { version = "^0.1.1", path = "../gix-trace" }
gix-object = { version = "^0.31.0", path = "../gix-object" }

bstr = { version = "1.5.0", default-features = false, features = ["std"] }
thiserror = "1.0.38"
156 changes: 156 additions & 0 deletions gix-filter/src/eol/convert_to_git.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
use std::path::{Path, PathBuf};

/// Additional context for use with [`convert_to_git`][super::convert_to_git()].
///
/// The derived `Default` performs no round-trip check and uses the default EOL configuration.
#[derive(Default, Debug, Copy, Clone)]
pub struct Context<'a> {
    /// How to perform round-trip checks, or `None` to not perform them at all.
    pub round_trip_check: Option<RoundTripCheck<'a>>,
    /// Configuration related to EOL.
    pub config: crate::eol::Configuration,
}

/// The kind of round-trip check to perform when converting line endings to `git`, i.e. `CRLF` to `LF`.
///
/// The check simulates adding the file and checking it out again, and triggers if the line endings
/// currently present in the file would not be restored that way.
#[derive(Debug, Copy, Clone)]
pub enum RoundTripCheck<'a> {
/// Fail with an error if conversion isn't round-trip safe.
Fail {
/// The repository-relative path of the file to check. It is part of the returned error.
rela_path: &'a Path,
},
/// Emit a warning using `gix_trace::warn!`, but don't fail.
///
/// Note that the parent application has to set up tracing to make these events visible, along with a parent `span!`.
Warn {
/// The repository-relative path of the file to check. It is part of the emitted warning.
rela_path: &'a Path,
},
}

/// The error returned by [convert_to_git()][super::convert_to_git()].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
/// The conversion isn't round-trip safe and [`RoundTripCheck::Fail`] was requested.
///
/// `msg` names the replacement that would happen, and `path` is the repository-relative path of the affected file.
#[error("{msg} in '{}'", path.display())]
RoundTrip { msg: &'static str, path: PathBuf },
/// The caller-provided `index_object` callback failed; its error is kept as the source.
#[error("Could not obtain index object to check line endings for")]
FetchObjectFromIndex(#[source] Box<dyn std::error::Error + Send + Sync + 'static>),
}

pub(crate) mod function {
use crate::clear_and_set_capacity;
use crate::eol::convert_to_git::{Context, Error, RoundTripCheck};
use crate::eol::{AttributesDigest, Stats};
use bstr::ByteSlice;

/// Given a `src` buffer, change it `git` (`\n`) line endings and store the result in `buf`.
/// Return `true` if `buf` was written or `false` if nothing had to be done.
/// `action` is used to determine if ultimately a conversion should be done or not.
/// When `action` takes certain values, `index_object` is called to write the version of `src` as stored in the index
/// into the buffer and if it is a blob, or return `Ok(None)` if no such object exists.
/// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the
/// outcome of this function.
/// If `round_trip_check` is not `None`, round-tripping will be validated and handled accordingly.
pub fn convert_to_git<E>(
src: &[u8],
digest: AttributesDigest,
buf: &mut Vec<u8>,
index_object: impl FnOnce(&mut Vec<u8>) -> Result<Option<()>, E>,
Context {
round_trip_check,
config,
}: Context<'_>,
) -> Result<bool, Error>
where
E: std::error::Error + Send + Sync + 'static,
{
if digest == AttributesDigest::Binary || src.is_empty() {
return Ok(false);
}

let stats = Stats::from_bytes(src);
let mut convert_crlf_to_lf = stats.crlf > 0;
if digest.is_auto_text() {
// In this mode, we are supposed to figure out ourselves if we should convert or not.
if stats.is_binary() {
return Ok(false);
}

if let Some(()) = index_object(buf).map_err(|err| Error::FetchObjectFromIndex(Box::new(err)))? {
let has_crlf_in_index = buf
.find_byte(b'\r')
.map(|_| Stats::from_bytes(buf))
.filter(|s| !s.is_binary() && s.crlf > 0)
.is_some();
if has_crlf_in_index {
convert_crlf_to_lf = false;
}
}
}

if let Some(round_trip_check) = round_trip_check {
let mut new_stats = stats;
// simulate to-git conversion/git-add
if convert_crlf_to_lf {
new_stats.lone_lf += new_stats.crlf;
new_stats.crlf = 0;
}
// simulate worktree checkout
if new_stats.will_convert_lf_to_crlf(digest, config) {
new_stats.crlf += new_stats.lone_lf;
new_stats.lone_lf = 0;
}
if stats.crlf > 0 && new_stats.crlf == 0 {
// CRLF would not be restored by checkout
match round_trip_check {
RoundTripCheck::Fail { rela_path } => {
return Err(Error::RoundTrip {
msg: "CRLF would be replaced by LF",
path: rela_path.to_owned(),
})
}
#[allow(unused_variables)]
RoundTripCheck::Warn { rela_path } => {
gix_trace::warn!(
"in the working copy of '{}', CRLF will be replaced by LF next time git touches it",
rela_path.display()
)
}
}
} else if stats.lone_lf > 0 && new_stats.lone_lf == 0 {
// CRLF would be added by checkout
match round_trip_check {
RoundTripCheck::Fail { rela_path } => {
return Err(Error::RoundTrip {
msg: "LF would be replaced by CRLF",
path: rela_path.to_owned(),
})
}
#[allow(unused_variables)]
RoundTripCheck::Warn { rela_path } => {
gix_trace::warn!(
"in the working copy of '{}', LF will be replaced by CRLF next time git touches it",
rela_path.display()
)
}
}
}
}

if !convert_crlf_to_lf {
return Ok(false);
}

clear_and_set_capacity(buf, src.len() - stats.crlf);
if stats.lone_cr == 0 {
buf.extend(src.iter().filter(|b| **b != b'\r'));
} else {
let mut bytes = src.iter().peekable();
while let Some(b) = bytes.next() {
if !(*b == b'\r' && bytes.peek() == Some(&&b'\n')) {
buf.push(*b);
}
}
}
Ok(true)
}
}
40 changes: 40 additions & 0 deletions gix-filter/src/eol/convert_to_worktree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use crate::clear_and_set_capacity;
use crate::eol::{AttributesDigest, Configuration, Mode, Stats};
use bstr::{ByteSlice, ByteVec};

/// Convert all `\n` in `src` to `crlf` if `digest` and `config` indicate it, returning `true` if `buf` holds the result, or `false`
/// if no change was made after all.
pub fn convert_to_worktree(src: &[u8], digest: AttributesDigest, buf: &mut Vec<u8>, config: Configuration) -> bool {
if src.is_empty() || digest.to_eol(config) != Some(Mode::CrLf) {
return false;
}
let stats = Stats::from_bytes(src);
if !stats.will_convert_lf_to_crlf(digest, config) {
return false;
}

clear_and_set_capacity(buf, src.len() + stats.lone_lf);

let mut ofs = 0;
while let Some(pos) = src[ofs..].find_byteset(b"\r\n") {
match src[ofs + pos] {
b'\r' => {
if src.get(ofs + pos + 1) == Some(&b'\n') {
buf.push_str(&src[ofs..][..pos + 2]);
ofs += pos + 2;
} else {
buf.push_str(&src[ofs..][..pos + 1]);
ofs += pos + 1;
}
}
b'\n' => {
buf.push_str(&src[ofs..][..pos]);
buf.push_str(b"\r\n");
ofs += pos + 1;
}
_ => unreachable!("would only find one of two possible values"),
}
}
buf.push_str(&src[ofs..]);
true
}
64 changes: 64 additions & 0 deletions gix-filter/src/eol/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
///
pub mod convert_to_git;
pub use convert_to_git::function::convert_to_git;

mod convert_to_worktree;
pub use convert_to_worktree::convert_to_worktree;

mod utils;

/// The kind of end of lines to set.
///
/// The default is implemented to be the native line ending for the current platform.
// NOTE(review): no `Default` implementation is visible next to this definition - presumably it lives in
// another module of this crate; confirm before relying on the sentence above.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Mode {
/// Equivalent to `git` (`\n`) line-endings.
Lf,
/// Equivalent to `windows` (`\r\n`) line-endings.
CrLf,
}

/// The combination of `crlf`, `text` and `eol` attributes into one neat package.
///
/// It is passed to the conversion functions of this module to decide if and how line endings are converted.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum AttributesDigest {
/// Equivalent to the `-text` attribute.
///
/// Content with this digest is never converted by `convert_to_git()`.
Binary,
/// Equivalent to the `text` attribute.
Text,
/// Equivalent to the `text eol=lf` attributes.
TextInput,
/// Equivalent to the `text eol=crlf` attributes.
TextCrlf,
/// Equivalent to the `text=auto` attributes.
TextAuto,
/// Equivalent to the `text=auto eol=crlf` attributes.
TextAutoCrlf,
/// Equivalent to the `text=auto eol=lf` attributes.
TextAutoInput,
}

/// Git Configuration that affects how CRLF conversions are applied.
///
/// The derived `Default` sets both fields to `None`.
#[derive(Default, Debug, Copy, Clone)]
pub struct Configuration {
/// Corresponds to `core.autocrlf` and is `None` for `input`, `Some(true)` if `true` or `Some(false)` if `false`.
// NOTE(review): with this encoding the derived default (`None`) represents `input` - confirm that this is
// what callers expect when `core.autocrlf` is unset.
pub auto_crlf: Option<bool>,
/// Corresponds to `core.eol`, and is `None` if unset or set to `native`, or `Some(<mode>)` respectively.
pub eol: Option<Mode>,
}

/// Statistics about a buffer that help to safely perform EOL conversions.
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Stats {
/// The amount of null bytes.
pub null: usize,
/// The amount of lone carriage returns (`\r`), i.e. those not followed by a line feed.
pub lone_cr: usize,
/// The amount of lone line feeds (`\n`), i.e. those not preceded by a carriage return.
pub lone_lf: usize,
/// The amount of carriage returns followed by line feeds (`\r\n`).
pub crlf: usize,
/// The estimate of printable characters.
pub printable: usize,
/// The estimate of characters that can't be printed.
pub non_printable: usize,
}
Loading

0 comments on commit 3fd5e16

Please sign in to comment.