From fa0437899292e9e23db932041220bbeda732c246 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 23 Jun 2023 15:03:45 +0200 Subject: [PATCH 1/8] Add some configuration variables as planned due to relevancy with filters --- src/plumbing/progress.rs | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/plumbing/progress.rs b/src/plumbing/progress.rs index 668f3121429..5484ec2bfee 100644 --- a/src/plumbing/progress.rs +++ b/src/plumbing/progress.rs @@ -102,7 +102,19 @@ impl Tabled for Record { static GIT_CONFIG: &[Record] = &[ Record { config: "core.safeCRLF", - usage: Planned { note: Some("safety is not optional") }, + usage: Planned { note: Some("safety is not optional (but we will respect the setting)") }, + }, + Record { + config: "core.autocrlf", + usage: Planned { note: Some("for filtering system") }, + }, + Record { + config: "core.eol", + usage: Planned {note: Some("needed for filters, but also for doing diffs correctly")} + }, + Record { + config: "core.checkRoundtripEncoding", + usage: Planned { note: Some("needed once working-tree-encoding attributes are supported") } }, Record { config: "core.hideDotFiles", @@ -124,10 +136,6 @@ static GIT_CONFIG: &[Record] = &[ config: "core.alternateRefsPrefixes", usage: NotPlanned { reason: "seems like a niche feature, but can be implemented if there is demand" } }, - Record { - config: "core.checkRoundtripEncoding", - usage: Planned { note: Some("needed once working-tree-encoding attributes are supported") } - }, Record { config: "core.bigFileThreshold", usage: Planned { note: Some("unfortunately we can't stream packed files yet, even if not delta-compressed, but respecting the threshold for other operations is definitely a must") } @@ -156,6 +164,10 @@ static GIT_CONFIG: &[Record] = &[ config: "core.sparseCheckoutCone", usage: Planned { note: Some("this is a nice improvement over spareCheckout alone and should one day be available too") }, }, + Record { + config: 
"core.gitProxy", + usage: NotPlanned { reason: "the transport mechanism works differently enough to not support it for now, but of course it's possible to add support if there is demand" }, + }, Record { config: "checkout.defaultRemote", usage: Planned { note: Some("needed for correct checkout behaviour, similar to what git does") }, @@ -230,10 +242,6 @@ static GIT_CONFIG: &[Record] = &[ reason: "no plan to implement format-patch or request-pull summary" }, }, - Record { - config: "core.eol", - usage: Planned {note: Some("needed for filters, but also for doing diffs correctly")} - }, Record { config: "core.fsync", usage: Planned {note: Some("more safety for disk write operations is a good thing, definitely on the server")} From 496445ca97687a38ecb80e871a1cbdc7ecd6b313 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 23 Jun 2023 18:32:20 +0200 Subject: [PATCH 2/8] feat: `ident::undo()` to replace `$Id: XXX$` with `$Id$` --- Cargo.lock | 3 ++ gix-filter/Cargo.toml | 1 + gix-filter/src/lib.rs | 47 +++++++++++++++++++++++++++- gix-filter/tests/filter.rs | 1 + gix-filter/tests/ident/mod.rs | 58 +++++++++++++++++++++++++++++++++++ 5 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 gix-filter/tests/filter.rs create mode 100644 gix-filter/tests/ident/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 966fd09d86a..a383fef9d51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1648,6 +1648,9 @@ version = "0.0.0" [[package]] name = "gix-filter" version = "0.0.0" +dependencies = [ + "bstr", +] [[package]] name = "gix-fs" diff --git a/gix-filter/Cargo.toml b/gix-filter/Cargo.toml index 5423c02be6c..8644837d98f 100644 --- a/gix-filter/Cargo.toml +++ b/gix-filter/Cargo.toml @@ -12,3 +12,4 @@ rust-version = "1.65" doctest = false [dependencies] +bstr = { version = "1.5.0", default-features = false, features = ["std"] } diff --git a/gix-filter/src/lib.rs b/gix-filter/src/lib.rs index 3a6cd994a53..8d163242114 100644 --- a/gix-filter/src/lib.rs +++ 
b/gix-filter/src/lib.rs @@ -1,2 +1,47 @@ -#![deny(rust_2018_idioms)] +//! A library for implementing everything needed to deal with git filter pipelines. +//! +//! Generally, multiple filters are applied in a row forming a pipeline, with each filter being a stage in that pipeline. +//! This pipeline is pre-determined with each stage being configurable. +//! +//! The transformation on an input buffer goes in two ways: either a filter is applied, or its effects are undone. Differentiating +//! between these states is important to avoid comparing unfiltered buffers with filtered ones, for example. +//! +//! This crate implements the building blocks in terms of applying and undoing filters, along with logic to decide whether +//! or not to apply such a filter. +#![deny(rust_2018_idioms, missing_docs)] #![forbid(unsafe_code)] + +/// +pub mod ident { + use bstr::{BStr, ByteSlice, ByteVec}; + use std::borrow::Cow; + use std::ops::Range; + + /// Undo identifiers like `$Id:$` to `$Id$`. Newlines between dollars are ignored. 
+ pub fn undo(mut input: Cow<'_, BStr>) -> Cow<'_, BStr> { + fn find_range(input: &[u8]) -> Option> { + let mut ofs = 0; + loop { + let mut cursor = input.get(ofs..)?; + let start = cursor.find(b"$Id:")?; + cursor = cursor.get((start + 4)..)?; + let maybe_end = cursor.find_byteset(b"$\n")?; + if cursor[maybe_end] == b'\n' { + ofs += start + 4 + maybe_end + 1; + continue; + } else { + return Some((ofs + start)..(ofs + start + 4 + maybe_end + 1)); + } + } + } + + let mut ofs = 0; + while let Some(range) = find_range(&input[ofs..]) { + input + .to_mut() + .replace_range((range.start + ofs)..(range.end + ofs), b"$Id$"); + ofs += range.start + 4; + } + input + } +} diff --git a/gix-filter/tests/filter.rs b/gix-filter/tests/filter.rs new file mode 100644 index 00000000000..ec251fad9c8 --- /dev/null +++ b/gix-filter/tests/filter.rs @@ -0,0 +1 @@ +mod ident; diff --git a/gix-filter/tests/ident/mod.rs b/gix-filter/tests/ident/mod.rs new file mode 100644 index 00000000000..3019625f503 --- /dev/null +++ b/gix-filter/tests/ident/mod.rs @@ -0,0 +1,58 @@ +use bstr::BStr; +use std::borrow::Cow; + +fn cowstr(input: &str) -> Cow<'_, BStr> { + Cow::Borrowed(input.into()) +} +mod undo { + use crate::ident::cowstr; + use std::borrow::Cow; + + #[test] + fn no_id_changes_nothing() { + let cow = gix_filter::ident::undo(cowstr("hello")); + assert!(matches!(cow, Cow::Borrowed(_)), "the buffer is not touched"); + assert_eq!(cow.as_ref(), "hello"); + } + + #[test] + fn empty() { + assert!(matches!(gix_filter::ident::undo(cowstr("")), Cow::Borrowed(_))); + } + + #[test] + fn nothing_if_newline_between_dollars() { + assert!(matches!(gix_filter::ident::undo(cowstr(" $Id: \n$")), Cow::Borrowed(_))); + } + + #[test] + fn nothing_if_it_is_not_id() { + assert!( + matches!(gix_filter::ident::undo(cowstr(" $id: something$")), Cow::Borrowed(_)), + "it's matching case-sensitively" + ); + } + + #[test] + fn anything_between_dollar_id_dollar() { + assert_eq!( + gix_filter::ident::undo(cowstr(" $Id: 
something$\nhello")).as_ref(), + " $Id$\nhello" + ); + } + + #[test] + fn multiple() { + assert_eq!( + gix_filter::ident::undo(cowstr( + "$Id: a\n$ $Id: something$\nhello$Id: hex$\nlast $Id:other$\n$Id: \n$" + )) + .as_ref(), + "$Id: a\n$ $Id$\nhello$Id$\nlast $Id$\n$Id: \n$", + ); + assert_eq!( + gix_filter::ident::undo(cowstr("$Id: a\n$$Id:$$Id: hex$\n$Id:other$$Id: $end")).as_ref(), + "$Id: a\n$$Id$$Id$\n$Id$$Id$end", + ); + } +} From 306c8eabcffe80da1d627283c4b188a1b979f692 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 24 Jun 2023 15:48:19 +0200 Subject: [PATCH 3/8] feat: add `ident::apply()` to substitute `$Id$` with `$Id: $` --- Cargo.lock | 2 ++ gix-filter/Cargo.toml | 3 +++ gix-filter/src/lib.rs | 26 +++++++++++++++++++ gix-filter/tests/ident/mod.rs | 49 +++++++++++++++++++++++++++++++++++ 4 files changed, 80 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index a383fef9d51..ac41bcb2ad0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1650,6 +1650,8 @@ name = "gix-filter" version = "0.0.0" dependencies = [ "bstr", + "gix-hash 0.11.3", + "gix-object 0.31.0", ] [[package]] diff --git a/gix-filter/Cargo.toml b/gix-filter/Cargo.toml index 8644837d98f..cbac569ceec 100644 --- a/gix-filter/Cargo.toml +++ b/gix-filter/Cargo.toml @@ -12,4 +12,7 @@ rust-version = "1.65" doctest = false [dependencies] +gix-hash = { version = "^0.11.3", path = "../gix-hash" } +gix-object = { version = "^0.31.0", path = "../gix-object" } + bstr = { version = "1.5.0", default-features = false, features = ["std"] } diff --git a/gix-filter/src/lib.rs b/gix-filter/src/lib.rs index 8d163242114..80b603d03ad 100644 --- a/gix-filter/src/lib.rs +++ b/gix-filter/src/lib.rs @@ -44,4 +44,30 @@ pub mod ident { } input } + + /// Substitute all occurrences of `$Id$` with `$Id: $` if present and return the changed buffer, with `object_hash` + /// being used accordingly. 
+ /// + /// ### Deviation + /// + /// `Git` also tries to cleanup 'stray' substituted `$Id: $`, but we don't do that, sticking exactly to what ought to be done. + /// The respective code is up to 16 years old and one might assume that `git` by now handles checking and checkout filters correctly. + pub fn apply(mut input: Cow<'_, BStr>, object_hash: gix_hash::Kind) -> Cow<'_, BStr> { + let mut buf: [u8; b": $".len() + gix_hash::Kind::longest().len_in_hex()] = std::array::from_fn(|_| 0); + let mut id = None; + let mut ofs = 0; + while let Some(pos) = input[ofs..].find(b"$Id$") { + let id = id.get_or_insert_with(|| gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &input)); + + buf[..2].copy_from_slice(b": "); + let _ = id.hex_to_buf(&mut buf[2..][..object_hash.len_in_hex()]); + let replaced_id = &mut buf[..2 + object_hash.len_in_hex() + 1]; + *replaced_id.last_mut().expect("present") = b'$'; + input + .to_mut() + .replace_range((ofs + pos + 3)..(ofs + pos + 4), &*replaced_id); + ofs += pos + 3 + replaced_id.len(); + } + input + } } diff --git a/gix-filter/tests/ident/mod.rs b/gix-filter/tests/ident/mod.rs index 3019625f503..58727bbbc43 100644 --- a/gix-filter/tests/ident/mod.rs +++ b/gix-filter/tests/ident/mod.rs @@ -56,3 +56,52 @@ mod undo { ); } } + +mod apply { + use crate::ident::cowstr; + use gix_filter::ident; + use std::borrow::Cow; + + #[test] + fn no_change() { + for input_no_match in [ + "", + "nothing", + "$ID$ case sensitive matching", + "$Id: expanded is ignored$", + ] { + let res = ident::apply(cowstr(input_no_match), gix_hash::Kind::Sha1); + assert!( + matches!(res, Cow::Borrowed(_)), + "no substitution happens, so no mutable version of the Cow is created" + ); + assert_eq!(res.as_ref(), input_no_match, "there definitely is no change"); + } + } + + #[test] + fn simple() { + assert_eq!( + ident::apply(cowstr("$Id$"), gix_hash::Kind::Sha1).as_ref(), + "$Id: b3f5ebfb5843bc43ceecff6d4f26bb37c615beb1$" + ); + + assert_eq!( + 
ident::apply(cowstr("$Id$ $Id$"), gix_hash::Kind::Sha1).as_ref(), + "$Id: f6f3176060328ef7030a8b8eeda57fbf0587b2f9$ $Id: f6f3176060328ef7030a8b8eeda57fbf0587b2f9$" + ); + } + + #[test] + fn round_trips() { + for input in [ + "hi\n$Id$\nho\n\t$Id$$Id$$Id$", + "$Id$", + "$Id$ and one more $Id$ and done", + ] { + let res = ident::apply(cowstr(input), gix_hash::Kind::Sha1); + assert_ne!(res.as_ref(), input, "the input was rewritten"); + assert_eq!(ident::undo(res).as_ref(), input, "the filter can be undone perfectly"); + } + } +} From 9bb9c48e0c935179885b774cd685bcaf1008c043 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 25 Jun 2023 10:38:12 +0200 Subject: [PATCH 4/8] refactor --- gix-filter/src/ident.rs | 57 +++++++++++++++++++++++++++++++++++++++ gix-filter/src/lib.rs | 60 +---------------------------------------- 2 files changed, 58 insertions(+), 59 deletions(-) create mode 100644 gix-filter/src/ident.rs diff --git a/gix-filter/src/ident.rs b/gix-filter/src/ident.rs new file mode 100644 index 00000000000..611824a2943 --- /dev/null +++ b/gix-filter/src/ident.rs @@ -0,0 +1,57 @@ +use bstr::{BStr, ByteSlice, ByteVec}; +use std::borrow::Cow; +use std::ops::Range; + +/// Undo identifiers like `$Id:$` to `$Id$`. Newlines between dollars are ignored. 
+pub fn undo(mut input: Cow<'_, BStr>) -> Cow<'_, BStr> { + fn find_range(input: &[u8]) -> Option> { + let mut ofs = 0; + loop { + let mut cursor = input.get(ofs..)?; + let start = cursor.find(b"$Id:")?; + cursor = cursor.get((start + 4)..)?; + let maybe_end = cursor.find_byteset(b"$\n")?; + if cursor[maybe_end] == b'\n' { + ofs += start + 4 + maybe_end + 1; + continue; + } else { + return Some((ofs + start)..(ofs + start + 4 + maybe_end + 1)); + } + } + } + + let mut ofs = 0; + while let Some(range) = find_range(&input[ofs..]) { + input + .to_mut() + .replace_range((range.start + ofs)..(range.end + ofs), b"$Id$"); + ofs += range.start + 4; + } + input +} + +/// Substitute all occurrences of `$Id$` with `$Id: $` if present and return the changed buffer, with `object_hash` +/// being used accordingly. +/// +/// ### Deviation +/// +/// `Git` also tries to cleanup 'stray' substituted `$Id: $`, but we don't do that, sticking exactly to what ought to be done. +/// The respective code is up to 16 years old and one might assume that `git` by now handles checking and checkout filters correctly. 
+pub fn apply(mut input: Cow<'_, BStr>, object_hash: gix_hash::Kind) -> Cow<'_, BStr> { + let mut buf: [u8; b": $".len() + gix_hash::Kind::longest().len_in_hex()] = std::array::from_fn(|_| 0); + let mut id = None; + let mut ofs = 0; + while let Some(pos) = input[ofs..].find(b"$Id$") { + let id = id.get_or_insert_with(|| gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &input)); + + buf[..2].copy_from_slice(b": "); + let _ = id.hex_to_buf(&mut buf[2..][..object_hash.len_in_hex()]); + let replaced_id = &mut buf[..2 + object_hash.len_in_hex() + 1]; + *replaced_id.last_mut().expect("present") = b'$'; + input + .to_mut() + .replace_range((ofs + pos + 3)..(ofs + pos + 4), &*replaced_id); + ofs += pos + 3 + replaced_id.len(); + } + input +} diff --git a/gix-filter/src/lib.rs b/gix-filter/src/lib.rs index 80b603d03ad..04b27e31d14 100644 --- a/gix-filter/src/lib.rs +++ b/gix-filter/src/lib.rs @@ -12,62 +12,4 @@ #![forbid(unsafe_code)] /// -pub mod ident { - use bstr::{BStr, ByteSlice, ByteVec}; - use std::borrow::Cow; - use std::ops::Range; - - /// Undo identifiers like `$Id:$` to `$Id$`. Newlines between dollars are ignored. - pub fn undo(mut input: Cow<'_, BStr>) -> Cow<'_, BStr> { - fn find_range(input: &[u8]) -> Option> { - let mut ofs = 0; - loop { - let mut cursor = input.get(ofs..)?; - let start = cursor.find(b"$Id:")?; - cursor = cursor.get((start + 4)..)?; - let maybe_end = cursor.find_byteset(b"$\n")?; - if cursor[maybe_end] == b'\n' { - ofs += start + 4 + maybe_end + 1; - continue; - } else { - return Some((ofs + start)..(ofs + start + 4 + maybe_end + 1)); - } - } - } - - let mut ofs = 0; - while let Some(range) = find_range(&input[ofs..]) { - input - .to_mut() - .replace_range((range.start + ofs)..(range.end + ofs), b"$Id$"); - ofs += range.start + 4; - } - input - } - - /// Substitute all occurrences of `$Id$` with `$Id: $` if present and return the changed buffer, with `object_hash` - /// being used accordingly. 
- /// - /// ### Deviation - /// - /// `Git` also tries to cleanup 'stray' substituted `$Id: $`, but we don't do that, sticking exactly to what ought to be done. - /// The respective code is up to 16 years old and one might assume that `git` by now handles checking and checkout filters correctly. - pub fn apply(mut input: Cow<'_, BStr>, object_hash: gix_hash::Kind) -> Cow<'_, BStr> { - let mut buf: [u8; b": $".len() + gix_hash::Kind::longest().len_in_hex()] = std::array::from_fn(|_| 0); - let mut id = None; - let mut ofs = 0; - while let Some(pos) = input[ofs..].find(b"$Id$") { - let id = id.get_or_insert_with(|| gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &input)); - - buf[..2].copy_from_slice(b": "); - let _ = id.hex_to_buf(&mut buf[2..][..object_hash.len_in_hex()]); - let replaced_id = &mut buf[..2 + object_hash.len_in_hex() + 1]; - *replaced_id.last_mut().expect("present") = b'$'; - input - .to_mut() - .replace_range((ofs + pos + 3)..(ofs + pos + 4), &*replaced_id); - ofs += pos + 3 + replaced_id.len(); - } - input - } -} +pub mod ident; From b79ffeb9ed584c47f2609eea261e1ada557a744c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sun, 25 Jun 2023 11:33:42 +0200 Subject: [PATCH 5/8] feat: `eol::Stats::from_bytes()` to obtain stats about a buffer. It can help to determine if it is binary and if line conversions should be performed at all. 
--- Cargo.lock | 1 + gix-filter/Cargo.toml | 1 + gix-filter/src/lib.rs | 184 ++++++++++++++++++++++++++++++++++++ gix-filter/tests/eol/mod.rs | 117 +++++++++++++++++++++++ gix-filter/tests/filter.rs | 3 + 5 files changed, 306 insertions(+) create mode 100644 gix-filter/tests/eol/mod.rs diff --git a/Cargo.lock b/Cargo.lock index ac41bcb2ad0..f032dd76652 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1652,6 +1652,7 @@ dependencies = [ "bstr", "gix-hash 0.11.3", "gix-object 0.31.0", + "thiserror", ] [[package]] diff --git a/gix-filter/Cargo.toml b/gix-filter/Cargo.toml index cbac569ceec..a348fa1da3c 100644 --- a/gix-filter/Cargo.toml +++ b/gix-filter/Cargo.toml @@ -16,3 +16,4 @@ gix-hash = { version = "^0.11.3", path = "../gix-hash" } gix-object = { version = "^0.31.0", path = "../gix-object" } bstr = { version = "1.5.0", default-features = false, features = ["std"] } +thiserror = "1.0.38" diff --git a/gix-filter/src/lib.rs b/gix-filter/src/lib.rs index 04b27e31d14..764022ec873 100644 --- a/gix-filter/src/lib.rs +++ b/gix-filter/src/lib.rs @@ -13,3 +13,187 @@ /// pub mod ident; + +/// utilities related to handling line endings in buffers +pub mod eol { + use crate::clear_and_set_capacity; + use bstr::ByteSlice; + + /// The combination of `crlf`, `text` and `eol` attributes into one neat package. + #[derive(Debug, Copy, Clone, Eq, PartialEq)] + pub enum AttributesDigest { + /// Equivalent to the `-text` attribute. + Binary, + /// Equivalent to the `text` attribute. + Text, + /// Equivalent to the `text eol=lf` attributes. + TextInput, + /// Equivalent to the `text eol=crlf` attributes. + TextCrlf, + /// Equivalent to the `text=auto` attributes. + TextAuto, + /// Equivalent to the `text=auto eol=crlf` attributes. + TextAutoCrlf, + /// Equivalent to the `text=auto eol=lf` attributes. + TextAutoInput, + } + + /// + pub mod convert_to_git { + /// The error returned by [convert_to_git()][super::convert_to_git()]. 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("{msg}")] + RoundTrip { msg: &'static str }, + #[error("Could not obtain index object to check line endings for")] + FetchObjectFromIndex(#[source] Box), + } + } + + /// Given a `src` buffer, change it `git` (`\n`) line endings and store the result in `buf`. + /// Return `true` if `buf` was written or `false` if nothing had to be done. + /// `action` is used to determine if ultimately a conversion should be done or not. + /// When `action` takes certain values, `index_object` is called to write the version of `src` as stored in the index + /// into the buffer and if it is a blob, or return `Ok(None)` if no such object exists. + /// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the + /// outcome of this function. + pub fn convert_to_git( + src: &[u8], + action: AttributesDigest, + buf: &mut Vec, + index_object: impl FnOnce(&mut Vec) -> Result, E>, + ) -> Result + where + E: std::error::Error + Send + Sync + 'static, + { + if action == AttributesDigest::Binary || src.is_empty() { + return Ok(false); + } + + let stats = Stats::from_bytes(src); + let mut convert_crlf_to_lf = stats.crlf > 0; + if matches!( + action, + AttributesDigest::TextAuto | AttributesDigest::TextAutoCrlf | AttributesDigest::TextAutoInput + ) { + // In this mode, we are supposed to figure out ourselves if we should convert or not. + if stats.is_binary() { + return Ok(false); + } + + if let Some(()) = + index_object(buf).map_err(|err| convert_to_git::Error::FetchObjectFromIndex(Box::new(err)))? 
+ { + let has_crlf_in_index = buf + .find_byte(b'\r') + .map(|_| Stats::from_bytes(buf)) + .filter(|s| !s.is_binary() && s.crlf > 0) + .is_some(); + if has_crlf_in_index { + convert_crlf_to_lf = false; + } + } + } + + Ok(if convert_crlf_to_lf { + clear_and_set_capacity(buf, src.len() - stats.crlf); + if stats.lone_cr == 0 { + buf.extend(src.iter().filter(|b| **b != b'\r')); + } else { + let mut bytes = src.iter().peekable(); + while let Some(b) = bytes.next() { + if !(*b == b'\r' && bytes.peek() == Some(&&b'\n')) { + buf.push(*b); + } + } + } + true + } else { + false + }) + } + + /// Statistics about a buffer that helps to safely perform EOL conversions + #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Stats { + /// The amount of null bytes. + pub null: usize, + /// The amount of lone carriage returns (`\r`). + pub lone_cr: usize, + /// The amount of lone line feeds (`\n`). + pub lone_lf: usize, + /// The amount carriage returns followed by line feeds + pub crlf: usize, + /// The estimate of printable characters. + pub printable: usize, + /// The estimate of characters that can't be printed. + pub non_printable: usize, + } + + impl Stats { + /// Gather statistics from the given `bytes`. + /// + /// Note that the entire buffer will be scanned. 
+ pub fn from_bytes(bytes: &[u8]) -> Self { + let mut bytes = bytes.iter().peekable(); + let mut null = 0; + let mut lone_cr = 0; + let mut lone_lf = 0; + let mut crlf = 0; + let mut printable = 0; + let mut non_printable = 0; + while let Some(b) = bytes.next() { + if *b == b'\r' { + match bytes.peek() { + Some(n) if **n == b'\n' => { + bytes.next(); + crlf += 1 + } + _ => lone_cr += 1, + } + continue; + } + if *b == b'\n' { + lone_lf += 1; + continue; + } + if *b == 127 { + non_printable += 1; + } else if *b < 32 { + match *b { + 8 /* \b */ | b'\t' | 27 /* \033 */ | 12 /* \014 */ => printable += 1, + 0 => { + non_printable += 1; + null += 1; + }, + _ => non_printable += 1, + } + } else { + printable += 1; + } + } + + Self { + null, + lone_cr, + lone_lf, + crlf, + printable, + non_printable, + } + } + + /// Returns `true` if these statistics are typical for a binary file. + pub fn is_binary(&self) -> bool { + self.lone_cr > 0 || self.null > 0 || (self.printable >> 7) < self.non_printable + } + } +} + +fn clear_and_set_capacity(buf: &mut Vec, cap: usize) { + buf.clear(); + if buf.capacity() < cap { + buf.reserve(cap - buf.capacity()); + } +} diff --git a/gix-filter/tests/eol/mod.rs b/gix-filter/tests/eol/mod.rs new file mode 100644 index 00000000000..ba191e391c2 --- /dev/null +++ b/gix-filter/tests/eol/mod.rs @@ -0,0 +1,117 @@ +mod stats { + mod from_bytes { + use gix_filter::eol; + + #[test] + fn all() { + let stats = eol::Stats::from_bytes(b"\n\r\nhi\rho\0\tanother line\nother\r\nmixed"); + assert_eq!( + stats, + eol::Stats { + null: 1, + lone_cr: 1, + lone_lf: 2, + crlf: 2, + printable: 27, + non_printable: 1, + } + ); + assert!(stats.is_binary()); + } + } +} + +mod convert_to_git { + use bstr::{ByteSlice, ByteVec}; + use gix_filter::eol; + use gix_filter::eol::AttributesDigest; + + #[test] + fn with_binary_attribute_is_never_converted() { + let mut buf = Vec::new(); + let changed = eol::convert_to_git(b"hi\r\nho", AttributesDigest::Binary, &mut buf, 
no_call).expect("no error"); + assert!(!changed, "the user marked it as binary so it's never being touched"); + } + + #[test] + fn no_crlf_means_no_work() -> crate::Result { + let mut buf = Vec::new(); + let changed = eol::convert_to_git(b"hi", AttributesDigest::TextCrlf, &mut buf, no_call).expect("no error"); + assert!(!changed); + + let changed = + eol::convert_to_git(b"hi", AttributesDigest::TextAutoCrlf, &mut buf, no_object_in_index).expect("no error"); + assert!(!changed, "in auto-mode, the object is queried in the index as well."); + Ok(()) + } + + #[test] + fn detected_as_binary() -> crate::Result { + let mut buf = Vec::new(); + let changed = eol::convert_to_git( + b"hi\0zero makes it binary", + AttributesDigest::TextAuto, + &mut buf, + no_call, + ) + .expect("no error"); + assert!( + !changed, + "in auto-mode, we have a heuristic to see if the buffer is binary" + ); + Ok(()) + } + + #[test] + fn fast_conversion_by_stripping_cr() -> crate::Result { + let mut buf = Vec::new(); + let changed = + eol::convert_to_git(b"a\r\nb\r\nc", AttributesDigest::TextCrlf, &mut buf, no_call).expect("no error"); + assert!(changed); + assert_eq!(buf.as_bstr(), "a\nb\nc", "here carriage returns can just be stripped"); + Ok(()) + } + + #[test] + fn slower_conversion_due_to_lone_cr() -> crate::Result { + let mut buf = Vec::new(); + let changed = + eol::convert_to_git(b"\r\ra\r\nb\r\nc", AttributesDigest::TextCrlf, &mut buf, no_call).expect("no error"); + assert!(changed); + assert_eq!( + buf.as_bstr(), + "\r\ra\nb\nc", + "here carriage returns cannot be stripped but must be handled in pairs" + ); + Ok(()) + } + + #[test] + fn crlf_in_index_prevents_conversion_to_lf() -> crate::Result { + let mut buf = Vec::new(); + let mut called = false; + let changed = eol::convert_to_git(b"elligible\n", AttributesDigest::TextAutoInput, &mut buf, |buf| { + called = true; + buf.clear(); + buf.push_str("with CRLF\r\n"); + Ok::<_, std::convert::Infallible>(Some(())) + }) + .expect("no error"); + 
assert!(called, "in auto mode, the index is queried as well"); + assert!( + !changed, + "we saw the CRLF is present in the index, so it's unsafe to make changes" + ); + Ok(()) + } + + #[allow(clippy::ptr_arg)] + fn no_call(_buf: &mut Vec) -> std::io::Result> { + unreachable!("index function will not be called") + } + + #[allow(clippy::ptr_arg)] + fn no_object_in_index(_buf: &mut Vec) -> std::io::Result> { + Ok(Some(())) + } +} diff --git a/gix-filter/tests/filter.rs b/gix-filter/tests/filter.rs index ec251fad9c8..fffceb2a79f 100644 --- a/gix-filter/tests/filter.rs +++ b/gix-filter/tests/filter.rs @@ -1 +1,4 @@ +mod eol; mod ident; + +pub type Result = std::result::Result>; From d5eba46afeaadb9ff113421bdb43466d67cfeac6 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 26 Jun 2023 09:44:49 +0200 Subject: [PATCH 6/8] feat: Support for events (e.g. warn!()) via macro. Thanks to these, one can now emit warnings which could be visible if the application compiled in support for it. This is a nice alternative compared to having to have a `progress` around. --- gix-trace/src/disabled.rs | 68 +++++ gix-trace/src/enabled.rs | 91 ++++++- gix-trace/src/lib.rs | 530 +++++++++++++++++++++++++++++++++++++- gix-trace/tests/trace.rs | 27 +- 4 files changed, 711 insertions(+), 5 deletions(-) diff --git a/gix-trace/src/disabled.rs b/gix-trace/src/disabled.rs index 3f6946f2e5a..a4a12787cf2 100644 --- a/gix-trace/src/disabled.rs +++ b/gix-trace/src/disabled.rs @@ -35,3 +35,71 @@ macro_rules! span { ) }; } + +/// Create an event with the given level. +#[macro_export] +macro_rules! 
event { + (target: $target:expr, $lvl:expr, { $($fields:tt)* } )=> ( + {} + ); + (target: $target:expr, $lvl:expr, { $($fields:tt)* }, $($arg:tt)+ ) => ( + $crate::event!( + target: $target, + $lvl, + { message = format_args!($($arg)+), $($fields)* } + ) + ); + (target: $target:expr, $lvl:expr, $($k:ident).+ = $($fields:tt)* ) => ( + $crate::event!(target: $target, $lvl, { $($k).+ = $($fields)* }) + ); + (target: $target:expr, $lvl:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $lvl, { $($arg)+ }) + ); + ( $lvl:expr, { $($fields:tt)* }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $lvl, + { message = format_args!($($arg)+), $($fields)* } + ) + ); + ($lvl:expr, $($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { $($k).+ = $($field)*} + ) + ); + ($lvl:expr, $($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { $($k).+, $($field)*} + ) + ); + ($lvl:expr, ?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { ?$($k).+, $($field)*} + ) + ); + ($lvl:expr, %$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { %$($k).+, $($field)*} + ) + ); + ($lvl:expr, ?$($k:ident).+) => ( + $crate::event!($lvl, ?$($k).+,) + ); + ($lvl:expr, %$($k:ident).+) => ( + $crate::event!($lvl, %$($k).+,) + ); + ($lvl:expr, $($k:ident).+) => ( + $crate::event!($lvl, $($k).+,) + ); + ( $lvl:expr, $($arg:tt)+ ) => ( + $crate::event!(target: module_path!(), $lvl, { $($arg)+ }) + ); +} diff --git a/gix-trace/src/enabled.rs b/gix-trace/src/enabled.rs index 8429ac02d8d..55468401667 100644 --- a/gix-trace/src/enabled.rs +++ b/gix-trace/src/enabled.rs @@ -1,6 +1,6 @@ use tracing_core::{dispatcher::get_default as with_dispatcher, span, span::Id, Dispatch}; // these are used later in macros. 
-pub use tracing_core::{field, metadata, Metadata}; +pub use tracing_core::{field, metadata, Event, Metadata}; /// An entered span which will exit on drop. #[derive(Clone)] @@ -89,7 +89,6 @@ impl crate::Level { } /// A macro to create a span. -#[cfg(feature = "tracing")] #[macro_export] macro_rules! span { (target: $target:expr, $lvl:expr, $name:expr, $($fields:tt)*) => { @@ -132,6 +131,94 @@ macro_rules! span { }; } +/// Create an event with the given level. +#[macro_export] +macro_rules! event { + (target: $target:expr, $lvl:expr, { $($fields:tt)* } )=> ( + { + static META: $crate::Metadata<'static> = { + $crate::metadata! { + name: concat!( + "event ", + file!(), + ":", + line!() + ), + target: $target, + level: $lvl, + fields: $crate::fieldset!( $($fields)* ), + callsite: &$crate::MetaOnlyCallsite(&META), + kind: $crate::metadata::Kind::EVENT, + } + }; + $crate::Event::dispatch( + &META, + &$crate::valueset!(META.fields(), $($fields)*) + ); + } + ); + (target: $target:expr, $lvl:expr, { $($fields:tt)* }, $($arg:tt)+ ) => ( + $crate::event!( + target: $target, + $lvl, + { message = format_args!($($arg)+), $($fields)* } + ) + ); + (target: $target:expr, $lvl:expr, $($k:ident).+ = $($fields:tt)* ) => ( + $crate::event!(target: $target, $lvl, { $($k).+ = $($fields)* }) + ); + (target: $target:expr, $lvl:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $lvl, { $($arg)+ }) + ); + ( $lvl:expr, { $($fields:tt)* }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $lvl, + { message = format_args!($($arg)+), $($fields)* } + ) + ); + ($lvl:expr, $($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { $($k).+ = $($field)*} + ) + ); + ($lvl:expr, $($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { $($k).+, $($field)*} + ) + ); + ($lvl:expr, ?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { ?$($k).+, $($field)*} + ) + ); + ($lvl:expr, 
%$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $lvl, + { %$($k).+, $($field)*} + ) + ); + ($lvl:expr, ?$($k:ident).+) => ( + $crate::event!($lvl, ?$($k).+,) + ); + ($lvl:expr, %$($k:ident).+) => ( + $crate::event!($lvl, %$($k).+,) + ); + ($lvl:expr, $($k:ident).+) => ( + $crate::event!($lvl, $($k).+,) + ); + ( $lvl:expr, $($arg:tt)+ ) => ( + $crate::event!(target: module_path!(), $lvl, { $($arg)+ }) + ); +} + // Copied from`tracing`, would be nice to have it in `tracing-core`. #[doc(hidden)] #[macro_export] diff --git a/gix-trace/src/lib.rs b/gix-trace/src/lib.rs index c156a4fd6e5..ab9409534e7 100644 --- a/gix-trace/src/lib.rs +++ b/gix-trace/src/lib.rs @@ -46,13 +46,46 @@ impl Span { #[cfg(feature = "tracing")] #[doc(hidden)] -pub use enabled::{metadata, MetaOnlyCallsite, Metadata}; +pub use enabled::{metadata, Event, MetaOnlyCallsite, Metadata}; #[cfg(not(feature = "tracing"))] mod disabled; #[cfg(not(feature = "tracing"))] pub use disabled::Span; +/// +pub mod event { + #[cfg(feature = "tracing")] + pub use tracing_core::Level; + + /// All available tracing levels for use in `event!()` macro. + #[cfg(not(feature = "tracing"))] + #[repr(usize)] + #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] + pub enum Level { + /// The "trace" level. + /// + /// Designates very low priority, often extremely verbose, information. + TRACE = 0, + /// The "debug" level. + /// + /// Designates lower priority information. + DEBUG = 1, + /// The "info" level. + /// + /// Designates useful information. + INFO = 2, + /// The "warn" level. + /// + /// Designates hazardous situations. + WARN = 3, + /// The "error" level. + /// + /// Designates very serious errors. + ERROR = 4, + } +} + /// Create a new [coarse][Level::Coarse] span. #[macro_export] macro_rules! coarse { @@ -102,3 +135,498 @@ macro_rules! detail { }; ($name:expr) => {$crate::coarse!($name,)}; } + +/// Emit an error event. +#[macro_export] +macro_rules! 
error { + (target: $target:expr, { $($field:tt)* }, $($arg:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::ERROR, { $($field)* }, $($arg)*) + ); + (target: $target:expr, $($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::ERROR, { $($k).+ $($field)* }) + ); + (target: $target:expr, ?$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::ERROR, { ?$($k).+ $($field)* }) + ); + (target: $target:expr, %$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::ERROR, { %$($k).+ $($field)* }) + ); + (target: $target:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $crate::event::Level::ERROR, {}, $($arg)+) + ); + ({ $($field:tt)+ }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { $($field)+ }, + $($arg)+ + ) + ); + ($($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { $($k).+ = $($field)*} + ) + ); + (?$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { ?$($k).+ = $($field)*} + ) + ); + (%$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { %$($k).+ = $($field)*} + ) + ); + ($($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { $($k).+, $($field)*} + ) + ); + (?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { ?$($k).+, $($field)*} + ) + ); + (%$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { %$($k).+, $($field)*} + ) + ); + (?$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { ?$($k).+ } + ) + ); + (%$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + 
$crate::event::Level::ERROR, + { %$($k).+ } + ) + ); + ($($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + { $($k).+ } + ) + ); + ($($arg:tt)+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::ERROR, + {}, + $($arg)+ + ) + ); +} + +/// Emit a warn event. +#[macro_export] +macro_rules! warn { + (target: $target:expr, { $($field:tt)* }, $($arg:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::WARN, { $($field)* }, $($arg)*) + ); + (target: $target:expr, $($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::WARN, { $($k).+ $($field)* }) + ); + (target: $target:expr, ?$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::WARN, { ?$($k).+ $($field)* }) + ); + (target: $target:expr, %$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::WARN, { %$($k).+ $($field)* }) + ); + (target: $target:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $crate::event::Level::WARN, {}, $($arg)+) + ); + ({ $($field:tt)+ }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { $($field)+ }, + $($arg)+ + ) + ); + ($($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { $($k).+ = $($field)*} + ) + ); + (?$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { ?$($k).+ = $($field)*} + ) + ); + (%$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { %$($k).+ = $($field)*} + ) + ); + ($($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { $($k).+, $($field)*} + ) + ); + (?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { ?$($k).+, $($field)*} + ) + ); + 
(%$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { %$($k).+, $($field)*} + ) + ); + (?$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { ?$($k).+ } + ) + ); + (%$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { %$($k).+ } + ) + ); + ($($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + { $($k).+ } + ) + ); + ($($arg:tt)+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::WARN, + {}, + $($arg)+ + ) + ); +} + +/// Emit an info event. +#[macro_export] +macro_rules! info { + (target: $target:expr, { $($field:tt)* }, $($arg:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::INFO, { $($field)* }, $($arg)*) + ); + (target: $target:expr, $($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::INFO, { $($k).+ $($field)* }) + ); + (target: $target:expr, ?$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::INFO, { ?$($k).+ $($field)* }) + ); + (target: $target:expr, %$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::INFO, { %$($k).+ $($field)* }) + ); + (target: $target:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $crate::event::Level::INFO, {}, $($arg)+) + ); + ({ $($field:tt)+ }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { $($field)+ }, + $($arg)+ + ) + ); + ($($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { $($k).+ = $($field)*} + ) + ); + (?$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { ?$($k).+ = $($field)*} + ) + ); + (%$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + 
$crate::event::Level::INFO, + { %$($k).+ = $($field)*} + ) + ); + ($($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { $($k).+, $($field)*} + ) + ); + (?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { ?$($k).+, $($field)*} + ) + ); + (%$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { %$($k).+, $($field)*} + ) + ); + (?$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { ?$($k).+ } + ) + ); + (%$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { %$($k).+ } + ) + ); + ($($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + { $($k).+ } + ) + ); + ($($arg:tt)+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::INFO, + {}, + $($arg)+ + ) + ); +} + +/// Emit a debug event. +#[macro_export] +macro_rules! 
debug { + (target: $target:expr, { $($field:tt)* }, $($arg:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::DEBUG, { $($field)* }, $($arg)*) + ); + (target: $target:expr, $($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::DEBUG, { $($k).+ $($field)* }) + ); + (target: $target:expr, ?$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::DEBUG, { ?$($k).+ $($field)* }) + ); + (target: $target:expr, %$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::DEBUG, { %$($k).+ $($field)* }) + ); + (target: $target:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $crate::event::Level::DEBUG, {}, $($arg)+) + ); + ({ $($field:tt)+ }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { $($field)+ }, + $($arg)+ + ) + ); + ($($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { $($k).+ = $($field)*} + ) + ); + (?$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { ?$($k).+ = $($field)*} + ) + ); + (%$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { %$($k).+ = $($field)*} + ) + ); + ($($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { $($k).+, $($field)*} + ) + ); + (?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { ?$($k).+, $($field)*} + ) + ); + (%$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { %$($k).+, $($field)*} + ) + ); + (?$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { ?$($k).+ } + ) + ); + (%$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + 
$crate::event::Level::DEBUG, + { %$($k).+ } + ) + ); + ($($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + { $($k).+ } + ) + ); + ($($arg:tt)+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::DEBUG, + {}, + $($arg)+ + ) + ); +} + +/// Emit a trace event. +#[macro_export] +macro_rules! trace { + (target: $target:expr, { $($field:tt)* }, $($arg:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::TRACE, { $($field)* }, $($arg)*) + ); + (target: $target:expr, $($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::TRACE, { $($k).+ $($field)* }) + ); + (target: $target:expr, ?$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::TRACE, { ?$($k).+ $($field)* }) + ); + (target: $target:expr, %$($k:ident).+ $($field:tt)* ) => ( + $crate::event!(target: $target, $crate::event::Level::TRACE, { %$($k).+ $($field)* }) + ); + (target: $target:expr, $($arg:tt)+ ) => ( + $crate::event!(target: $target, $crate::event::Level::TRACE, {}, $($arg)+) + ); + ({ $($field:tt)+ }, $($arg:tt)+ ) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { $($field)+ }, + $($arg)+ + ) + ); + ($($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { $($k).+ = $($field)*} + ) + ); + (?$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { ?$($k).+ = $($field)*} + ) + ); + (%$($k:ident).+ = $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { %$($k).+ = $($field)*} + ) + ); + ($($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { $($k).+, $($field)*} + ) + ); + (?$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { ?$($k).+, $($field)*} 
+ ) + ); + (%$($k:ident).+, $($field:tt)*) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { %$($k).+, $($field)*} + ) + ); + (?$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { ?$($k).+ } + ) + ); + (%$($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { %$($k).+ } + ) + ); + ($($k:ident).+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + { $($k).+ } + ) + ); + ($($arg:tt)+) => ( + $crate::event!( + target: module_path!(), + $crate::event::Level::TRACE, + {}, + $($arg)+ + ) + ); +} diff --git a/gix-trace/tests/trace.rs b/gix-trace/tests/trace.rs index 219729cecc9..1559e1374d6 100644 --- a/gix-trace/tests/trace.rs +++ b/gix-trace/tests/trace.rs @@ -1,4 +1,4 @@ -use gix_trace::{coarse, detail, span}; +use gix_trace::{coarse, debug, detail, error, event, info, span, trace, warn}; #[test] fn span() { let _x = span!(gix_trace::Level::Coarse, "hello"); @@ -12,7 +12,30 @@ fn span() { fn coarse() { let _x = coarse!("hello"); coarse!("hello", x = "value", y = 42); - coarse!(target: "other", "hello", x = "value", y = 42); + coarse!(target: "other", "hello", x = "value", y = 42).into_scope(|| { + event!(gix_trace::event::Level::ERROR, "an error"); + event!(gix_trace::event::Level::WARN, "an info: {}", 42); + event!(gix_trace::event::Level::INFO, answer = 42, field = "some"); + #[derive(Debug)] + #[allow(dead_code)] + struct User { + name: &'static str, + email: &'static str, + } + #[allow(unused_variables)] + let user = User { + name: "ferris", + email: "ferris@example.com", + }; + event!(gix_trace::event::Level::DEBUG, user.name, user.email); + event!(gix_trace::event::Level::TRACE, greeting = ?user, display = %user.name); + + error!("hello {}", 42); + warn!("hello {}", 42); + info!("hello {}", 42); + debug!("hello {}", 42); + trace!("hello {}", 42); + }); } #[test] From e45fec9663f87b7ba4162a9517677f6278c20a98 Mon 
Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 26 Jun 2023 10:34:37 +0200 Subject: [PATCH 7/8] feat: Add `eol::convert_to_git()`. This function supports all the logic that git executes to determine if a conversion should actually be done. --- Cargo.lock | 1 + gix-filter/Cargo.toml | 1 + gix-filter/src/eol/convert_to_git.rs | 156 ++++++++++++++++++++++++ gix-filter/src/eol/mod.rs | 61 ++++++++++ gix-filter/src/eol/utils.rs | 124 +++++++++++++++++++ gix-filter/src/lib.rs | 176 +-------------------------- gix-filter/tests/eol/mod.rs | 105 +++++++++++++--- 7 files changed, 435 insertions(+), 189 deletions(-) create mode 100644 gix-filter/src/eol/convert_to_git.rs create mode 100644 gix-filter/src/eol/mod.rs create mode 100644 gix-filter/src/eol/utils.rs diff --git a/Cargo.lock b/Cargo.lock index f032dd76652..b46de69fa80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1652,6 +1652,7 @@ dependencies = [ "bstr", "gix-hash 0.11.3", "gix-object 0.31.0", + "gix-trace 0.1.1", "thiserror", ] diff --git a/gix-filter/Cargo.toml b/gix-filter/Cargo.toml index a348fa1da3c..3e95ead7180 100644 --- a/gix-filter/Cargo.toml +++ b/gix-filter/Cargo.toml @@ -13,6 +13,7 @@ doctest = false [dependencies] gix-hash = { version = "^0.11.3", path = "../gix-hash" } +gix-trace = { version = "^0.1.1", path = "../gix-trace" } gix-object = { version = "^0.31.0", path = "../gix-object" } bstr = { version = "1.5.0", default-features = false, features = ["std"] } diff --git a/gix-filter/src/eol/convert_to_git.rs b/gix-filter/src/eol/convert_to_git.rs new file mode 100644 index 00000000000..62899bf629f --- /dev/null +++ b/gix-filter/src/eol/convert_to_git.rs @@ -0,0 +1,156 @@ +use std::path::{Path, PathBuf}; + +/// Additional context for use with [`convert_to_git`][super::convert_to_git()]. +#[derive(Default, Copy, Clone)] +pub struct Context<'a> { + /// How to perform round-trip checks. + pub round_trip_check: Option<RoundTripCheck<'a>>, + /// Configuration related to EOL.
+ pub config: crate::eol::Configuration, +} + +/// The kind of round-trip check to perform when converting line endings to `git`, i.e. `CRLF` to `LF`. +#[derive(Debug, Copy, Clone)] +pub enum RoundTripCheck<'a> { + /// Fail with an error if conversion isn't round-trip safe. + Fail { + /// The repository-relative path of the file to check. Used in case of error. + rela_path: &'a Path, + }, + /// Emit a warning using `gix_trace::warn!`, but don't fail. + /// + /// Note that the parent application has to set up tracing to make these events visible, along with a parent `span!`. + Warn { + /// The repository-relative path of the file to check. Used in case of error. + rela_path: &'a Path, + }, +} + +/// The error returned by [convert_to_git()][super::convert_to_git()]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("{msg} in '{}'", path.display())] + RoundTrip { msg: &'static str, path: PathBuf }, + #[error("Could not obtain index object to check line endings for")] + FetchObjectFromIndex(#[source] Box<dyn std::error::Error + Send + Sync + 'static>), +} + +pub(crate) mod function { + use crate::clear_and_set_capacity; + use crate::eol::convert_to_git::{Context, Error, RoundTripCheck}; + use crate::eol::{AttributesDigest, Stats}; + use bstr::ByteSlice; + + /// Given a `src` buffer, change it to `git` (`\n`) line endings and store the result in `buf`. + /// Return `true` if `buf` was written or `false` if nothing had to be done. + /// `digest` is used to determine if ultimately a conversion should be done or not. + /// When `digest` takes certain values, `index_object` is called to write the version of `src` as stored in the index + /// into the buffer if it is a blob, or return `Ok(None)` if no such object exists. + /// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the + /// outcome of this function. + /// If `round_trip_check` is not `None`, round-tripping will be validated and handled accordingly.
+ pub fn convert_to_git<E>( + src: &[u8], + digest: AttributesDigest, + buf: &mut Vec<u8>, + index_object: impl FnOnce(&mut Vec<u8>) -> Result<Option<()>, E>, + Context { + round_trip_check, + config, + }: Context<'_>, + ) -> Result<bool, Error> + where + E: std::error::Error + Send + Sync + 'static, + { + if digest == AttributesDigest::Binary || src.is_empty() { + return Ok(false); + } + + let stats = Stats::from_bytes(src); + let mut convert_crlf_to_lf = stats.crlf > 0; + if digest.is_auto_text() { + // In this mode, we are supposed to figure out ourselves if we should convert or not. + if stats.is_binary() { + return Ok(false); + } + + if let Some(()) = index_object(buf).map_err(|err| Error::FetchObjectFromIndex(Box::new(err)))? { + let has_crlf_in_index = buf + .find_byte(b'\r') + .map(|_| Stats::from_bytes(buf)) + .filter(|s| !s.is_binary() && s.crlf > 0) + .is_some(); + if has_crlf_in_index { + convert_crlf_to_lf = false; + } + } + } + + if let Some(round_trip_check) = round_trip_check { + let mut new_stats = stats; + // simulate to-git conversion/git-add + if convert_crlf_to_lf { + new_stats.lone_lf += new_stats.crlf; + new_stats.crlf = 0; + } + // simulate worktree checkout + if new_stats.will_convert_lf_to_crlf(digest, config) { + new_stats.crlf += new_stats.lone_lf; + new_stats.lone_lf = 0; + } + if stats.crlf > 0 && new_stats.crlf == 0 { + // CRLF would not be restored by checkout + match round_trip_check { + RoundTripCheck::Fail { rela_path } => { + return Err(Error::RoundTrip { + msg: "CRLF would be replaced by LF", + path: rela_path.to_owned(), + }) + } + #[allow(unused_variables)] + RoundTripCheck::Warn { rela_path } => { + gix_trace::warn!( + "in the working copy of '{}', CRLF will be replaced by LF next time git touches it", + rela_path.display() + ) + } + } + } else if stats.lone_lf > 0 && new_stats.lone_lf == 0 { + // CRLF would be added by checkout + match round_trip_check { + RoundTripCheck::Fail { rela_path } => { + return Err(Error::RoundTrip { + msg: "LF would be replaced by
CRLF", + path: rela_path.to_owned(), + }) + } + #[allow(unused_variables)] + RoundTripCheck::Warn { rela_path } => { + gix_trace::warn!( + "in the working copy of '{}', LF will be replaced by CRLF next time git touches it", + rela_path.display() + ) + } + } + } + } + + if !convert_crlf_to_lf { + return Ok(false); + } + + clear_and_set_capacity(buf, src.len() - stats.crlf); + if stats.lone_cr == 0 { + buf.extend(src.iter().filter(|b| **b != b'\r')); + } else { + let mut bytes = src.iter().peekable(); + while let Some(b) = bytes.next() { + if !(*b == b'\r' && bytes.peek() == Some(&&b'\n')) { + buf.push(*b); + } + } + } + Ok(true) + } +} diff --git a/gix-filter/src/eol/mod.rs b/gix-filter/src/eol/mod.rs new file mode 100644 index 00000000000..24247ec69dc --- /dev/null +++ b/gix-filter/src/eol/mod.rs @@ -0,0 +1,61 @@ +/// +pub mod convert_to_git; +pub use convert_to_git::function::convert_to_git; + +mod utils; + +/// The kind of end of lines to set. +/// +/// The default is implemented to be the native line ending for the current platform. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Mode { + /// Equivalent to `git` (`\n`) line-endings. + Lf, + /// Equivalent to `windows` (`\r\n`) line-endings. + CrLf, +} + +/// The combination of `crlf`, `text` and `eol` attributes into one neat package. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum AttributesDigest { + /// Equivalent to the `-text` attribute. + Binary, + /// Equivalent to the `text` attribute. + Text, + /// Equivalent to the `text eol=lf` attributes. + TextInput, + /// Equivalent to the `text eol=crlf` attributes. + TextCrlf, + /// Equivalent to the `text=auto` attributes. + TextAuto, + /// Equivalent to the `text=auto eol=crlf` attributes. + TextAutoCrlf, + /// Equivalent to the `text=auto eol=lf` attributes. + TextAutoInput, +} + +/// Git Configuration that affects how CRLF conversions are applied. 
+#[derive(Default, Debug, Copy, Clone)] +pub struct Configuration { + /// Corresponds to `core.autocrlf` and is `None` for `input`, `Some(true)` if `true` or `Some(false)` if `false`. + pub auto_crlf: Option<bool>, + /// Corresponds to `core.eol`, and is `None` if unset or set to `native`, or `Some(<mode>)` respectively. + pub eol: Option<Mode>, +} + +/// Statistics about a buffer that help to safely perform EOL conversions +#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct Stats { + /// The amount of null bytes. + pub null: usize, + /// The amount of lone carriage returns (`\r`). + pub lone_cr: usize, + /// The amount of lone line feeds (`\n`). + pub lone_lf: usize, + /// The amount of carriage returns followed by line feeds + pub crlf: usize, + /// The estimate of printable characters. + pub printable: usize, + /// The estimate of characters that can't be printed. + pub non_printable: usize, +} diff --git a/gix-filter/src/eol/utils.rs b/gix-filter/src/eol/utils.rs new file mode 100644 index 00000000000..12eed431f3c --- /dev/null +++ b/gix-filter/src/eol/utils.rs @@ -0,0 +1,124 @@ +use crate::eol::{AttributesDigest, Configuration, Mode, Stats}; + +impl Default for Mode { + fn default() -> Self { + if cfg!(windows) { + Mode::CrLf + } else { + Mode::Lf + } + } +} + +impl AttributesDigest { + /// Return the end-of-line mode this digest would require, or `None` if no conversion would be performed. + pub fn to_eol(&self, config: Configuration) -> Option<Mode> { + Some(match self { + AttributesDigest::Binary => return None, + AttributesDigest::TextInput | AttributesDigest::TextAutoInput => Mode::Lf, + AttributesDigest::TextCrlf | AttributesDigest::TextAutoCrlf => Mode::CrLf, + AttributesDigest::Text | AttributesDigest::TextAuto => config.to_eol(), + }) + } + + /// Return true if this digest allows for auto-determination of CRLF text conversion.
+ pub fn is_auto_text(&self) -> bool { + matches!( + self, + AttributesDigest::TextAuto | AttributesDigest::TextAutoCrlf | AttributesDigest::TextAutoInput + ) + } +} + +impl Configuration { + /// Return the line-ending mode that is configured here. + pub fn to_eol(&self) -> Mode { + match self.auto_crlf { + Some(true) => Mode::CrLf, + None => Mode::Lf, + Some(false) => self.eol.unwrap_or_default(), + } + } +} + +impl Stats { + /// Gather statistics from the given `bytes`. + /// + /// Note that the entire buffer will be scanned. + pub fn from_bytes(bytes: &[u8]) -> Self { + let mut bytes = bytes.iter().peekable(); + let mut null = 0; + let mut lone_cr = 0; + let mut lone_lf = 0; + let mut crlf = 0; + let mut printable = 0; + let mut non_printable = 0; + while let Some(b) = bytes.next() { + if *b == b'\r' { + match bytes.peek() { + Some(n) if **n == b'\n' => { + bytes.next(); + crlf += 1 + } + _ => lone_cr += 1, + } + continue; + } + if *b == b'\n' { + lone_lf += 1; + continue; + } + if *b == 127 { + non_printable += 1; + } else if *b < 32 { + match *b { + 8 /* \b */ | b'\t' | 27 /* \033 */ | 12 /* \014 */ => printable += 1, + 0 => { + non_printable += 1; + null += 1; + }, + _ => non_printable += 1, + } + } else { + printable += 1; + } + } + + Self { + null, + lone_cr, + lone_lf, + crlf, + printable, + non_printable, + } + } + + /// Returns `true` if these statistics are typical for a binary file. + pub fn is_binary(&self) -> bool { + self.lone_cr > 0 || self.null > 0 || (self.printable >> 7) < self.non_printable + } + + /// Return `true` if we would convert the buffer from which these stats are derived, knowing only the digest + pub fn will_convert_lf_to_crlf(&self, digest: AttributesDigest, config: Configuration) -> bool { + if digest.to_eol(config) != Some(Mode::CrLf) { + return false; + } + + // nothing to do? 
+ if self.lone_lf == 0 { + return false; + } + + if digest.is_auto_text() { + if self.is_binary() { + return false; + } + // Lone `\r` or mixed LF and CRLF isn't safe as it won't round-trip, and in auto-mode we don't touch it. + if self.lone_cr > 0 || self.crlf > 0 { + return false; + } + } + true + } +} diff --git a/gix-filter/src/lib.rs b/gix-filter/src/lib.rs index 764022ec873..b2211625430 100644 --- a/gix-filter/src/lib.rs +++ b/gix-filter/src/lib.rs @@ -15,181 +15,7 @@ pub mod ident; /// utilities related to handling line endings in buffers -pub mod eol { - use crate::clear_and_set_capacity; - use bstr::ByteSlice; - - /// The combination of `crlf`, `text` and `eol` attributes into one neat package. - #[derive(Debug, Copy, Clone, Eq, PartialEq)] - pub enum AttributesDigest { - /// Equivalent to the `-text` attribute. - Binary, - /// Equivalent to the `text` attribute. - Text, - /// Equivalent to the `text eol=lf` attributes. - TextInput, - /// Equivalent to the `text eol=crlf` attributes. - TextCrlf, - /// Equivalent to the `text=auto` attributes. - TextAuto, - /// Equivalent to the `text=auto eol=crlf` attributes. - TextAutoCrlf, - /// Equivalent to the `text=auto eol=lf` attributes. - TextAutoInput, - } - - /// - pub mod convert_to_git { - /// The error returned by [convert_to_git()][super::convert_to_git()]. - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error("{msg}")] - RoundTrip { msg: &'static str }, - #[error("Could not obtain index object to check line endings for")] - FetchObjectFromIndex(#[source] Box), - } - } - - /// Given a `src` buffer, change it `git` (`\n`) line endings and store the result in `buf`. - /// Return `true` if `buf` was written or `false` if nothing had to be done. - /// `action` is used to determine if ultimately a conversion should be done or not. 
- /// When `action` takes certain values, `index_object` is called to write the version of `src` as stored in the index - /// into the buffer and if it is a blob, or return `Ok(None)` if no such object exists. - /// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the - /// outcome of this function. - pub fn convert_to_git( - src: &[u8], - action: AttributesDigest, - buf: &mut Vec, - index_object: impl FnOnce(&mut Vec) -> Result, E>, - ) -> Result - where - E: std::error::Error + Send + Sync + 'static, - { - if action == AttributesDigest::Binary || src.is_empty() { - return Ok(false); - } - - let stats = Stats::from_bytes(src); - let mut convert_crlf_to_lf = stats.crlf > 0; - if matches!( - action, - AttributesDigest::TextAuto | AttributesDigest::TextAutoCrlf | AttributesDigest::TextAutoInput - ) { - // In this mode, we are supposed to figure out ourselves if we should convert or not. - if stats.is_binary() { - return Ok(false); - } - - if let Some(()) = - index_object(buf).map_err(|err| convert_to_git::Error::FetchObjectFromIndex(Box::new(err)))? - { - let has_crlf_in_index = buf - .find_byte(b'\r') - .map(|_| Stats::from_bytes(buf)) - .filter(|s| !s.is_binary() && s.crlf > 0) - .is_some(); - if has_crlf_in_index { - convert_crlf_to_lf = false; - } - } - } - - Ok(if convert_crlf_to_lf { - clear_and_set_capacity(buf, src.len() - stats.crlf); - if stats.lone_cr == 0 { - buf.extend(src.iter().filter(|b| **b != b'\r')); - } else { - let mut bytes = src.iter().peekable(); - while let Some(b) = bytes.next() { - if !(*b == b'\r' && bytes.peek() == Some(&&b'\n')) { - buf.push(*b); - } - } - } - true - } else { - false - }) - } - - /// Statistics about a buffer that helps to safely perform EOL conversions - #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub struct Stats { - /// The amount of null bytes. - pub null: usize, - /// The amount of lone carriage returns (`\r`). 
- pub lone_cr: usize, - /// The amount of lone line feeds (`\n`). - pub lone_lf: usize, - /// The amount carriage returns followed by line feeds - pub crlf: usize, - /// The estimate of printable characters. - pub printable: usize, - /// The estimate of characters that can't be printed. - pub non_printable: usize, - } - - impl Stats { - /// Gather statistics from the given `bytes`. - /// - /// Note that the entire buffer will be scanned. - pub fn from_bytes(bytes: &[u8]) -> Self { - let mut bytes = bytes.iter().peekable(); - let mut null = 0; - let mut lone_cr = 0; - let mut lone_lf = 0; - let mut crlf = 0; - let mut printable = 0; - let mut non_printable = 0; - while let Some(b) = bytes.next() { - if *b == b'\r' { - match bytes.peek() { - Some(n) if **n == b'\n' => { - bytes.next(); - crlf += 1 - } - _ => lone_cr += 1, - } - continue; - } - if *b == b'\n' { - lone_lf += 1; - continue; - } - if *b == 127 { - non_printable += 1; - } else if *b < 32 { - match *b { - 8 /* \b */ | b'\t' | 27 /* \033 */ | 12 /* \014 */ => printable += 1, - 0 => { - non_printable += 1; - null += 1; - }, - _ => non_printable += 1, - } - } else { - printable += 1; - } - } - - Self { - null, - lone_cr, - lone_lf, - crlf, - printable, - non_printable, - } - } - - /// Returns `true` if these statistics are typical for a binary file. 
- pub fn is_binary(&self) -> bool { - self.lone_cr > 0 || self.null > 0 || (self.printable >> 7) < self.non_printable - } - } -} +pub mod eol; fn clear_and_set_capacity(buf: &mut Vec, cap: usize) { buf.clear(); diff --git a/gix-filter/tests/eol/mod.rs b/gix-filter/tests/eol/mod.rs index ba191e391c2..a408e396830 100644 --- a/gix-filter/tests/eol/mod.rs +++ b/gix-filter/tests/eol/mod.rs @@ -25,22 +25,37 @@ mod convert_to_git { use bstr::{ByteSlice, ByteVec}; use gix_filter::eol; use gix_filter::eol::AttributesDigest; + use std::path::Path; #[test] fn with_binary_attribute_is_never_converted() { let mut buf = Vec::new(); - let changed = eol::convert_to_git(b"hi\r\nho", AttributesDigest::Binary, &mut buf, no_call).expect("no error"); + let changed = eol::convert_to_git( + b"hi\r\nho", + AttributesDigest::Binary, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); assert!(!changed, "the user marked it as binary so it's never being touched"); } #[test] fn no_crlf_means_no_work() -> crate::Result { let mut buf = Vec::new(); - let changed = eol::convert_to_git(b"hi", AttributesDigest::TextCrlf, &mut buf, no_call).expect("no error"); + let changed = eol::convert_to_git(b"hi", AttributesDigest::TextCrlf, &mut buf, no_call, Default::default()) + .expect("no error"); assert!(!changed); - let changed = - eol::convert_to_git(b"hi", AttributesDigest::TextAutoCrlf, &mut buf, no_object_in_index).expect("no error"); + let changed = eol::convert_to_git( + b"hi", + AttributesDigest::TextAutoCrlf, + &mut buf, + no_object_in_index, + Default::default(), + ) + .expect("no error"); assert!(!changed, "in auto-mode, the object is queried in the index as well."); Ok(()) } @@ -53,6 +68,7 @@ mod convert_to_git { AttributesDigest::TextAuto, &mut buf, no_call, + Default::default(), ) .expect("no error"); assert!( @@ -65,8 +81,14 @@ mod convert_to_git { #[test] fn fast_conversion_by_stripping_cr() -> crate::Result { let mut buf = Vec::new(); - let changed = - 
eol::convert_to_git(b"a\r\nb\r\nc", AttributesDigest::TextCrlf, &mut buf, no_call).expect("no error"); + let changed = eol::convert_to_git( + b"a\r\nb\r\nc", + AttributesDigest::TextCrlf, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); assert!(changed); assert_eq!(buf.as_bstr(), "a\nb\nc", "here carriage returns can just be stripped"); Ok(()) @@ -75,8 +97,14 @@ mod convert_to_git { #[test] fn slower_conversion_due_to_lone_cr() -> crate::Result { let mut buf = Vec::new(); - let changed = - eol::convert_to_git(b"\r\ra\r\nb\r\nc", AttributesDigest::TextCrlf, &mut buf, no_call).expect("no error"); + let changed = eol::convert_to_git( + b"\r\ra\r\nb\r\nc", + AttributesDigest::TextCrlf, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); assert!(changed); assert_eq!( buf.as_bstr(), @@ -90,12 +118,18 @@ mod convert_to_git { fn crlf_in_index_prevents_conversion_to_lf() -> crate::Result { let mut buf = Vec::new(); let mut called = false; - let changed = eol::convert_to_git(b"elligible\n", AttributesDigest::TextAutoInput, &mut buf, |buf| { - called = true; - buf.clear(); - buf.push_str("with CRLF\r\n"); - Ok::<_, std::convert::Infallible>(Some(())) - }) + let changed = eol::convert_to_git( + b"elligible\n", + AttributesDigest::TextAutoInput, + &mut buf, + |buf| { + called = true; + buf.clear(); + buf.push_str("with CRLF\r\n"); + Ok::<_, std::convert::Infallible>(Some(())) + }, + Default::default(), + ) .expect("no error"); assert!(called, "in auto mode, the index is queried as well"); assert!( @@ -105,6 +139,49 @@ mod convert_to_git { Ok(()) } + #[test] + fn round_trip_check() -> crate::Result { + let mut buf = Vec::new(); + for (input, expected) in [ + (&b"lone-nl\nhi\r\nho"[..], "LF would be replaced by CRLF in 'hello.txt'"), + // despite trying, I was unable to get into the other branch + (b"lone-cr\nhi\r\nho", "LF would be replaced by CRLF in 'hello.txt'"), + ] { + let err = eol::convert_to_git( + input, + 
AttributesDigest::TextCrlf, + &mut buf, + no_call, + eol::convert_to_git::Context { + round_trip_check: Some(gix_filter::eol::convert_to_git::RoundTripCheck::Fail { + rela_path: Path::new("hello.txt"), + }), + config: Default::default(), + }, + ) + .unwrap_err(); + assert_eq!(err.to_string(), expected); + + let changed = eol::convert_to_git( + input, + AttributesDigest::TextCrlf, + &mut buf, + no_call, + eol::convert_to_git::Context { + round_trip_check: Some(gix_filter::eol::convert_to_git::RoundTripCheck::Warn { + rela_path: Path::new("hello.txt"), + }), + config: Default::default(), + }, + )?; + assert!( + changed, + "in warn mode, we will get a result even though it won't round-trip" + ) + } + Ok(()) + } + #[allow(clippy::ptr_arg)] fn no_call(_buf: &mut Vec<u8>) -> std::io::Result<Option<()>> { unreachable!("index function will not be called") From 1517cbc42c43b253046b7359c79731771fd7b941 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Mon, 26 Jun 2023 15:00:46 +0200 Subject: [PATCH 8/8] feat: add `eol::convert_to_worktree()`. It's the inverse of `eol::convert_to_git()` to re-add CRLF where there were LF only. 
--- gix-filter/src/eol/convert_to_worktree.rs | 40 +++++ gix-filter/src/eol/mod.rs | 3 + gix-filter/tests/eol/convert_to_git.rs | 169 +++++++++++++++++++ gix-filter/tests/eol/convert_to_worktree.rs | 95 +++++++++++ gix-filter/tests/eol/mod.rs | 173 +------------------- 5 files changed, 309 insertions(+), 171 deletions(-) create mode 100644 gix-filter/src/eol/convert_to_worktree.rs create mode 100644 gix-filter/tests/eol/convert_to_git.rs create mode 100644 gix-filter/tests/eol/convert_to_worktree.rs diff --git a/gix-filter/src/eol/convert_to_worktree.rs b/gix-filter/src/eol/convert_to_worktree.rs new file mode 100644 index 00000000000..9ff49de03f2 --- /dev/null +++ b/gix-filter/src/eol/convert_to_worktree.rs @@ -0,0 +1,40 @@ +use crate::clear_and_set_capacity; +use crate::eol::{AttributesDigest, Configuration, Mode, Stats}; +use bstr::{ByteSlice, ByteVec}; + +/// Convert all `\n` in `src` to `crlf` if `digest` and `config` indicate it, returning `true` if `buf` holds the result, or `false` +/// if no change was made after all. 
+pub fn convert_to_worktree(src: &[u8], digest: AttributesDigest, buf: &mut Vec<u8>, config: Configuration) -> bool { + if src.is_empty() || digest.to_eol(config) != Some(Mode::CrLf) { + return false; + } + let stats = Stats::from_bytes(src); + if !stats.will_convert_lf_to_crlf(digest, config) { + return false; + } + + clear_and_set_capacity(buf, src.len() + stats.lone_lf); + + let mut ofs = 0; + while let Some(pos) = src[ofs..].find_byteset(b"\r\n") { + match src[ofs + pos] { + b'\r' => { + if src.get(ofs + pos + 1) == Some(&b'\n') { + buf.push_str(&src[ofs..][..pos + 2]); + ofs += pos + 2; + } else { + buf.push_str(&src[ofs..][..pos + 1]); + ofs += pos + 1; + } + } + b'\n' => { + buf.push_str(&src[ofs..][..pos]); + buf.push_str(b"\r\n"); + ofs += pos + 1; + } + _ => unreachable!("would only find one of two possible values"), + } + } + buf.push_str(&src[ofs..]); + true +} diff --git a/gix-filter/src/eol/mod.rs b/gix-filter/src/eol/mod.rs index 24247ec69dc..5fda6882615 100644 --- a/gix-filter/src/eol/mod.rs +++ b/gix-filter/src/eol/mod.rs @@ -2,6 +2,9 @@ pub mod convert_to_git; pub use convert_to_git::function::convert_to_git; +mod convert_to_worktree; +pub use convert_to_worktree::convert_to_worktree; + mod utils; /// The kind of end of lines to set. 
diff --git a/gix-filter/tests/eol/convert_to_git.rs b/gix-filter/tests/eol/convert_to_git.rs new file mode 100644 index 00000000000..a7dd51f92f1 --- /dev/null +++ b/gix-filter/tests/eol/convert_to_git.rs @@ -0,0 +1,169 @@ +use bstr::{ByteSlice, ByteVec}; +use gix_filter::eol; +use gix_filter::eol::AttributesDigest; +use std::path::Path; + +#[test] +fn with_binary_attribute_is_never_converted() { + let mut buf = Vec::new(); + let changed = eol::convert_to_git( + b"hi\r\nho", + AttributesDigest::Binary, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); + assert!(!changed, "the user marked it as binary so it's never being touched"); +} + +#[test] +fn no_crlf_means_no_work() -> crate::Result { + let mut buf = Vec::new(); + let changed = eol::convert_to_git(b"hi", AttributesDigest::TextCrlf, &mut buf, no_call, Default::default()) + .expect("no error"); + assert!(!changed); + + let changed = eol::convert_to_git( + b"hi", + AttributesDigest::TextAutoCrlf, + &mut buf, + no_object_in_index, + Default::default(), + ) + .expect("no error"); + assert!(!changed, "in auto-mode, the object is queried in the index as well."); + Ok(()) +} + +#[test] +fn detected_as_binary() -> crate::Result { + let mut buf = Vec::new(); + let changed = eol::convert_to_git( + b"hi\0zero makes it binary", + AttributesDigest::TextAuto, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); + assert!( + !changed, + "in auto-mode, we have a heuristic to see if the buffer is binary" + ); + Ok(()) +} + +#[test] +fn fast_conversion_by_stripping_cr() -> crate::Result { + let mut buf = Vec::new(); + let changed = eol::convert_to_git( + b"a\r\nb\r\nc", + AttributesDigest::TextCrlf, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); + assert!(changed); + assert_eq!(buf.as_bstr(), "a\nb\nc", "here carriage returns can just be stripped"); + Ok(()) +} + +#[test] +fn slower_conversion_due_to_lone_cr() -> crate::Result { + let mut buf = Vec::new(); + let 
changed = eol::convert_to_git( + b"\r\ra\r\nb\r\nc", + AttributesDigest::TextCrlf, + &mut buf, + no_call, + Default::default(), + ) + .expect("no error"); + assert!(changed); + assert_eq!( + buf.as_bstr(), + "\r\ra\nb\nc", + "here carriage returns cannot be stripped but must be handled in pairs" + ); + Ok(()) +} + +#[test] +fn crlf_in_index_prevents_conversion_to_lf() -> crate::Result { + let mut buf = Vec::new(); + let mut called = false; + let changed = eol::convert_to_git( + b"elligible\n", + AttributesDigest::TextAutoInput, + &mut buf, + |buf| { + called = true; + buf.clear(); + buf.push_str("with CRLF\r\n"); + Ok::<_, std::convert::Infallible>(Some(())) + }, + Default::default(), + ) + .expect("no error"); + assert!(called, "in auto mode, the index is queried as well"); + assert!( + !changed, + "we saw the CRLF is present in the index, so it's unsafe to make changes" + ); + Ok(()) +} + +#[test] +fn round_trip_check() -> crate::Result { + let mut buf = Vec::new(); + for (input, expected) in [ + (&b"lone-nl\nhi\r\nho"[..], "LF would be replaced by CRLF in 'hello.txt'"), + // despite trying, I was unable to get into the other branch + (b"lone-cr\nhi\r\nho", "LF would be replaced by CRLF in 'hello.txt'"), + ] { + let err = eol::convert_to_git( + input, + AttributesDigest::TextCrlf, + &mut buf, + no_call, + eol::convert_to_git::Context { + round_trip_check: Some(gix_filter::eol::convert_to_git::RoundTripCheck::Fail { + rela_path: Path::new("hello.txt"), + }), + config: Default::default(), + }, + ) + .unwrap_err(); + assert_eq!(err.to_string(), expected); + + let changed = eol::convert_to_git( + input, + AttributesDigest::TextCrlf, + &mut buf, + no_call, + eol::convert_to_git::Context { + round_trip_check: Some(gix_filter::eol::convert_to_git::RoundTripCheck::Warn { + rela_path: Path::new("hello.txt"), + }), + config: Default::default(), + }, + )?; + assert!( + changed, + "in warn mode, we will get a result even though it won't round-trip" + ) + } + Ok(()) +} + 
+#[allow(clippy::ptr_arg)] +fn no_call(_buf: &mut Vec<u8>) -> std::io::Result<Option<()>> { + unreachable!("index function will not be called") +} + +#[allow(clippy::ptr_arg)] +fn no_object_in_index(_buf: &mut Vec<u8>) -> std::io::Result<Option<()>> { + Ok(Some(())) +} diff --git a/gix-filter/tests/eol/convert_to_worktree.rs b/gix-filter/tests/eol/convert_to_worktree.rs new file mode 100644 index 00000000000..3888d4ac273 --- /dev/null +++ b/gix-filter/tests/eol/convert_to_worktree.rs @@ -0,0 +1,95 @@ +use bstr::ByteSlice; +use gix_filter::eol; +use gix_filter::eol::{AttributesDigest, Configuration, Mode}; + +#[test] +fn no_conversion_if_attribute_digest_does_not_allow_it() { + let mut buf = Vec::new(); + for digest in [ + AttributesDigest::Binary, + AttributesDigest::TextInput, + AttributesDigest::TextAutoInput, + ] { + let changed = eol::convert_to_worktree(b"hi\nho", digest, &mut buf, Default::default()); + assert!(!changed, "the digest doesn't allow for CRLF changes"); + } +} + +#[test] +fn no_conversion_if_configuration_does_not_allow_it() { + let mut buf = Vec::new(); + for digest in [AttributesDigest::Text, AttributesDigest::TextAuto] { + for config in [ + Configuration { + auto_crlf: None, + eol: Some(Mode::CrLf), + }, + Configuration { + auto_crlf: Some(false), + eol: Some(Mode::Lf), + }, + ] { + let changed = eol::convert_to_worktree(b"hi\nho", digest, &mut buf, config); + assert!(!changed, "the configuration doesn't allow for changes"); + } + } +} + +#[test] +fn no_conversion_if_nothing_to_do() { + let mut buf = Vec::new(); + for (input, digest, msg) in [ + ( + &b"hi\r\nho"[..], + AttributesDigest::TextCrlf, + "no lone line feed to handle", + ), + ( + &b"binary\0linefeed\nho"[..], + AttributesDigest::TextAutoCrlf, + "binary in auto-mode is never handled", + ), + ( + &b"binary\nlinefeed\r\nho"[..], + AttributesDigest::TextAutoCrlf, + "mixed crlf and lf is avoided", + ), + ( + &b"elligible-but-disabled\nhere"[..], + AttributesDigest::Binary, + "designated binary is never handled", + ), 
+ ] { + let changed = eol::convert_to_worktree(input, digest, &mut buf, Default::default()); + assert!(!changed, "{msg}"); + } +} + +#[test] +fn each_nl_is_replaced_with_crnl() { + let mut buf = Vec::new(); + let changed = eol::convert_to_worktree( + b"hi\n\nho\nend", + AttributesDigest::TextCrlf, + &mut buf, + Default::default(), + ); + assert!( + changed, + "the buffer has to be changed as it is explicitly demanded and has newlines to convert" + ); + assert_eq!(buf.as_bstr(), "hi\r\n\r\nho\r\nend"); +} + +#[test] +fn existing_crnl_are_not_replaced_for_safety_nor_are_lone_cr() { + let mut buf = Vec::new(); + let changed = eol::convert_to_worktree( + b"hi\r\n\nho\r\nend\r", + AttributesDigest::TextCrlf, + &mut buf, + Default::default(), + ); + assert!(changed); + assert_eq!(buf.as_bstr(), "hi\r\n\r\nho\r\nend\r"); +} diff --git a/gix-filter/tests/eol/mod.rs b/gix-filter/tests/eol/mod.rs index a408e396830..b9c4fdd44f0 100644 --- a/gix-filter/tests/eol/mod.rs +++ b/gix-filter/tests/eol/mod.rs @@ -21,174 +21,5 @@ mod stats { } } -mod convert_to_git { - use bstr::{ByteSlice, ByteVec}; - use gix_filter::eol; - use gix_filter::eol::AttributesDigest; - use std::path::Path; - - #[test] - fn with_binary_attribute_is_never_converted() { - let mut buf = Vec::new(); - let changed = eol::convert_to_git( - b"hi\r\nho", - AttributesDigest::Binary, - &mut buf, - no_call, - Default::default(), - ) - .expect("no error"); - assert!(!changed, "the user marked it as binary so it's never being touched"); - } - - #[test] - fn no_crlf_means_no_work() -> crate::Result { - let mut buf = Vec::new(); - let changed = eol::convert_to_git(b"hi", AttributesDigest::TextCrlf, &mut buf, no_call, Default::default()) - .expect("no error"); - assert!(!changed); - - let changed = eol::convert_to_git( - b"hi", - AttributesDigest::TextAutoCrlf, - &mut buf, - no_object_in_index, - Default::default(), - ) - .expect("no error"); - assert!(!changed, "in auto-mode, the object is queried in the index as 
well."); - Ok(()) - } - - #[test] - fn detected_as_binary() -> crate::Result { - let mut buf = Vec::new(); - let changed = eol::convert_to_git( - b"hi\0zero makes it binary", - AttributesDigest::TextAuto, - &mut buf, - no_call, - Default::default(), - ) - .expect("no error"); - assert!( - !changed, - "in auto-mode, we have a heuristic to see if the buffer is binary" - ); - Ok(()) - } - - #[test] - fn fast_conversion_by_stripping_cr() -> crate::Result { - let mut buf = Vec::new(); - let changed = eol::convert_to_git( - b"a\r\nb\r\nc", - AttributesDigest::TextCrlf, - &mut buf, - no_call, - Default::default(), - ) - .expect("no error"); - assert!(changed); - assert_eq!(buf.as_bstr(), "a\nb\nc", "here carriage returns can just be stripped"); - Ok(()) - } - - #[test] - fn slower_conversion_due_to_lone_cr() -> crate::Result { - let mut buf = Vec::new(); - let changed = eol::convert_to_git( - b"\r\ra\r\nb\r\nc", - AttributesDigest::TextCrlf, - &mut buf, - no_call, - Default::default(), - ) - .expect("no error"); - assert!(changed); - assert_eq!( - buf.as_bstr(), - "\r\ra\nb\nc", - "here carriage returns cannot be stripped but must be handled in pairs" - ); - Ok(()) - } - - #[test] - fn crlf_in_index_prevents_conversion_to_lf() -> crate::Result { - let mut buf = Vec::new(); - let mut called = false; - let changed = eol::convert_to_git( - b"elligible\n", - AttributesDigest::TextAutoInput, - &mut buf, - |buf| { - called = true; - buf.clear(); - buf.push_str("with CRLF\r\n"); - Ok::<_, std::convert::Infallible>(Some(())) - }, - Default::default(), - ) - .expect("no error"); - assert!(called, "in auto mode, the index is queried as well"); - assert!( - !changed, - "we saw the CRLF is present in the index, so it's unsafe to make changes" - ); - Ok(()) - } - - #[test] - fn round_trip_check() -> crate::Result { - let mut buf = Vec::new(); - for (input, expected) in [ - (&b"lone-nl\nhi\r\nho"[..], "LF would be replaced by CRLF in 'hello.txt'"), - // despite trying, I was unable to 
get into the other branch - (b"lone-cr\nhi\r\nho", "LF would be replaced by CRLF in 'hello.txt'"), - ] { - let err = eol::convert_to_git( - input, - AttributesDigest::TextCrlf, - &mut buf, - no_call, - eol::convert_to_git::Context { - round_trip_check: Some(gix_filter::eol::convert_to_git::RoundTripCheck::Fail { - rela_path: Path::new("hello.txt"), - }), - config: Default::default(), - }, - ) - .unwrap_err(); - assert_eq!(err.to_string(), expected); - - let changed = eol::convert_to_git( - input, - AttributesDigest::TextCrlf, - &mut buf, - no_call, - eol::convert_to_git::Context { - round_trip_check: Some(gix_filter::eol::convert_to_git::RoundTripCheck::Warn { - rela_path: Path::new("hello.txt"), - }), - config: Default::default(), - }, - )?; - assert!( - changed, - "in warn mode, we will get a result even though it won't round-trip" - ) - } - Ok(()) - } - - #[allow(clippy::ptr_arg)] - fn no_call(_buf: &mut Vec<u8>) -> std::io::Result<Option<()>> { - unreachable!("index function will not be called") - } - - #[allow(clippy::ptr_arg)] - fn no_object_in_index(_buf: &mut Vec<u8>) -> std::io::Result<Option<()>> { - Ok(Some(())) - } -} +mod convert_to_git; +mod convert_to_worktree;