From 1b3cf7d28880607d0ee04e5344886a7f7cb24ea5 Mon Sep 17 00:00:00 2001 From: jRimbault Date: Thu, 17 Sep 2020 22:09:30 +0200 Subject: [PATCH 1/4] Make indent preserve newlines or lack thereof Before, indent("foo", "") would give "foo\n". It now preserves any trailing newline character present in the input string. This makes indent behave consistently with dedent. New tests ere added to ensure this on a number of corner cases. closes #207 --- .github/workflows/build.yml | 1 + src/indentation.rs | 230 +++++++++++++++++++++--------------- src/lib.rs | 1 + tests/indent.rs | 88 ++++++++++++++ 4 files changed, 224 insertions(+), 96 deletions(-) create mode 100644 tests/indent.rs diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ac3b4cb5..bea4fec2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,6 +12,7 @@ env: jobs: build: strategy: + fail-fast: false matrix: os: - ubuntu-latest diff --git a/src/indentation.rs b/src/indentation.rs index ca192490..1295ffa4 100644 --- a/src/indentation.rs +++ b/src/indentation.rs @@ -18,8 +18,7 @@ /// "); /// ``` /// -/// Empty lines (lines consisting only of whitespace) are not indented -/// and the whitespace is replaced by a single newline (`\n`): +/// Lines consisting only of whitespace are kept unchanged: /// /// ``` /// use textwrap::indent; @@ -34,7 +33,7 @@ /// ->Foo /// /// ->Bar -/// +/// \t /// ->Baz /// "); /// ``` @@ -45,17 +44,18 @@ /// ``` /// use textwrap::indent; /// -/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo \n"); +/// assert_eq!(indent(" \t Foo ", "->"), "-> \t Foo "); /// ``` pub fn indent(s: &str, prefix: &str) -> String { let mut result = String::new(); - for line in s.lines() { - if line.chars().any(|c| !c.is_whitespace()) { + + for line in s.split_inclusive('\n') { + if !line.trim().is_empty() { result.push_str(prefix); - result.push_str(line); } - result.push('\n'); + result.push_str(line); } + result } @@ -138,12 +138,6 @@ pub fn dedent(s: &str) -> String { mod tests { use super::*; - /// Add newlines. Ensures that the final line in the vector also - /// has a newline. - fn add_nl(lines: &[&str]) -> String { - lines.join("\n") + "\n" - } - #[test] fn indent_empty() { assert_eq!(indent("\n", " "), "\n"); @@ -152,27 +146,35 @@ mod tests { #[test] #[rustfmt::skip] fn indent_nonempty() { - let x = vec![" foo", - "bar", - " baz"]; - let y = vec!["// foo", - "//bar", - "// baz"]; - assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y)); + let text = [ + " foo\n", + "bar\n", + " baz\n", + ].join(""); + let expected = [ + "// foo\n", + "//bar\n", + "// baz\n", + ].join(""); + assert_eq!(indent(&text, "//"), expected); } #[test] #[rustfmt::skip] fn indent_empty_line() { - let x = vec![" foo", - "bar", - "", - " baz"]; - let y = vec!["// foo", - "//bar", - "", - "// baz"]; - assert_eq!(indent(&add_nl(&x), "//"), add_nl(&y)); + let text = [ + " foo", + "bar", + "", + " baz", + ].join("\n"); + let expected = [ + "// foo", + "//bar", + "", + "// baz", + ].join("\n"); + assert_eq!(indent(&text, "//"), expected); } #[test] @@ -183,112 +185,148 @@ mod tests { #[test] #[rustfmt::skip] fn dedent_multi_line() { - let x = vec![" foo", - " bar", - " baz"]; - let y = vec![" foo", - "bar", - " baz"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + " foo", + " bar", + " baz", + ].join("\n"); + let y = [ + " foo", + "bar", + " baz" + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_empty_line() { - let x = vec![" foo", - " bar", - " ", - " baz"]; - let y = vec![" foo", - "bar", - "", - " baz"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + " foo", + " bar", + " ", + " baz" + ].join("\n"); + let y = [ + " foo", + "bar", + "", + " baz" + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_blank_line() { - let x = vec![" foo", - "", - " bar", - " foo", - " bar", - " baz"]; - let y = vec!["foo", - "", - " bar", - " foo", - " bar", - " baz"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + " foo", + "", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + let y = [ + "foo", + "", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_whitespace_line() { - let x = vec![" foo", - " ", - " bar", - " foo", - " bar", - " baz"]; - let y = vec!["foo", - "", - " bar", - " foo", - " bar", - " baz"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + " foo", + " ", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + let y = [ + "foo", + "", + " bar", + " foo", + " bar", + " baz", + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_mixed_whitespace() { - let x = vec!["\tfoo", - " bar"]; - let y = vec!["\tfoo", - " bar"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + "\tfoo", + " bar", + ].join("\n"); + let y = [ + "\tfoo", + " bar", + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_tabbed_whitespace() { - let x = vec!["\t\tfoo", - "\t\t\tbar"]; - let y = vec!["foo", - "\tbar"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + "\t\tfoo", + "\t\t\tbar", + ].join("\n"); + let y = [ + "foo", + "\tbar", + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_mixed_tabbed_whitespace() { - let x = vec!["\t \tfoo", - "\t \t\tbar"]; - let y = vec!["foo", - "\tbar"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + "\t \tfoo", + "\t \t\tbar", + ].join("\n"); + let y = [ + "foo", + "\tbar", + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_mixed_tabbed_whitespace2() { - let x = vec!["\t \tfoo", - "\t \tbar"]; - let y = vec!["\tfoo", - " \tbar"]; - assert_eq!(dedent(&add_nl(&x)), add_nl(&y)); + let x = [ + "\t \tfoo", + "\t \tbar", + ].join("\n"); + let y = [ + "\tfoo", + " \tbar", + ].join("\n"); + assert_eq!(dedent(&x), y); } #[test] #[rustfmt::skip] fn dedent_preserve_no_terminating_newline() { - let x = vec![" foo", - " bar"].join("\n"); - let y = vec!["foo", - " bar"].join("\n"); + let x = [ + " foo", + " bar", + ].join("\n"); + let y = [ + "foo", + " bar", + ].join("\n"); assert_eq!(dedent(&x), y); } } diff --git a/src/lib.rs b/src/lib.rs index 2703c81f..9742e376 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,6 +92,7 @@ #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![allow(clippy::redundant_field_names)] +#![feature(split_inclusive)] use std::borrow::Cow; use std::str::CharIndices; diff --git a/tests/indent.rs b/tests/indent.rs new file mode 100644 index 00000000..9dd5ad26 --- /dev/null +++ b/tests/indent.rs @@ -0,0 +1,88 @@ +/// tests cases ported over from python standard library +use textwrap::{dedent, indent}; + +const ROUNDTRIP_CASES: [&str; 3] = [ + // basic test case + "Hi.\nThis is a test.\nTesting.", + // include a blank line + "Hi.\nThis is a test.\n\nTesting.", + // include leading and trailing blank lines + "\nHi.\nThis is a test.\nTesting.\n", +]; + +const WINDOWS_CASES: [&str; 2] = [ + // use windows line endings + "Hi.\r\nThis is a test.\r\nTesting.", + // pathological case + "Hi.\r\nThis is a test.\n\r\nTesting.\r\n\n", +]; + +#[test] +fn test_indent_nomargin_default() { + // indent should do nothing if 'prefix' is empty. + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&indent(text, ""), text); + } + for text in WINDOWS_CASES.iter() { + assert_eq!(&indent(text, ""), text); + } +} + +#[test] +fn test_roundtrip_spaces() { + // A whitespace prefix should roundtrip with dedent + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&dedent(&indent(text, " ")), text); + } +} + +#[test] +fn test_roundtrip_tabs() { + // A whitespace prefix should roundtrip with dedent + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&dedent(&indent(text, "\t\t")), text); + } +} + +#[test] +fn test_roundtrip_mixed() { + // A whitespace prefix should roundtrip with dedent + for text in ROUNDTRIP_CASES.iter() { + assert_eq!(&dedent(&indent(text, " \t \t ")), text); + } +} + +#[test] +fn test_indent_default() { + // Test default indenting of lines that are not whitespace only + let prefix = " "; + let expected = [ + // Basic test case + " Hi.\n This is a test.\n Testing.", + // Include a blank line + " Hi.\n This is a test.\n\n Testing.", + // Include leading and trailing blank lines + "\n Hi.\n This is a test.\n Testing.\n", + ]; + for (text, expect) in ROUNDTRIP_CASES.iter().zip(expected.iter()) { + assert_eq!(&indent(text, prefix), expect) + } + let expected = [ + // Use Windows line endings + " Hi.\r\n This is a test.\r\n Testing.", + // Pathological case + " Hi.\r\n This is a test.\n\r\n Testing.\r\n\n", + ]; + for (text, expect) in WINDOWS_CASES.iter().zip(expected.iter()) { + assert_eq!(&indent(text, prefix), expect) + } +} + +#[test] +fn indented_text_should_have_the_same_number_of_lines_as_the_original_text() { + let texts = ["foo\nbar", "foo\nbar\n", "foo\nbar\nbaz"]; + for original in texts.iter() { + let indented = indent(original, ""); + assert_eq!(&indented, original); + } +} From 946d9d5421b952afd28bbad70d5970caf3e499de Mon Sep 17 00:00:00 2001 From: jRimbault Date: Wed, 6 Jan 2021 13:17:04 +0100 Subject: [PATCH 2/4] add custom iterator --- src/indentation.rs | 42 +++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 1 - 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/src/indentation.rs b/src/indentation.rs index 1295ffa4..d5fe9b7c 100644 --- a/src/indentation.rs +++ b/src/indentation.rs @@ -49,7 +49,7 @@ pub fn indent(s: &str, prefix: &str) -> String { let mut result = String::new(); - for line in s.split_inclusive('\n') { + for line in CharSplitInclusive::new(s, '\n') { if !line.trim().is_empty() { result.push_str(prefix); } @@ -59,6 +59,46 @@ pub fn indent(s: &str, prefix: &str) -> String { result } +/// This should hopefully be replaced by the split_inclusive feature in the +/// standard library. +/// Issue: #72360 +struct CharSplitInclusive<'a> { + sep: char, + value: &'a str, + finished: bool, +} + +impl<'a> CharSplitInclusive<'a> { + fn new(s: &'a str, sep: char) -> Self { + Self { + finished: false, + sep, + value: s, + } + } +} + +impl<'a> Iterator for CharSplitInclusive<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + if self.finished { + return None; + } + match self.value.find(self.sep) { + Some(i) => { + let ret = unsafe { self.value.get_unchecked(0..i + 1) }; + self.value = unsafe { self.value.get_unchecked(i + 1..self.value.len()) }; + Some(ret) + } + None => { + self.finished = true; + Some(self.value) + } + } + } +} + /// Removes common leading whitespace from each line. /// /// This function will look at each non-empty line and determine the diff --git a/src/lib.rs b/src/lib.rs index 9742e376..2703c81f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,7 +92,6 @@ #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![allow(clippy::redundant_field_names)] -#![feature(split_inclusive)] use std::borrow::Cow; use std::str::CharIndices; From 75ced97fdee78b71b091039fe8ede9696ec3dd0f Mon Sep 17 00:00:00 2001 From: jRimbault Date: Thu, 7 Jan 2021 00:34:12 +0100 Subject: [PATCH 3/4] remove unsafe blocks I guess this causes bound checking, but I think I prefer that. --- src/indentation.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/indentation.rs b/src/indentation.rs index d5fe9b7c..6e14ea32 100644 --- a/src/indentation.rs +++ b/src/indentation.rs @@ -87,8 +87,8 @@ impl<'a> Iterator for CharSplitInclusive<'a> { } match self.value.find(self.sep) { Some(i) => { - let ret = unsafe { self.value.get_unchecked(0..i + 1) }; - self.value = unsafe { self.value.get_unchecked(i + 1..self.value.len()) }; + let ret = &self.value[0..i + 1]; + self.value = &self.value[i + 1..self.value.len()]; Some(ret) } None => { From 5477d6de2b3b83fad8a4633ab0923599a76dabf8 Mon Sep 17 00:00:00 2001 From: jRimbault Date: Sun, 17 Jan 2021 18:32:58 +0100 Subject: [PATCH 4/4] impl suggestions --- .github/workflows/build.yml | 1 - src/indentation.rs | 45 ++++--------------------------------- 2 files changed, 4 insertions(+), 42 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bea4fec2..ac3b4cb5 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,6 @@ env: jobs: build: strategy: - fail-fast: false matrix: os: - ubuntu-latest diff --git a/src/indentation.rs b/src/indentation.rs index 6e14ea32..cc2351f2 100644 --- a/src/indentation.rs +++ b/src/indentation.rs @@ -49,7 +49,10 @@ pub fn indent(s: &str, prefix: &str) -> String { let mut result = String::new(); - for line in CharSplitInclusive::new(s, '\n') { + for (idx, line) in s.split('\n').enumerate() { + if idx > 0 { + result.push('\n'); + } if !line.trim().is_empty() { result.push_str(prefix); } @@ -59,46 +62,6 @@ pub fn indent(s: &str, prefix: &str) -> String { result } -/// This should hopefully be replaced by the split_inclusive feature in the -/// standard library. -/// Issue: #72360 -struct CharSplitInclusive<'a> { - sep: char, - value: &'a str, - finished: bool, -} - -impl<'a> CharSplitInclusive<'a> { - fn new(s: &'a str, sep: char) -> Self { - Self { - finished: false, - sep, - value: s, - } - } -} - -impl<'a> Iterator for CharSplitInclusive<'a> { - type Item = &'a str; - - fn next(&mut self) -> Option { - if self.finished { - return None; - } - match self.value.find(self.sep) { - Some(i) => { - let ret = &self.value[0..i + 1]; - self.value = &self.value[i + 1..self.value.len()]; - Some(ret) - } - None => { - self.finished = true; - Some(self.value) - } - } - } -} - /// Removes common leading whitespace from each line. /// /// This function will look at each non-empty line and determine the