Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

paste: permit the delimiter list to be empty #6714

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 130 additions & 29 deletions src/uu/paste/src/paste.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use clap::{crate_version, Arg, ArgAction, Command};
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, Read, Write};
use std::path::Path;
use std::slice::Iter;
use uucore::error::{FromIo, UResult, USimpleError};
use uucore::line_ending::LineEnding;
use uucore::{format_usage, help_about, help_usage};
Expand Down Expand Up @@ -89,6 +90,53 @@ pub fn uu_app() -> Command {
)
}

/// Bookkeeping for walking a list of pre-encoded delimiters.
///
/// Each delimiter is stored as the raw bytes of one encoded character, so
/// it can be appended to a byte buffer without re-encoding.
struct DelimiterData<'a> {
    /// Every delimiter, in the order they should be used.
    delimiters_encoded: &'a [Box<[u8]>],
    /// Cursor over `delimiters_encoded`; it is replaced by a fresh iterator
    /// whenever the list has to start over from its first element.
    delimiters_encoded_iter: Iter<'a, Box<[u8]>>,
    /// Length in bytes of the delimiter most recently handed out.
    current_delimiter_length: usize,
}

/// - If there are no delimiters, returns `None`
/// - If there are delimiters, tries to return the next delimiter
/// - If the end of the delimiter list was reached, resets the iter to point to the beginning of the delimiter list
/// - (Technically this is done by creating a new iter)
/// - Then returns the next delimiter (which will be the first delimiter in the delimiter list)
fn get_delimiter_to_use_option<'a>(
delimiter_data_option: &'a mut Option<DelimiterData>,
) -> Option<&'a [u8]> {
match *delimiter_data_option {
Some(ref mut de) => {
let &mut DelimiterData {
ref mut current_delimiter_length,
delimiters_encoded,
ref mut delimiters_encoded_iter,
} = de;

let current_delimiter = if let Some(bo) = delimiters_encoded_iter.next() {
bo
} else {
let mut new_delimiters_encoded_iter = delimiters_encoded.iter();

// Unwrapping because:
// 1) `delimiters_encoded` is non-empty
// 2) `new_delimiters_encoded_iter` is a newly constructed Iter
// So: `next` should always return an element
let bo = new_delimiters_encoded_iter.next().unwrap();

// The old iter hit the end, so assign the new iter
*delimiters_encoded_iter = new_delimiters_encoded_iter;

bo
};

*current_delimiter_length = current_delimiter.len();

Some(current_delimiter)
}
None => None,
}
}

#[allow(clippy::cognitive_complexity)]
fn paste(
filenames: Vec<String>,
Expand Down Expand Up @@ -118,39 +166,74 @@ fn paste(
));
}

let delimiters: Vec<char> = unescape(delimiters).chars().collect();
let mut delim_count = 0;
let mut delim_length = 1;
let stdout = stdout();
let mut stdout = stdout.lock();
// Precompute instead of doing this inside the loops
let mut delimiters_encoded_option = {
let delimiters_unescaped = unescape(delimiters).chars().collect::<Vec<_>>();

let number_of_delimiters = delimiters_unescaped.len();

if number_of_delimiters > 0_usize {
let mut vec = Vec::<Box<[u8]>>::with_capacity(number_of_delimiters);

{
// a buffer of length four is large enough to encode any char
let mut buffer = [0_u8; 4_usize];

for ch in delimiters_unescaped {
let delimiter_encoded = ch.encode_utf8(&mut buffer);

vec.push(Box::from(delimiter_encoded.as_bytes()));
}
}

Some(vec.into_boxed_slice())
} else {
None
}
};

let mut delimiter_data_option = delimiters_encoded_option.as_mut().map(|bo| DelimiterData {
delimiters_encoded: bo,
delimiters_encoded_iter: bo.iter(),
current_delimiter_length: 0_usize,
});

let mut stdout = stdout().lock();

let mut output = Vec::new();

if serial {
for file in &mut files {
output.clear();

loop {
let delimiter_to_use_option =
get_delimiter_to_use_option(&mut delimiter_data_option);

match read_until(file.as_mut(), line_ending as u8, &mut output) {
Ok(0) => break,
Ok(0_usize) => break,
Ok(_) => {
if output.ends_with(&[line_ending as u8]) {
output.pop();
}
// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];
let ch =
delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
delim_length = ch.len();

for byte in buffer.iter().take(delim_length) {
output.push(*byte);

// Write delimiter, if one exists, to output
if let Some(current_delimiter) = delimiter_to_use_option {
output.extend_from_slice(current_delimiter);
}
}
Err(e) => return Err(e.map_err_context(String::new)),
}
delim_count += 1;
}
// remove final delimiter
output.truncate(output.len() - delim_length);

if let Some(ref de) = delimiter_data_option {
// Remove trailing delimiter, if there is a delimiter
if let Some(us) = output.len().checked_sub(de.current_delimiter_length) {
output.truncate(us);
} else {
// Subtraction would have resulted in a negative number. This should never happen.
}
}

write!(
stdout,
Expand All @@ -161,15 +244,21 @@ fn paste(
}
} else {
let mut eof = vec![false; files.len()];

loop {
output.clear();

let mut eof_count = 0;

for (i, file) in files.iter_mut().enumerate() {
let delimiter_to_use_option =
get_delimiter_to_use_option(&mut delimiter_data_option);

if eof[i] {
eof_count += 1;
} else {
match read_until(file.as_mut(), line_ending as u8, &mut output) {
Ok(0) => {
Ok(0_usize) => {
eof[i] = true;
eof_count += 1;
}
Expand All @@ -181,32 +270,44 @@ fn paste(
Err(e) => return Err(e.map_err_context(String::new)),
}
}
// a buffer of length four is large enough to encode any char
let mut buffer = [0; 4];
let ch = delimiters[delim_count % delimiters.len()].encode_utf8(&mut buffer);
delim_length = ch.len();

for byte in buffer.iter().take(delim_length) {
output.push(*byte);
// Write delimiter, if one exists, to output
if let Some(current_delimiter) = delimiter_to_use_option {
output.extend_from_slice(current_delimiter);
}

delim_count += 1;
}

if files.len() == eof_count {
break;
}
// Remove final delimiter
output.truncate(output.len() - delim_length);

if let &mut Some(ref mut de) = &mut delimiter_data_option {
let &mut DelimiterData {
current_delimiter_length,
delimiters_encoded,
ref mut delimiters_encoded_iter,
} = de;

// Reset iter after file is processed
*delimiters_encoded_iter = delimiters_encoded.iter();

// Remove trailing delimiter, if there is a delimiter
if let Some(us) = output.len().checked_sub(current_delimiter_length) {
output.truncate(us);
} else {
// Subtraction would have resulted in a negative number. This should never happen.
}
}

write!(
stdout,
"{}{}",
String::from_utf8_lossy(&output),
line_ending
)?;
delim_count = 0;
}
}

Ok(())
}

Expand Down
21 changes: 21 additions & 0 deletions tests/by-util/test_paste.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,27 @@ fn test_delimiter_list_ending_with_unescaped_backslash() {
}
}

#[test]
fn test_delimiter_list_empty() {
    // With an empty delimiter list and `-s`, the lines read from standard input
    // are expected to be joined with nothing between them.
    let input = "\
A ALPHA 1 _
B BRAVO 2 _
C CHARLIE 3 _
";
    let expected = "\
A ALPHA 1 _B BRAVO 2 _C CHARLIE 3 _
";

    // Exercise both spellings of the delimiter option
    for option_name in ["-d", "--delimiters"] {
        new_ucmd!()
            .args(&[option_name, "", "-s", "--", "-"])
            .pipe_in(input)
            .succeeds()
            .stdout_only(expected);
    }
}

#[test]
fn test_data() {
for example in EXAMPLE_DATA {
Expand Down
Loading