Skip to content

Commit

Permalink
Merge pull request uutils#6199 from BenWiederhake/dev-wc-ascii-control-is-word
Browse files: browse the repository at this point in the history

wc: count ASCII control characters as word characters
  • Loading branch information
sylvestre authored Apr 7, 2024
2 parents abdeead + d5e7f9a commit a75b8a0
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
3 changes: 1 addition & 2 deletions src/uu/wc/src/wc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -580,9 +580,8 @@ fn process_chunk<
if SHOW_WORDS {
if ch.is_whitespace() {
*in_word = false;
} else if ch.is_ascii_control() {
// These count as characters but do not affect the word state
} else if !(*in_word) {
// This also counts control characters! (As of GNU coreutils 9.5)
*in_word = true;
total.words += 1;
}
Expand Down
24 changes: 17 additions & 7 deletions tests/by-util/test_wc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ fn test_utf8_words() {
.arg("-w")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is("87\n");
.stdout_is("89\n");
}

#[test]
Expand All @@ -80,7 +80,7 @@ fn test_utf8_line_length_words() {
.arg("-Lw")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is(" 87 48\n");
.stdout_is(" 89 48\n");
}

#[test]
Expand All @@ -98,7 +98,7 @@ fn test_utf8_line_length_chars_words() {
.arg("-Lmw")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is(" 87 442 48\n");
.stdout_is(" 89 442 48\n");
}

#[test]
Expand Down Expand Up @@ -143,7 +143,7 @@ fn test_utf8_chars_words() {
.arg("-mw")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is(" 87 442\n");
.stdout_is(" 89 442\n");
}

#[test]
Expand All @@ -161,7 +161,7 @@ fn test_utf8_line_length_lines_words() {
.arg("-Llw")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is(" 25 87 48\n");
.stdout_is(" 25 89 48\n");
}

#[test]
Expand All @@ -179,7 +179,7 @@ fn test_utf8_lines_words_chars() {
.arg("-mlw")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is(" 25 87 442\n");
.stdout_is(" 25 89 442\n");
}

#[test]
Expand All @@ -197,7 +197,17 @@ fn test_utf8_all() {
.arg("-lwmcL")
.pipe_in_fixture("UTF_8_weirdchars.txt")
.run()
.stdout_is(" 25 87 442 513 48\n");
.stdout_is(" 25 89 442 513 48\n");
}

#[test]
/// Checks that `wc -w` reports one word for input consisting of a single
/// ASCII control byte (0x01) followed by a newline.
fn test_ascii_control() {
// GNU coreutils "d1" test
// The piped input is exactly two bytes: the control character 0x01 and '\n'.
// Expecting "1\n" means the control byte on its own is counted as a word.
new_ucmd!()
.arg("-w")
.pipe_in(*b"\x01\n")
.run()
.stdout_is("1\n");
}

#[test]
Expand Down

0 comments on commit a75b8a0

Please sign in to comment.