Skip to content

Commit

Permalink
Rollup merge of #81837 - gilescope:to_ascii_speedups, r=dtolnay
Browse files Browse the repository at this point in the history
Slight perf improvement on char::to_ascii_lowercase

`char::to_ascii_lowercase()` was checking whether the char was ASCII and then whether it was in the right range. This change proposes checking only once instead (I think this removes a compare and a shift in the process: [godbolt](https://godbolt.org/z/e5Tora)).

before:
```
        test char::methods::bench_to_ascii_lowercase                    ... bench:      11,196 ns/iter (+/- 632)
        test char::methods::bench_to_ascii_uppercase                    ... bench:      11,656 ns/iter (+/- 671)
```
after:
```
         test char::methods::bench_to_ascii_lowercase                    ... bench:       9,612 ns/iter (+/- 979)
         test char::methods::bench_to_ascii_uppercase                    ... bench:       8,241 ns/iter (+/- 701)
```

(Calling `u8::to_ascii_lowercase` and letting that flip the 5th bit is also an option, but it compiles to more instructions. My thinking is that for operations involving ASCII and `char` we want to be as efficient as possible.)
  • Loading branch information
Dylan-DPC authored Feb 23, 2021
2 parents 5d90e89 + 33d8b04 commit 4af965e
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 6 deletions.
6 changes: 4 additions & 2 deletions library/core/benches/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ macro_rules! benches {
use test::black_box;
use test::Bencher;

// Bit that the hand-written case-conversion benchmarks below clear (or subtract)
// to turn a lowercase ASCII byte into its uppercase form.
const ASCII_CASE_MASK: u8 = 0b0010_0000;

benches! {
fn case00_alloc_only(_bytes: &mut [u8]) {}

Expand Down Expand Up @@ -204,7 +206,7 @@ benches! {
}
}
for byte in bytes {
*byte &= !((is_ascii_lowercase(*byte) as u8) << 5)
*byte &= !((is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK)
}
}

Expand All @@ -216,7 +218,7 @@ benches! {
}
}
for byte in bytes {
*byte -= (is_ascii_lowercase(*byte) as u8) << 5
*byte -= (is_ascii_lowercase(*byte) as u8) * ASCII_CASE_MASK
}
}

Expand Down
10 changes: 10 additions & 0 deletions library/core/benches/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,13 @@ fn bench_to_digit_radix_var(b: &mut Bencher) {
.min()
})
}

/// Benchmarks `to_ascii_uppercase` over 10,000 items drawn cyclically from `CHARS`.
///
/// The mapped values are reduced with `min()`, which consumes the iterator and
/// hands a result back to `Bencher::iter`.
#[bench]
fn bench_to_ascii_uppercase(b: &mut Bencher) {
    b.iter(|| {
        let uppercased = CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_uppercase());
        uppercased.min()
    })
}

/// Benchmarks `to_ascii_lowercase` over 10,000 items drawn cyclically from `CHARS`.
///
/// The mapped values are reduced with `min()`, which consumes the iterator and
/// hands a result back to `Bencher::iter`.
#[bench]
fn bench_to_ascii_lowercase(b: &mut Bencher) {
    b.iter(|| {
        let lowercased = CHARS.iter().cycle().take(10_000).map(|c| c.to_ascii_lowercase());
        lowercased.min()
    })
}
12 changes: 10 additions & 2 deletions library/core/src/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,11 @@ impl char {
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
#[inline]
pub fn to_ascii_uppercase(&self) -> char {
if self.is_ascii() { (*self as u8).to_ascii_uppercase() as char } else { *self }
if self.is_ascii_lowercase() {
(*self as u8).ascii_change_case_unchecked() as char
} else {
*self
}
}

/// Makes a copy of the value in its ASCII lower case equivalent.
Expand Down Expand Up @@ -1116,7 +1120,11 @@ impl char {
#[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")]
#[inline]
pub fn to_ascii_lowercase(&self) -> char {
if self.is_ascii() { (*self as u8).to_ascii_lowercase() as char } else { *self }
if self.is_ascii_uppercase() {
(*self as u8).ascii_change_case_unchecked() as char
} else {
*self
}
}

/// Checks that two values are an ASCII case-insensitive match.
Expand Down
13 changes: 11 additions & 2 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ impl isize {
usize_isize_to_xe_bytes_doc!(), usize_isize_from_xe_bytes_doc!() }
}

/// Mask selecting the single bit that differs between the upper- and lower-case
/// forms of an ASCII letter.
///
/// If the 6th bit (counting from 1 at the least-significant end) is set, an
/// ASCII letter is lower case; if it is clear, the letter is upper case.
const ASCII_CASE_MASK: u8 = 0b0010_0000;

#[lang = "u8"]
impl u8 {
uint_impl! { u8, u8, 8, 255, 2, "0x82", "0xa", "0x12", "0x12", "0x48", "[0x12]",
Expand Down Expand Up @@ -195,7 +198,7 @@ impl u8 {
#[inline]
pub fn to_ascii_uppercase(&self) -> u8 {
// Unset the fifth bit if this is a lowercase letter
*self & !((self.is_ascii_lowercase() as u8) << 5)
*self & !((self.is_ascii_lowercase() as u8) * ASCII_CASE_MASK)
}

/// Makes a copy of the value in its ASCII lower case equivalent.
Expand All @@ -218,7 +221,13 @@ impl u8 {
#[inline]
pub fn to_ascii_lowercase(&self) -> u8 {
// Set the fifth bit if this is an uppercase letter
*self | ((self.is_ascii_uppercase() as u8) << 5)
*self | (self.is_ascii_uppercase() as u8 * ASCII_CASE_MASK)
}

/// Toggles the ASCII case bit of `self`, turning an upper-case ASCII letter into
/// its lower-case equivalent and vice versa.
///
/// Assumes `self` is an ASCII letter: no check is performed here, so any other
/// byte is returned with that bit flipped as well.
#[inline]
pub(crate) fn ascii_change_case_unchecked(&self) -> u8 {
*self ^ ASCII_CASE_MASK
}

/// Checks that two values are an ASCII case-insensitive match.
Expand Down

0 comments on commit 4af965e

Please sign in to comment.