Skip to content

Commit

Permalink
Rollup merge of rust-lang#52116 - Pazzaz:match-str-case, r=SimonSapin
Browse files Browse the repository at this point in the history
Handle array manually in str case conversion methods

Avoiding the overhead incurred from `String.extend(char.to_lowercase())` showed a notable performance improvement when I benchmarked it.

I tested on these strings:
```rust
ALL_LOWER:       "loremipsumdolorsitametduosensibusmnesarchumabcdefgh"
ALL_UPPER:       "LOREMIPSUMDOLORSITAMETDUOSENSIBUSMNESARCHUMABCDEFGH"
REALISTIC_UPPER: "LOREM IPSUM DOLOR SIT AMET, DUO SENSIBUS MNESARCHUM"
SIGMAS:          "ΣΣΣΣΣ ΣΣΣΣΣ ΣΣΣΣΣ ΣΣΣ ΣΣΣΣ, ΣΣΣ ΣΣΣΣΣΣΣΣ ΣΣΣΣΣΣΣΣΣΣ"
WORD_UPPER:      "Lorem Ipsum Dolor Sit Amet, Duo Sensibus Mnesarchum"
```
The performance improvements for `to_lowercase()` were:
```
running 10 tests
test tests::all_lower           ... bench:       1,752 ns/iter (+/- 49)
test tests::all_lower_new       ... bench:       1,266 ns/iter (+/- 15)   -28%
test tests::all_upper           ... bench:       1,832 ns/iter (+/- 39)
test tests::all_upper_new       ... bench:       1,337 ns/iter (+/- 18)   -27%
test tests::realistic_upper     ... bench:       1,993 ns/iter (+/- 14)
test tests::realistic_upper_new ... bench:       1,445 ns/iter (+/- 22)   -27%
test tests::sigmas              ... bench:       1,342 ns/iter (+/- 39)
test tests::sigmas_new          ... bench:       1,226 ns/iter (+/- 16)    -9%
test tests::word_upper          ... bench:       1,899 ns/iter (+/- 12)
test tests::word_upper_new      ... bench:       1,381 ns/iter (+/- 26)   -27%
```
and of `to_uppercase()`
```
running 10 tests
test tests::all_lower           ... bench:       1,813 ns/iter (+/- 20)
test tests::all_lower_new       ... bench:       1,321 ns/iter (+/- 16)   -27%
test tests::all_upper           ... bench:       1,629 ns/iter (+/- 22)
test tests::all_upper_new       ... bench:       1,241 ns/iter (+/- 9)    -24%
test tests::realistic_upper     ... bench:       1,670 ns/iter (+/- 24)
test tests::realistic_upper_new ... bench:       1,241 ns/iter (+/- 17)   -26%
test tests::sigmas              ... bench:       2,053 ns/iter (+/- 20)
test tests::sigmas_new          ... bench:       1,753 ns/iter (+/- 23)   -15%
test tests::word_upper          ... bench:       1,873 ns/iter (+/- 30)
test tests::word_upper_new      ... bench:       1,412 ns/iter (+/- 25)   -25%
```
I gave up on the more advanced method from rust-lang#52061 as it wasn't always a clear improvement and would help in even fewer cases if this PR were merged.
  • Loading branch information
kennytm authored Jul 18, 2018
2 parents 29ee654 + ad7621d commit 2712fbe
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
29 changes: 27 additions & 2 deletions src/liballoc/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
use core::mem;
use core::ptr;
use core::iter::FusedIterator;
use core::unicode::conversions;

use borrow::{Borrow, ToOwned};
use boxed::Box;
Expand Down Expand Up @@ -369,7 +370,18 @@ impl str {
// See https://github.com/rust-lang/rust/issues/26035
map_uppercase_sigma(self, i, &mut s)
} else {
s.extend(c.to_lowercase());
match conversions::to_lower(c) {
[a, '\0', _] => s.push(a),
[a, b, '\0'] => {
s.push(a);
s.push(b);
}
[a, b, c] => {
s.push(a);
s.push(b);
s.push(c);
}
}
}
}
return s;
Expand Down Expand Up @@ -423,7 +435,20 @@ impl str {
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
pub fn to_uppercase(&self) -> String {
    // Reserve as many bytes as the input occupies; the buffer grows on its
    // own if a mapping turns out longer than its source character.
    let mut upper = String::with_capacity(self.len());
    for ch in self.chars() {
        // `conversions::to_upper` yields a three-element char array. The
        // first slot is always emitted; a '\0' in the second slot ends the
        // mapping after one char, and a '\0' in the third ends it after two.
        let [first, second, third] = conversions::to_upper(ch);
        upper.push(first);
        if second != '\0' {
            upper.push(second);
            if third != '\0' {
                upper.push(third);
            }
        }
    }
    upper
}

Expand Down
3 changes: 3 additions & 0 deletions src/libcore/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ pub(crate) mod version;
pub mod derived_property {
pub use unicode::tables::derived_property::{Case_Ignorable, Cased};
}
pub mod conversions {
pub use unicode::tables::conversions::{to_lower, to_upper};
}

// For use in libsyntax
pub mod property {
Expand Down

0 comments on commit 2712fbe

Please sign in to comment.