From cc863f02a307f4aad3e3d207b57ea10263007a08 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 11 Dec 2019 21:24:35 +0100 Subject: [PATCH 1/4] Improve `str` prefix/suffix comparison The comparison can be performed on the raw bytes, as the chars can only match if their UTF8 encoding matches. This avoids the `is_char_boundary` checks and translates to a straight `u8` slice comparison which is optimized to a memcmp or inline comparison where appropriate. --- src/libcore/str/pattern.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index a494274118a74..dd3765d42da02 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -715,16 +715,13 @@ impl<'a, 'b> Pattern<'a> for &'b str { /// Checks whether the pattern matches at the front of the haystack #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - haystack.is_char_boundary(self.len()) && - self == &haystack[..self.len()] + haystack.as_bytes().starts_with(self.as_bytes()) } /// Checks whether the pattern matches at the back of the haystack #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool { - self.len() <= haystack.len() && - haystack.is_char_boundary(haystack.len() - self.len()) && - self == &haystack[haystack.len() - self.len()..] + haystack.as_bytes().ends_with(self.as_bytes()) } } From 1f6d0234db58459c09c233efba762ad8f40adf57 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 11 Dec 2019 21:28:37 +0100 Subject: [PATCH 2/4] Prefer encoding the char when checking for string prefix/suffix This enables constant folding when matching a literal char. Fixes #41993. 
--- src/libcore/str/pattern.rs | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index dd3765d42da02..71dd77fee7396 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -450,21 +450,15 @@ impl<'a> Pattern<'a> for char { #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - if let Some(ch) = haystack.chars().next() { - self == ch - } else { - false - } + let mut buffer = [0u8; 4]; + self.encode_utf8(&mut buffer).is_prefix_of(haystack) } #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool where Self::Searcher: ReverseSearcher<'a> { - if let Some(ch) = haystack.chars().next_back() { - self == ch - } else { - false - } + let mut buffer = [0u8; 4]; + self.encode_utf8(&mut buffer).is_suffix_of(haystack) } } From de7fefa04c74ecaa7618c910a74ae1cf62affa8e Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Thu, 12 Dec 2019 21:09:17 +0100 Subject: [PATCH 3/4] Minor cleanup in `Pattern::{is_prefix_of,is_suffix_of}` for `char` --- src/libcore/str/pattern.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/libcore/str/pattern.rs b/src/libcore/str/pattern.rs index 71dd77fee7396..1b6e6f1c2e419 100644 --- a/src/libcore/str/pattern.rs +++ b/src/libcore/str/pattern.rs @@ -450,15 +450,13 @@ impl<'a> Pattern<'a> for char { #[inline] fn is_prefix_of(self, haystack: &'a str) -> bool { - let mut buffer = [0u8; 4]; - self.encode_utf8(&mut buffer).is_prefix_of(haystack) + self.encode_utf8(&mut [0u8; 4]).is_prefix_of(haystack) } #[inline] fn is_suffix_of(self, haystack: &'a str) -> bool where Self::Searcher: ReverseSearcher<'a> { - let mut buffer = [0u8; 4]; - self.encode_utf8(&mut buffer).is_suffix_of(haystack) + self.encode_utf8(&mut [0u8; 4]).is_suffix_of(haystack) } } From 3de1923d5d3aad5c4bb0914f054e950bf166aa00 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Mon, 16 Dec 2019 15:33:16 +0100 Subject: [PATCH 4/4] Add benchmarks 
for `starts_with` and `ends_with` --- src/libcore/benches/lib.rs | 1 + src/libcore/benches/pattern.rs | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 src/libcore/benches/pattern.rs diff --git a/src/libcore/benches/lib.rs b/src/libcore/benches/lib.rs index 6932c7fe221d0..570fc4ab93390 100644 --- a/src/libcore/benches/lib.rs +++ b/src/libcore/benches/lib.rs @@ -11,4 +11,5 @@ mod hash; mod iter; mod num; mod ops; +mod pattern; mod slice; diff --git a/src/libcore/benches/pattern.rs b/src/libcore/benches/pattern.rs new file mode 100644 index 0000000000000..a49490cec1219 --- /dev/null +++ b/src/libcore/benches/pattern.rs @@ -0,0 +1,43 @@ +use test::black_box; +use test::Bencher; + +#[bench] +fn starts_with_char(b: &mut Bencher) { + let text = black_box("kdjsfhlakfhlsghlkvcnljknfqiunvcijqenwodind"); + b.iter(|| { + for _ in 0..1024 { + black_box(text.starts_with('k')); + } + }) +} + +#[bench] +fn starts_with_str(b: &mut Bencher) { + let text = black_box("kdjsfhlakfhlsghlkvcnljknfqiunvcijqenwodind"); + b.iter(|| { + for _ in 0..1024 { + black_box(text.starts_with("k")); + } + }) +} + + +#[bench] +fn ends_with_char(b: &mut Bencher) { + let text = black_box("kdjsfhlakfhlsghlkvcnljknfqiunvcijqenwodind"); + b.iter(|| { + for _ in 0..1024 { + black_box(text.ends_with('k')); + } + }) +} + +#[bench] +fn ends_with_str(b: &mut Bencher) { + let text = black_box("kdjsfhlakfhlsghlkvcnljknfqiunvcijqenwodind"); + b.iter(|| { + for _ in 0..1024 { + black_box(text.ends_with("k")); + } + }) +}