Skip to content

Commit

Permalink
perf: cost-free conversion from paths to &str (#93)
Browse files Browse the repository at this point in the history
Background:

When I was migrating `PathBuf` to `Utf8PathBuf`, etc, I found out some regression in our benchmarks. Then I found out `as_str` is actually not cost-free as in the older version of rustc there's no way to get the underlying bytes out of an `OsStr` until 1.74.0.

In this PR, with the help of [`OsStr::as_encoded_bytes`](rust-lang/rust#115443) was stabilized in 1.74.0, We can perform a cost-free conversion from `&OsStr` to `&str` with constraint of it's underlying bytes are `UTF-8` encoded.

Benchmark:

With the benchmark included in the PR, the time cost is a constant now.

Result:
```
// String length of 10
osstr to_str/10 
                        time:   [5.9769 ns 5.9913 ns 6.0060 ns]
osstr as_encoded_bytes/10 
                        time:   [554.90 ps 558.32 ps 562.19 ps]

// String length of 100
osstr to_str/100
                        time:   [6.6113 ns 6.6250 ns 6.6404 ns]
osstr as_encoded_bytes/100
                        time:   [553.18 ps 557.33 ps 561.68 ps]

// String length of 1000
osstr to_str/1000
                        time:   [26.990 ns 27.033 ns 27.086 ns]
osstr as_encoded_bytes/1000
                        time:   [553.66 ps 560.67 ps 570.42 ps]

// String length of 10000
osstr to_str/10000
                        time:   [310.17 ns 310.77 ns 311.32 ns]
osstr as_encoded_bytes/10000
                        time:   [550.98 ps 555.16 ps 559.53 ps]
```
  • Loading branch information
h-a-n-a authored Aug 15, 2024
1 parent 9cabdbd commit 44dd11c
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 9 deletions.
19 changes: 17 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,14 @@ jobs:
components: rustfmt, clippy
- name: Lint (clippy)
run: cargo clippy --workspace --all-features --all-targets
- name: Lint (clippy in camino-examples)
run: |
cd camino-examples && cargo clippy --all-features --all-targets
- name: Lint (rustfmt)
run: cargo xfmt --check
- name: Lint (rustfmt in camino-examples)
run: |
cd camino-examples && cargo xfmt --check
- name: Check for differences
run: git diff --exit-code

Expand All @@ -51,6 +57,7 @@ jobs:
- 1.63
- nightly-2022-12-14
- 1.68
- 1.74
- stable
exclude:
# These versions started failing with "archive member 'lib.rmeta' with length 26456 is not
Expand Down Expand Up @@ -89,11 +96,19 @@ jobs:
- name: Build all targets with all features
# Some optional features are not compatible with earlier versions
if: ${{ matrix.rust-version == 'stable' }}
run: cargo hack --feature-powerset build --workspace
run: cargo hack --feature-powerset build --workspace --all-targets
- name: Test all targets with all features
# Some optional features are not compatible with earlier versions
if: ${{ matrix.rust-version == 'stable' }}
run: cargo hack --feature-powerset test --workspace
run: cargo hack --feature-powerset test --workspace --all-targets
- name: Build camino-examples
if: ${{ matrix.rust-version == 'stable' }}
run: |
cd camino-examples && cargo build
- name: Test camino-examples
if: ${{ matrix.rust-version == 'stable' }}
run: |
cd camino-examples && cargo test
miri:
name: Check unsafe code against miri
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[workspace]
members = [".", "camino-examples"]
members = ["."]

[package]
name = "camino"
Expand Down
6 changes: 6 additions & 0 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ fn main() {
println!("cargo:rustc-check-cfg=cfg(path_buf_capacity)");
println!("cargo:rustc-check-cfg=cfg(shrink_to)");
println!("cargo:rustc-check-cfg=cfg(try_reserve_2)");
println!("cargo:rustc-check-cfg=cfg(os_str_bytes)");

let compiler = match rustc_version() {
Some(compiler) => compiler,
Expand Down Expand Up @@ -49,6 +50,11 @@ fn main() {
{
println!("cargo:rustc-cfg=path_buf_deref_mut");
}
// os_str_bytes was added in a 1.74 stable.
if (compiler.minor >= 74 && compiler.channel == ReleaseChannel::Stable) || compiler.minor >= 75
{
println!("cargo:rustc-cfg=os_str_bytes");
}

// Catch situations where the actual features aren't enabled. Currently, they're only shown with
// `-vv` output, but maybe that will be noticed.
Expand Down
1 change: 1 addition & 0 deletions camino-examples/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/target
12 changes: 12 additions & 0 deletions camino-examples/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
[workspace]
# camino-examples pulls in crates that aren't supported by old versions of Rust, so make it its own
# workspace.
members = ["."]

[package]
name = "camino-examples"
description = "Examples for camino"
Expand All @@ -12,5 +17,12 @@ clap = { version = "3.0.7", features = ["derive"] }
serde = { version = "1", features = ["derive"] }
serde_json = { version = "1.0.62" }

[dev-dependencies]
criterion = "0.5.1"

[[bin]]
name = "serde"

[[bench]]
name = "bench"
harness = false
24 changes: 24 additions & 0 deletions camino-examples/benches/bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) The camino Contributors
// SPDX-License-Identifier: MIT OR Apache-2.0

// This benchmark is here because criterion has a higher MSRV than camino -- camino-examples is only
// tested on stable, which is good enough.

use camino::Utf8PathBuf;
use criterion::*;

fn bench_path(c: &mut Criterion) {
let mut group = c.benchmark_group("Path");
for i in [10, 100, 1000, 10000] {
let p = "i".repeat(i);
let buf = Utf8PathBuf::from(black_box(p));
group.bench_with_input(BenchmarkId::new("Utf8PathBuf::as_str", i), &buf, |b, i| {
b.iter(|| {
let _ = black_box(&i).as_str();
})
});
}
}

criterion_group!(benches, bench_path);
criterion_main!(benches);
25 changes: 19 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,10 +701,10 @@ impl Utf8Path {
/// the current directory.
///
/// * On Unix, a path is absolute if it starts with the root, so
/// `is_absolute` and [`has_root`] are equivalent.
/// `is_absolute` and [`has_root`] are equivalent.
///
/// * On Windows, a path is absolute if it has a prefix and starts with the
/// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not.
/// root: `c:\windows` is absolute, while `c:temp` and `\temp` are not.
///
/// # Examples
///
Expand Down Expand Up @@ -3067,9 +3067,22 @@ impl_cmp_os_str!(&'a Utf8Path, OsString);
// invariant: OsStr must be guaranteed to be utf8 data
#[inline]
unsafe fn str_assume_utf8(string: &OsStr) -> &str {
// Adapted from the source code for Option::unwrap_unchecked.
match string.to_str() {
Some(val) => val,
None => std::hint::unreachable_unchecked(),
#[cfg(os_str_bytes)]
{
// SAFETY: OsStr is guaranteed to be utf8 data from the invariant
unsafe {
std::str::from_utf8_unchecked(
#[allow(clippy::incompatible_msrv)]
string.as_encoded_bytes(),
)
}
}
#[cfg(not(os_str_bytes))]
{
// Adapted from the source code for Option::unwrap_unchecked.
match string.to_str() {
Some(val) => val,
None => std::hint::unreachable_unchecked(),
}
}
}

0 comments on commit 44dd11c

Please sign in to comment.