Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚀 float NaN handling #21

Merged
merged 49 commits into from
Feb 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
edd083c
:recycle: prepare for float NaN handling
jvdd Feb 8, 2023
878dbdc
:broom: separate float & (u)int in ArgMinMax trait macro
jvdd Feb 8, 2023
ec9dd42
:see_no_evil: add temporary switched f16
jvdd Feb 8, 2023
4c27423
:recycle: major refactoring
jvdd Feb 9, 2023
728e310
Adjust NEON simd types to support new expanded NaN supporting trait v…
varon Feb 9, 2023
718a0ee
:fire: getting there
jvdd Feb 9, 2023
a046743
:see_no_evil: quickfix benches
jvdd Feb 9, 2023
97adc4f
Merge pull request #23 from jvdd/nans_v4
jvdd Feb 9, 2023
b58d0d6
:see_no_evil: enable correct target feature for u32 AVX2
jvdd Feb 10, 2023
ab36c08
:thinking: add simd f64 return nans + cleanup code
jvdd Feb 10, 2023
ddad0a5
:bulb: account for missing srai 64-bit instruction on SSE & AVX
jvdd Feb 10, 2023
4cf61d8
:broom: revert prev commit :/
jvdd Feb 11, 2023
599d247
:pray: pave the path towards ReturnNan default argminmax for floats
jvdd Feb 11, 2023
4b81161
:broom: formatting
jvdd Feb 11, 2023
1b6231c
Merge branch 'neon-nan-v3' of https://github.com/varon/argminmax into…
jvdd Feb 11, 2023
c3ddfeb
:pray: resolve Aarch64 implements merge conflicts on x86_64
jvdd Feb 11, 2023
a3f7c4b
:pizza: update NEON SIMD to latest changes
jvdd Feb 11, 2023
3324014
:broom: fix cargo clippy warnings
jvdd Feb 11, 2023
56c09cd
:bug: check for correct SIMD feature avx -> avx2
jvdd Feb 11, 2023
ccf0f8f
:fire: implement return-nan argminmax for NEON f32
jvdd Feb 11, 2023
d8da46c
Merge pull request #25 from jvdd/varon-neon-nan-v3
jvdd Feb 11, 2023
fa86ca8
:pen: add some documentation to float SIMD
jvdd Feb 12, 2023
d8f0896
:see_no_evil: resolve merge conflict
jvdd Feb 12, 2023
2cf885b
:broom: improve variable names
jvdd Feb 12, 2023
aa14124
:broom: shorter imports
jvdd Feb 12, 2023
6026ad8
:sparkles: scalar ignore_nans + ignore_nans & inf tests
jvdd Feb 12, 2023
b5390b6
:fire: test return nans + inf handling + minor bug fix
jvdd Feb 12, 2023
2c2ab4e
:bug: minor bug fix + add tests
jvdd Feb 12, 2023
87422b7
:see_no_evil: remove avx512f target feature check in NEON tests
jvdd Feb 12, 2023
a52571e
:tada: use correct SCALAR implementation
jvdd Feb 12, 2023
de00a6f
:recycle: move duplicate code to dedicated method
jvdd Feb 12, 2023
7b12400
:muscle: make return nan default implementation
jvdd Feb 12, 2023
4dbd0f4
:bug: same scalar f16 impl for ARM/Aarch64 as for x86_64
jvdd Feb 12, 2023
c2b386a
:pray: update benches b4 renaming
jvdd Feb 12, 2023
e65d89b
:see_no_evil: enable correct target feature for f64 SSE
jvdd Feb 12, 2023
fd7edfb
:broom:
jvdd Feb 13, 2023
30fad1c
:broom: fix clippy warnings
jvdd Feb 14, 2023
1b4bfe6
:pray: fix benches on ARM/Aarch64
jvdd Feb 16, 2023
1d95552
:sparkles: add benches for nanargminmax
jvdd Feb 17, 2023
859ac95
:bug: minor bugfix in benches
jvdd Feb 17, 2023
14f5070
:broom: num_traits Float -> FloatCore
jvdd Feb 24, 2023
dd698cf
:bike: default behavior = ignore NaNs
jvdd Feb 25, 2023
4fcc780
:hourglass: update benches
jvdd Feb 25, 2023
c493e39
:broom:
jvdd Feb 25, 2023
2e28b19
:bug: call correct implementation
jvdd Feb 25, 2023
439b58b
:bug: call correct implementation for f16
jvdd Feb 25, 2023
049e9d3
Merge pull request #28 from jvdd/nans_change_default
jvdd Feb 25, 2023
84b6518
:mag: own code review!
jvdd Feb 26, 2023
d53e09c
:pen: code review
jvdd Feb 26, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,38 @@ arrow = { version = "*", default-features = false, optional = true}
default = ["half"] # TODO: remove this as default feature as soon as https://github.com/CodSpeedHQ/codspeed-rust/issues/1 is fixed

[dev-dependencies]
# rstest = { version = "0.16", default-features = false}
# rstest_reuse = "0.5"
Comment on lines +25 to +26
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is something I experimented with and will use in a future PR (parameterizing the tests).

codspeed-criterion-compat = "1.0.1"
criterion = "0.3.1"
dev_utils = { path = "dev_utils" }


[[bench]]
name = "bench_f16"
name = "bench_f16_return_nan"
harness = false
required-features = ["half"]

# TODO: support this
# [[bench]]
# name = "bench_f16_ignore_nan"
# harness = false
# required-features = ["half"]
Comment on lines +37 to +41
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is currently not supported because we use the ord_transform to provide SIMD support for the f16 datatype, which is not supported in hardware (see #1).


[[bench]]
name = "bench_f32_return_nan"
harness = false

[[bench]]
name = "bench_f32_ignore_nan"
harness = false

[[bench]]
name = "bench_f32"
name = "bench_f64_return_nan"
harness = false

[[bench]]
name = "bench_f64"
name = "bench_f64_ignore_nan"
harness = false

[[bench]]
Expand Down
214 changes: 0 additions & 214 deletions benches/bench_f16.rs

This file was deleted.

83 changes: 83 additions & 0 deletions benches/bench_f16_return_nan.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#![feature(stdsimd)]

extern crate dev_utils;

#[cfg(feature = "half")]
use argminmax::ArgMinMax;
use codspeed_criterion_compat::*;
use dev_utils::{config, utils};

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use argminmax::{SIMDArgMinMax, AVX2, AVX512, SSE};
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
use argminmax::{SIMDArgMinMax, NEON};
use argminmax::{ScalarArgMinMax, SCALAR};

#[cfg(feature = "half")]
use half::f16;

/// Builds a `Vec<f16>` of length `n` for benchmarking.
///
/// The values are produced by reinterpreting `u16` bit patterns (obtained from
/// `utils::get_random_array` over the full `u16` range) as `f16`. Any resulting
/// NaN or infinite value is replaced by the value with bit pattern `0`.
#[cfg(feature = "half")]
fn get_random_f16_array(n: usize) -> Vec<f16> {
    let raw_bits = utils::get_random_array::<u16>(n, u16::MIN, u16::MAX);
    raw_bits
        .iter()
        .map(|&bits| {
            let value = f16::from_bits(bits);
            // Replace NaNs and Infs with 0
            let is_special = value.is_nan() || value.is_infinite();
            if is_special {
                f16::from_bits(0)
            } else {
                value
            }
        })
        .collect()
}

// TODO: rename _random_long_ to _nanargminmax_
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will do this in a separate PR (cleaning up the benchmarks: renaming them and removing unused ones).

/// Benchmarks argminmax on a long `f16` array for each available implementation.
///
/// Registers, in order:
/// - the scalar implementation (`SCALAR::argminmax`),
/// - one benchmark per SIMD instruction set (SSE, AVX2, AVX512 on x86/x86_64;
///   NEON on arm/aarch64) that is compiled in for the target architecture and
///   whose CPU feature is detected when the benchmark runs,
/// - the `nanargminmax` method called directly on the slice.
///
/// The input has `config::ARRAY_LENGTH_LONG` elements and is generated once by
/// `get_random_f16_array`, so every benchmark runs on the same data.
#[cfg(feature = "half")]
fn nanargminmax_f16_random_array_long(c: &mut Criterion) {
    let n = config::ARRAY_LENGTH_LONG;
    let data: &[f16] = &get_random_f16_array(n);
    c.bench_function("scalar_random_long_f16", |b| {
        b.iter(|| SCALAR::argminmax(black_box(data)))
    });
    // The SIMD entry points are `unsafe`: they may only be called when the
    // corresponding CPU feature is present, hence the detection guards below.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    if is_x86_feature_detected!("sse4.1") {
        c.bench_function("sse_random_long_f16", |b| {
            b.iter(|| unsafe { SSE::argminmax(black_box(data)) })
        });
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    if is_x86_feature_detected!("avx2") {
        c.bench_function("avx2_random_long_f16", |b| {
            b.iter(|| unsafe { AVX2::argminmax(black_box(data)) })
        });
    }
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    if is_x86_feature_detected!("avx512bw") {
        c.bench_function("avx512_random_long_f16", |b| {
            b.iter(|| unsafe { AVX512::argminmax(black_box(data)) })
        });
    }
    #[cfg(target_arch = "arm")]
    if std::arch::is_arm_feature_detected!("neon") {
        c.bench_function("neon_random_long_f16", |b| {
            b.iter(|| unsafe { NEON::argminmax(black_box(data)) })
        });
    }
    #[cfg(target_arch = "aarch64")]
    if std::arch::is_aarch64_feature_detected!("neon") {
        c.bench_function("neon_random_long_f16", |b| {
            b.iter(|| unsafe { NEON::argminmax(black_box(data)) })
        });
    }
    // Black-box the *input*, like every other benchmark above, so the optimizer
    // cannot treat the slice as loop-invariant; the closure's return value is
    // handed to `b.iter`, so it needs no extra `black_box`.
    c.bench_function("impl_random_long_f16", |b| {
        b.iter(|| black_box(data).nanargminmax())
    });
}

// Register the f16 benchmark function in the `benches` group and hand that group
// to `criterion_main!`. Both invocations are gated on the `half` feature, like
// the benchmark function they reference.
#[cfg(feature = "half")]
criterion_group!(benches, nanargminmax_f16_random_array_long,);
#[cfg(feature = "half")]
criterion_main!(benches);
Loading