Skip to content

Commit

Permalink
Correct the vqrdmlah intrinsics. (#1246)
Browse files Browse the repository at this point in the history
  • Loading branch information
JamieCunliffe authored Nov 4, 2021
1 parent 2eb8706 commit 219cf81
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 282 deletions.
274 changes: 256 additions & 18 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9639,58 +9639,176 @@ pub unsafe fn vqrdmulhs_laneq_s32<const LANE: i32>(a: i32, b: int32x4_t) -> i32

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Multiplies `b` by `c` with `vqrdmulh_s16` and accumulates the product
/// into `a` with `vqadd_s16`. The function is gated on the `rdm` target
/// feature, and the test build asserts that it lowers to `sqrdmlah`.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlah_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
    vqadd_s16(a, vqrdmulh_s16(b, c))
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// 128-bit (`int16x8_t`) form: the product of `b` and `c` is taken with
/// `vqrdmulhq_s16` and then accumulated into `a` with `vqaddq_s16`.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlahq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
    let product: int16x8_t = vqrdmulhq_s16(b, c);
    vqaddq_s16(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// 64-bit (`int32x2_t`) form: the product of `b` and `c` is taken with
/// `vqrdmulh_s32` and then accumulated into `a` with `vqadd_s32`.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlah_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
    let product: int32x2_t = vqrdmulh_s32(b, c);
    vqadd_s32(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// 128-bit (`int32x4_t`) form: the product of `b` and `c` is taken with
/// `vqrdmulhq_s32` and then accumulated into `a` with `vqaddq_s32`.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlahq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
    let product: int32x4_t = vqrdmulhq_s32(b, c);
    vqaddq_s32(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Scalar `i16` form. Each operand is broadcast into an `int16x4_t` with
/// `vdup_n_s16`, the vector intrinsic `vqrdmlah_s16` is applied, and lane 0
/// of the result is returned.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlahh_s16(a: i16, b: i16, c: i16) -> i16 {
    let a: int16x4_t = vdup_n_s16(a);
    let b: int16x4_t = vdup_n_s16(b);
    let c: int16x4_t = vdup_n_s16(c);
    // Every lane holds the same value, so lane 0 carries the scalar result.
    simd_extract(vqrdmlah_s16(a, b, c), 0)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Scalar `i32` form. Each operand is broadcast into an `int32x2_t` with
/// `vdup_n_s32`, the vector intrinsic `vqrdmlah_s32` is applied, and lane 0
/// of the result is returned.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah))]
pub unsafe fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 {
    let a: int32x2_t = vdup_n_s32(a);
    let b: int32x2_t = vdup_n_s32(b);
    let c: int32x2_t = vdup_n_s32(c);
    // Every lane holds the same value, so lane 0 carries the scalar result.
    simd_extract(vqrdmlah_s32(a, b, c), 0)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// By-lane form: `b` is multiplied by the lane of `c` selected by `LANE`
/// (via `vqrdmulh_lane_s16`) and the product is accumulated into `a` with
/// `vqadd_s16`. `LANE` is validated at compile time by `static_assert_imm2!`
/// (presumably restricting it to `0..=3` for the four-lane `c` — confirm
/// against the macro definition).
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
    static_assert_imm2!(LANE);
    vqadd_s16(a, vqrdmulh_lane_s16::<LANE>(b, c))
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// By-lane form with a 128-bit lane source: `b` is multiplied by the lane of
/// `c` chosen by `LANE` (through `vqrdmulh_laneq_s16`) and the product is
/// accumulated into `a` with `vqadd_s16`. `static_assert_imm3!` checks `LANE`
/// at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
    static_assert_imm3!(LANE);
    let product: int16x4_t = vqrdmulh_laneq_s16::<LANE>(b, c);
    vqadd_s16(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// 128-bit accumulator with a 64-bit lane source: `b` is multiplied by the
/// lane of `c` chosen by `LANE` (through `vqrdmulhq_lane_s16`) and the
/// product is accumulated into `a` with `vqaddq_s16`. `static_assert_imm2!`
/// checks `LANE` at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
    static_assert_imm2!(LANE);
    let product: int16x8_t = vqrdmulhq_lane_s16::<LANE>(b, c);
    vqaddq_s16(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// All-128-bit by-lane form: `b` is multiplied by the lane of `c` chosen by
/// `LANE` (through `vqrdmulhq_laneq_s16`) and the product is accumulated
/// into `a` with `vqaddq_s16`. `static_assert_imm3!` checks `LANE` at
/// compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
    static_assert_imm3!(LANE);
    let product: int16x8_t = vqrdmulhq_laneq_s16::<LANE>(b, c);
    vqaddq_s16(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// By-lane form on `int32x2_t`: `b` is multiplied by the lane of `c` chosen
/// by `LANE` (through `vqrdmulh_lane_s32`) and the product is accumulated
/// into `a` with `vqadd_s32`. `static_assert_imm1!` checks `LANE` at compile
/// time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
    static_assert_imm1!(LANE);
    let product: int32x2_t = vqrdmulh_lane_s32::<LANE>(b, c);
    vqadd_s32(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// 64-bit accumulator with a 128-bit lane source: `b` is multiplied by the
/// lane of `c` chosen by `LANE` (through `vqrdmulh_laneq_s32`) and the
/// product is accumulated into `a` with `vqadd_s32`. `static_assert_imm2!`
/// checks `LANE` at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlah_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
    static_assert_imm2!(LANE);
    let product: int32x2_t = vqrdmulh_laneq_s32::<LANE>(b, c);
    vqadd_s32(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// 128-bit accumulator with a 64-bit lane source: `b` is multiplied by the
/// lane of `c` chosen by `LANE` (through `vqrdmulhq_lane_s32`) and the
/// product is accumulated into `a` with `vqaddq_s32`. `static_assert_imm1!`
/// checks `LANE` at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
    static_assert_imm1!(LANE);
    let product: int32x4_t = vqrdmulhq_lane_s32::<LANE>(b, c);
    vqaddq_s32(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// All-128-bit by-lane form on `int32x4_t`: `b` is multiplied by the lane of
/// `c` chosen by `LANE` (through `vqrdmulhq_laneq_s32`) and the product is
/// accumulated into `a` with `vqaddq_s32`. `static_assert_imm2!` checks
/// `LANE` at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
    static_assert_imm2!(LANE);
    let product: int32x4_t = vqrdmulhq_laneq_s32::<LANE>(b, c);
    vqaddq_s32(a, product)
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Scalar by-lane form: the element of `c` at index `LANE` is extracted and
/// passed, together with `a` and `b`, to the scalar intrinsic
/// `vqrdmlahh_s16`. `static_assert_imm2!` checks `LANE` at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahh_lane_s16<const LANE: i32>(a: i16, b: i16, c: int16x4_t) -> i16 {
    static_assert_imm2!(LANE);
    vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32))
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Scalar by-lane form with a 128-bit lane source: the element of `c` at
/// index `LANE` is extracted and passed, together with `a` and `b`, to the
/// scalar intrinsic `vqrdmlahh_s16`. `static_assert_imm3!` checks `LANE` at
/// compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahh_laneq_s16<const LANE: i32>(a: i16, b: i16, c: int16x8_t) -> i16 {
    static_assert_imm3!(LANE);
    vqrdmlahh_s16(a, b, simd_extract(c, LANE as u32))
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Scalar by-lane form: the element of `c` at index `LANE` is extracted and
/// passed, together with `a` and `b`, to the scalar intrinsic
/// `vqrdmlahs_s32`. `static_assert_imm1!` checks `LANE` at compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahs_lane_s32<const LANE: i32>(a: i32, b: i32, c: int32x2_t) -> i32 {
    static_assert_imm1!(LANE);
    vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32))
}

/// Signed saturating rounding doubling multiply accumulate returning high half
///
/// Scalar by-lane form with a 128-bit lane source: the element of `c` at
/// index `LANE` is extracted and passed, together with `a` and `b`, to the
/// scalar intrinsic `vqrdmlahs_s32`. `static_assert_imm2!` checks `LANE` at
/// compile time.
#[inline]
#[target_feature(enable = "rdm")]
#[cfg_attr(test, assert_instr(sqrdmlah, LANE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn vqrdmlahs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t) -> i32 {
    static_assert_imm2!(LANE);
    vqrdmlahs_s32(a, b, simd_extract(c, LANE as u32))
}

/// Signed saturating rounding doubling multiply subtract returning high half
Expand Down Expand Up @@ -20709,6 +20827,46 @@ mod test {
assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlah_s16` is compiled with
// `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlah_s16() {
    let a: i16x4 = i16x4::new(1, 1, 1, 1);
    let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
    let c: i16x4 = i16x4::new(2, 2, 2, 2);
    // The high half of the rounded, doubled product 0x7FFF * 2 is 2; 1 + 2 = 3.
    let e: i16x4 = i16x4::new(3, 3, 3, 3);
    let r: i16x4 = transmute(vqrdmlah_s16(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlahq_s16` is compiled with
// `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlahq_s16() {
    let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
    let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
    let c: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
    // The high half of the rounded, doubled product 0x7FFF * 2 is 2; 1 + 2 = 3.
    let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
    let r: i16x8 = transmute(vqrdmlahq_s16(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlah_s32` is compiled with
// `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlah_s32() {
    let a: i32x2 = i32x2::new(1, 1);
    let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
    let c: i32x2 = i32x2::new(2, 2);
    // The high half of the rounded, doubled product 0x7FFFFFFF * 2 is 2; 1 + 2 = 3.
    let e: i32x2 = i32x2::new(3, 3);
    let r: i32x2 = transmute(vqrdmlah_s32(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlahq_s32` is compiled with
// `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlahq_s32() {
    let a: i32x4 = i32x4::new(1, 1, 1, 1);
    let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
    let c: i32x4 = i32x4::new(2, 2, 2, 2);
    // The high half of the rounded, doubled product 0x7FFFFFFF * 2 is 2; 1 + 2 = 3.
    let e: i32x4 = i32x4::new(3, 3, 3, 3);
    let r: i32x4 = transmute(vqrdmlahq_s32(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vqrdmlahh_s16() {
let a: i16 = 1;
Expand All @@ -20729,6 +20887,86 @@ mod test {
assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlah_lane_s16` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlah_lane_s16() {
    let a: i16x4 = i16x4::new(1, 1, 1, 1);
    let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
    // Only lane 1 is non-zero, so picking any other lane would yield 1, not 3.
    let c: i16x4 = i16x4::new(0, 2, 0, 0);
    let e: i16x4 = i16x4::new(3, 3, 3, 3);
    let r: i16x4 = transmute(vqrdmlah_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlah_laneq_s16` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlah_laneq_s16() {
    let a: i16x4 = i16x4::new(1, 1, 1, 1);
    let b: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
    // Only lane 1 is non-zero, so picking any other lane would yield 1, not 3.
    let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
    let e: i16x4 = i16x4::new(3, 3, 3, 3);
    let r: i16x4 = transmute(vqrdmlah_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlahq_lane_s16` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlahq_lane_s16() {
    let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
    let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
    // Only lane 1 is non-zero, so picking any other lane would yield 1, not 3.
    let c: i16x4 = i16x4::new(0, 2, 0, 0);
    let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
    let r: i16x8 = transmute(vqrdmlahq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlahq_laneq_s16` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlahq_laneq_s16() {
    let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
    let b: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
    // Only lane 1 is non-zero, so picking any other lane would yield 1, not 3.
    let c: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
    let e: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
    let r: i16x8 = transmute(vqrdmlahq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlah_lane_s32` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlah_lane_s32() {
    let a: i32x2 = i32x2::new(1, 1);
    let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
    // Only lane 1 is non-zero, so picking lane 0 would yield 1, not 3.
    let c: i32x2 = i32x2::new(0, 2);
    let e: i32x2 = i32x2::new(3, 3);
    let r: i32x2 = transmute(vqrdmlah_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlah_laneq_s32` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlah_laneq_s32() {
    let a: i32x2 = i32x2::new(1, 1);
    let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
    // Only lane 1 is non-zero, so picking any other lane would yield 1, not 3.
    let c: i32x4 = i32x4::new(0, 2, 0, 0);
    let e: i32x2 = i32x2::new(3, 3);
    let r: i32x2 = transmute(vqrdmlah_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlahq_lane_s32` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlahq_lane_s32() {
    let a: i32x4 = i32x4::new(1, 1, 1, 1);
    let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
    // Only lane 1 is non-zero, so picking lane 0 would yield 1, not 3.
    let c: i32x2 = i32x2::new(0, 2);
    let e: i32x4 = i32x4::new(3, 3, 3, 3);
    let r: i32x4 = transmute(vqrdmlahq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

// Gated on `rdm` (not just `neon`) because `vqrdmlahq_laneq_s32` is compiled
// with `#[target_feature(enable = "rdm")]`; the test must not run on CPUs
// without that extension.
#[simd_test(enable = "rdm")]
unsafe fn test_vqrdmlahq_laneq_s32() {
    let a: i32x4 = i32x4::new(1, 1, 1, 1);
    let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
    // Only lane 1 is non-zero, so picking any other lane would yield 1, not 3.
    let c: i32x4 = i32x4::new(0, 2, 0, 0);
    let e: i32x4 = i32x4::new(3, 3, 3, 3);
    let r: i32x4 = transmute(vqrdmlahq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
    assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vqrdmlahh_lane_s16() {
let a: i16 = 1;
Expand Down
Loading

0 comments on commit 219cf81

Please sign in to comment.