Commit
Add more mixed arithmetic instructions (rust-lang#541)
* Add Vector Multiply Add

* Add Vector Negative Multiply Subtract

* Add Vector Sum Across Partial (1/2) Saturated

* Add Vector Sum Across Partial (1/4) Saturated
lu-zero authored and alexcrichton committed Jul 26, 2018
1 parent 3e2efe9 commit 417add6
Showing 1 changed file with 262 additions and 0 deletions.
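
A minimal usage sketch of the four intrinsics added here, assuming a PowerPC target with the `altivec` feature and the public items of `coresimd::powerpc::altivec` in scope; the `demo` function and its arguments are hypothetical, only the `vec_*` calls come from this commit.

#[target_feature(enable = "altivec")]
unsafe fn demo(
    a: vector_float, b: vector_float, c: vector_float,
    x: vector_signed_int, y: vector_signed_int,
    bytes: vector_signed_char,
) {
    let _madd = vec_madd(a, b, c);     // per-lane a * b + c
    let _nmsub = vec_nmsub(a, b, c);   // per-lane -(a * b - c)
    let _sum2s = vec_sum2s(x, y);      // saturated sums over pairs of words of x
    let _sum4s = vec_sum4s(bytes, y);  // saturated sums over groups of four bytes
}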
262 changes: 262 additions & 0 deletions coresimd/powerpc/altivec.rs
@@ -81,12 +81,99 @@ extern "C" {
#[link_name = "llvm.ppc.altivec.vmsumshm"]
fn vmsumshm(
a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int;
#[link_name = "llvm.ppc.altivec.vmaddfp"]
fn vmaddfp(
a: vector_float, b: vector_float, c: vector_float) -> vector_float;
#[link_name = "llvm.ppc.altivec.vnmsubfp"]
fn vnmsubfp(
a: vector_float, b: vector_float, c: vector_float) -> vector_float;
#[link_name = "llvm.ppc.altivec.vsum2sws"]
fn vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int;
#[link_name = "llvm.ppc.altivec.vsum4ubs"]
fn vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int;
#[link_name = "llvm.ppc.altivec.vsum4sbs"]
fn vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int;
#[link_name = "llvm.ppc.altivec.vsum4shs"]
fn vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int;
}

mod sealed {

use super::*;

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vsum4ubs))]
unsafe fn vec_vsum4ubs(a: vector_unsigned_char, b: vector_unsigned_int) -> vector_unsigned_int {
vsum4ubs(a, b)
}

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vsum4sbs))]
unsafe fn vec_vsum4sbs(a: vector_signed_char, b: vector_signed_int) -> vector_signed_int {
vsum4sbs(a, b)
}

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vsum4shs))]
unsafe fn vec_vsum4shs(a: vector_signed_short, b: vector_signed_int) -> vector_signed_int {
vsum4shs(a, b)
}


pub trait VectorSum4s<Other> {
unsafe fn vec_sum4s(self, b: Other) -> Other;
}

impl VectorSum4s<vector_unsigned_int> for vector_unsigned_char {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_sum4s(self, b: vector_unsigned_int) -> vector_unsigned_int {
vsum4ubs(self, b)
}
}

impl VectorSum4s<vector_signed_int> for vector_signed_char {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
vsum4sbs(self, b)
}
}

impl VectorSum4s<vector_signed_int> for vector_signed_short {
#[inline]
#[target_feature(enable = "altivec")]
unsafe fn vec_sum4s(self, b: vector_signed_int) -> vector_signed_int {
vsum4shs(self, b)
}
}

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vsum2sws))]
unsafe fn vec_vsum2sws(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
vsum2sws(a, b)
}

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vnmsubfp))]
unsafe fn vec_vnmsubfp(
a: vector_float, b: vector_float, c: vector_float) -> vector_float {
vnmsubfp(a, b, c)
}

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vmaddfp))]
unsafe fn vec_vmaddfp(
a: vector_float, b: vector_float, c: vector_float) -> vector_float {
vmaddfp(a, b, c)
}

#[inline]
#[target_feature(enable = "altivec")]
#[cfg_attr(test, assert_instr(vmsumubm))]
@@ -582,6 +669,20 @@ mod endian {

b.vec_vperm(a, c)
}

/// Vector Sum Across Partial (1/2) Saturated
#[inline]
#[target_feature(enable = "altivec")]
pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
// vsum2sws has big-endian bias
//
// swap the even b elements with the odd ones
let flip = ::mem::transmute(u8x16::new(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11));
let b = vec_perm(b, b, flip);
let c = vsum2sws(a, b);

vec_perm(c, c, flip)
}
}
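
For reference, a scalar sketch of the big-endian `vsum2sws` behaviour that the little-endian wrapper above compensates for; the helper name is made up here and saturation is modelled with a clamp.

// Each odd result word is the saturated sum of a pair of adjacent `a` words
// plus the matching odd word of `b`; even result words are zero. On
// little-endian targets the hardware pairs different lanes, hence the
// `vec_perm` flip of `b` and of the result in the wrapper above.
fn vsum2sws_model(a: [i32; 4], b: [i32; 4]) -> [i32; 4] {
    let sat = |x: i64| x.clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    [
        0,
        sat(a[0] as i64 + a[1] as i64 + b[1] as i64),
        0,
        sat(a[2] as i64 + a[3] as i64 + b[3] as i64),
    ]
}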

/// Vector Multiply Add Saturated
@@ -629,6 +730,29 @@ pub unsafe fn vec_msums<T, U>(a: T, b: T, c: U) -> U
a.vec_msums(b, c)
}

/// Vector Multiply Add
#[inline]
#[target_feature(enable = "altivec")]
pub unsafe fn vec_madd(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
vmaddfp(a, b, c)
}

/// Vector Negative Multiply Subtract
#[inline]
#[target_feature(enable = "altivec")]
pub unsafe fn vec_nmsub(a: vector_float, b: vector_float, c: vector_float) -> vector_float {
vnmsubfp(a, b, c)
}
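
As the tests later in the diff spell out, the two float intrinsics are per-lane fused operations; an illustrative scalar model of a single lane (not part of the commit):

fn madd_lane(a: f32, b: f32, c: f32) -> f32 {
    a * b + c // vec_madd / vmaddfp
}

fn nmsub_lane(a: f32, b: f32, c: f32) -> f32 {
    -(a * b - c) // vec_nmsub / vnmsubfp
}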

/// Vector Sum Across Partial (1/4) Saturated
#[inline]
#[target_feature(enable = "altivec")]
pub unsafe fn vec_sum4s<T, U>(a: T, b: U) -> U
where
T: sealed::VectorSum4s<U> {
a.vec_sum4s(b)
}
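
A scalar sketch of what the signed-char overload of `vec_sum4s` computes per 32-bit lane, matching the `test_vec_sum4s_signed_char` expectations further down; the helper is hypothetical and saturation is again modelled with a clamp.

// Each result word is the saturated sum of four consecutive bytes of `a`
// plus the corresponding word of `b`.
fn sum4s_signed_char_model(a: [i8; 16], b: [i32; 4]) -> [i32; 4] {
    let mut out = [0i32; 4];
    for i in 0..4 {
        let mut acc = b[i] as i64;
        for byte in &a[4 * i..4 * i + 4] {
            acc += *byte as i64;
        }
        out[i] = acc.clamp(i32::MIN as i64, i32::MAX as i64) as i32;
    }
    out
}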

#[cfg(target_endian = "big")]
mod endian {
use super::*;
@@ -641,6 +765,13 @@ mod endian {
{
a.vec_vperm(b, c)
}

/// Vector Sum Across Partial (1/2) Saturated
#[inline]
#[target_feature(enable = "altivec")]
pub unsafe fn vec_sum2s(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int {
vsum2sws(a, b)
}
}

pub use self::endian::*;
@@ -768,6 +899,34 @@ mod tests {
assert_eq!(d, ::mem::transmute(vec_madds(a, b, c)));
}

#[simd_test(enable = "altivec")]
unsafe fn test_vec_madd_float() {
let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
let d = f32x4::new(
0.1 * 0.1 + 0.1,
0.2 * 0.2 + 0.2,
0.3 * 0.3 + 0.3,
0.4 * 0.4 + 0.4);

assert_eq!(d, ::mem::transmute(vec_madd(a, b, c)));
}

#[simd_test(enable = "altivec")]
unsafe fn test_vec_nmsub_float() {
let a: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
let b: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
let c: vector_float = ::mem::transmute(f32x4::new(0.1, 0.2, 0.3, 0.4));
let d = f32x4::new(
-(0.1 * 0.1 - 0.1),
-(0.2 * 0.2 - 0.2),
-(0.3 * 0.3 - 0.3),
-(0.4 * 0.4 - 0.4),
);
assert_eq!(d, ::mem::transmute(vec_nmsub(a, b, c)));
}

#[simd_test(enable = "altivec")]
unsafe fn test_vec_mradds() {
let a: vector_signed_short = ::mem::transmute(i16x8::new(
@@ -991,6 +1150,109 @@ mod tests {
assert_eq!(d, ::mem::transmute(vec_msums(a, b, c)));
}

#[simd_test(enable = "altivec")]
unsafe fn test_vec_sum2s() {
let a: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
let d = i32x4::new(
0,
0 + 1 + 1,
0,
2 + 3 + 3);

assert_eq!(d, ::mem::transmute(vec_sum2s(a, b)));
}

#[simd_test(enable = "altivec")]
unsafe fn test_vec_sum4s_unsigned_char() {
let a: vector_unsigned_char = ::mem::transmute(u8x16::new(
0,
1,
2,
3,

4,
5,
6,
7,

0,
1,
2,
3,

4,
5,
6,
7,
));
let b: vector_unsigned_int = ::mem::transmute(u32x4::new(0, 1, 2, 3));
let d = u32x4::new(
0 + 1 + 2 + 3 + 0,
4 + 5 + 6 + 7 + 1,
0 + 1 + 2 + 3 + 2,
4 + 5 + 6 + 7 + 3,
);

assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
}
#[simd_test(enable = "altivec")]
unsafe fn test_vec_sum4s_signed_char() {
let a: vector_signed_char = ::mem::transmute(i8x16::new(
0,
1,
2,
3,

4,
5,
6,
7,

0,
1,
2,
3,

4,
5,
6,
7,
));
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
let d = i32x4::new(
0 + 1 + 2 + 3 + 0,
4 + 5 + 6 + 7 + 1,
0 + 1 + 2 + 3 + 2,
4 + 5 + 6 + 7 + 3,
);

assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
}
#[simd_test(enable = "altivec")]
unsafe fn test_vec_sum4s_signed_short() {
let a: vector_signed_short = ::mem::transmute(i16x8::new(
0,
1,
2,
3,

4,
5,
6,
7,
));
let b: vector_signed_int = ::mem::transmute(i32x4::new(0, 1, 2, 3));
let d = i32x4::new(
0 + 1 + 0,
2 + 3 + 1,
4 + 5 + 2,
6 + 7 + 3,
);

assert_eq!(d, ::mem::transmute(vec_sum4s(a, b)));
}

#[simd_test(enable = "altivec")]
unsafe fn vec_add_i32x4_i32x4() {
let x = i32x4::new(1, 2, 3, 4);