From 7b49e4d6d63c80307b92e26e6d350cd84be24357 Mon Sep 17 00:00:00 2001 From: Gregory Sobol Date: Fri, 30 Sep 2022 17:02:30 +0300 Subject: [PATCH 1/4] fix popcnt for m1 and add test --- lib/compiler-singlepass/src/machine_arm64.rs | 58 ++++++++++++++++++++ tests/compilers/issues.rs | 54 ++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 6f6c16d3e05..d41ca1bebf6 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -3047,9 +3047,29 @@ impl Machine for MachineARM64 { }; let label_loop = self.assembler.get_label(); let label_exit = self.assembler.get_label(); + + // If target is apple M1 processor, then: + // ``` + // mov w1, 1 + // mov w2, 32 + // lsl w3, w1, w2 + // ``` + // result in `w3` register is not zero, + // in spite of the fact that in Aarch64 documentation the result must be 0. + #[cfg(target_vendor = "apple")] + let src_is_one_label = self.assembler.get_label(); + self.assembler .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // 0 => dest self.assembler.emit_cbz_label(Size::S32, src, label_exit)?; // src==0, exit + + // If `src` reg value equal to `1`, then skip loop and go to `src_is_one_label`. + #[cfg(target_vendor = "apple")] + { + self.assembler.emit_cmp(Size::S32, Location::Imm32(1), src)?; // cmp `src`, 1 + self.assembler.emit_bcond_label(Condition::Eq, src_is_one_label)?; // if `src` == 1 goto src_is_one_label + } + self.assembler.emit_label(label_loop)?; // loop: self.assembler .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // inc dest @@ -3058,6 +3078,15 @@ impl Machine for MachineARM64 { .emit_add(Size::S32, tmp, Location::Imm8(1), tmp)?; // inc tmp self.assembler.emit_lsl(Size::S32, src, tmp, src)?; // src << tmp => src self.assembler.emit_cbnz_label(Size::S32, src, label_loop)?; // if src!=0 goto loop + + // Write 1 to `dest`, in case `src` is equal to 1. + #[cfg(target_vendor = "apple")] + { + self.assembler.emit_b_label(label_exit)?; // if comes from loop then goto exit + self.assembler.emit_label(src_is_one_label)?; + self.assembler.emit_mov(Size::S32, Location::Imm32(1), dest)?; // `dest` <= 1 + } + self.assembler.emit_label(label_exit)?; if ret != dest { self.move_location(Size::S32, dest, ret)?; @@ -4085,9 +4114,29 @@ impl Machine for MachineARM64 { }; let label_loop = self.assembler.get_label(); let label_exit = self.assembler.get_label(); + + // If target is apple M1 processor, then: + // ``` + // mov x1, 1 + // mov x2, 32 + // lsl x3, x1, x2 + // ``` + // result in `x3` register is not zero, + // in spite of the fact that in Aarch64 documentation the result must be 0. + #[cfg(target_vendor = "apple")] + let src_is_one_label = self.assembler.get_label(); + self.assembler .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; self.assembler.emit_cbz_label(Size::S64, src, label_exit)?; + + // If `src` reg value equal to `1`, then skip loop and go to `src_is_one_label`. + #[cfg(target_vendor = "apple")] + { + self.assembler.emit_cmp(Size::S64, Location::Imm64(1), src)?; // cmp `src`, 1 + self.assembler.emit_bcond_label(Condition::Eq, src_is_one_label)?; // if `src` == 1 goto src_is_one_label + } + self.assembler.emit_label(label_loop)?; self.assembler .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; @@ -4096,6 +4145,15 @@ impl Machine for MachineARM64 { .emit_add(Size::S32, tmp, Location::Imm8(1), tmp)?; self.assembler.emit_lsl(Size::S64, src, tmp, src)?; self.assembler.emit_cbnz_label(Size::S64, src, label_loop)?; + + // Write 1 to `dest`, in case `src` is equal to 1. + #[cfg(target_vendor = "apple")] + { + self.assembler.emit_b_label(label_exit)?; // if comes from loop then goto exit + self.assembler.emit_label(src_is_one_label)?; + self.assembler.emit_mov(Size::S64, Location::Imm64(1), dest)?; // `dest` <= 1 + } + self.assembler.emit_label(label_exit)?; if ret != dest { self.move_location(Size::S64, dest, ret)?; diff --git a/tests/compilers/issues.rs b/tests/compilers/issues.rs index e8d5dfb56a6..aa8e572bf91 100644 --- a/tests/compilers/issues.rs +++ b/tests/compilers/issues.rs @@ -263,3 +263,57 @@ fn regression_gpr_exhaustion_for_calls(mut config: crate::Config) -> Result<()> let instance = Instance::new(&mut store, &module, &imports)?; Ok(()) } + +#[compiler_test(issues)] +fn test_popcnt(mut config: crate::Config) -> Result<()> { + let mut store = config.store(); + let mut env = FunctionEnv::new(&mut store, ()); + let imports: Imports = imports! {}; + + let wat = r#" + (module + (func $popcnt_i32 (export "popcnt_i32") (param i32) (result i32) + local.get 0 + i32.popcnt + ) + (func $popcnt_i64 (export "popcnt_i64") (param i64) (result i64) + local.get 0 + i64.popcnt + ) + )"#; + + let module = Module::new(&store, wat)?; + let instance = Instance::new(&mut store, &module, &imports)?; + + let popcnt_i32 = instance.exports.get_function("popcnt_i32").unwrap(); + let popcnt_i64 = instance.exports.get_function("popcnt_i64").unwrap(); + + let get_next_number_i32 = |mut num: i32| { + num ^= num << 13; + num ^= num >> 17; + num ^= num << 5; + num + }; + let get_next_number_i64 = |mut num: i64| { + num ^= num << 34; + num ^= num >> 40; + num ^= num << 7; + num + }; + + let mut num = 1; + for _ in 1..10000 { + let result = popcnt_i32.call(&mut store, &[Value::I32(num)]).unwrap(); + assert_eq!(&Value::I32(num.count_ones() as i32), result.get(0).unwrap()); + num = get_next_number_i32(num); + } + + let mut num = 1; + for _ in 1..10000 { + let result = popcnt_i64.call(&mut store, &[Value::I64(num)]).unwrap(); + assert_eq!(&Value::I64(num.count_ones() as i64), result.get(0).unwrap()); + num = get_next_number_i64(num); + } + + Ok(()) +} \ No newline at end of file From 913cb7a9a67d61bb06d7ec06a7baa0e13dd4a008 Mon Sep 17 00:00:00 2001 From: Gregory Sobol Date: Wed, 5 Oct 2022 15:21:09 +0300 Subject: [PATCH 2/4] change solution --- lib/compiler-singlepass/src/machine_arm64.rs | 78 ++------------------ 1 file changed, 8 insertions(+), 70 deletions(-) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index d41ca1bebf6..3db42bfe5ca 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -3047,46 +3047,14 @@ impl Machine for MachineARM64 { }; let label_loop = self.assembler.get_label(); let label_exit = self.assembler.get_label(); - - // If target is apple M1 processor, then: - // ``` - // mov w1, 1 - // mov w2, 32 - // lsl w3, w1, w2 - // ``` - // result in `w3` register is not zero, - // in spite of the fact that in Aarch64 documentation the result must be 0. - #[cfg(target_vendor = "apple")] - let src_is_one_label = self.assembler.get_label(); - self.assembler .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // 0 => dest self.assembler.emit_cbz_label(Size::S32, src, label_exit)?; // src==0, exit - - // If `src` reg value equal to `1`, then skip loop and go to `src_is_one_label`. - #[cfg(target_vendor = "apple")] - { - self.assembler.emit_cmp(Size::S32, Location::Imm32(1), src)?; // cmp `src`, 1 - self.assembler.emit_bcond_label(Condition::Eq, src_is_one_label)?; // if `src` == 1 goto src_is_one_label - } - self.assembler.emit_label(label_loop)?; // loop: - self.assembler - .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // inc dest self.assembler.emit_clz(Size::S32, src, tmp)?; // clz src => tmp - self.assembler - .emit_add(Size::S32, tmp, Location::Imm8(1), tmp)?; // inc tmp self.assembler.emit_lsl(Size::S32, src, tmp, src)?; // src << tmp => src + self.assembler.emit_lsl(Size::S32, src, Location::Imm8(1), src)?; // src << 1 => src self.assembler.emit_cbnz_label(Size::S32, src, label_loop)?; // if src!=0 goto loop - - // Write 1 to `dest`, in case `src` is equal to 1. - #[cfg(target_vendor = "apple")] - { - self.assembler.emit_b_label(label_exit)?; // if comes from loop then goto exit - self.assembler.emit_label(src_is_one_label)?; - self.assembler.emit_mov(Size::S32, Location::Imm32(1), dest)?; // `dest` <= 1 - } - self.assembler.emit_label(label_exit)?; if ret != dest { self.move_location(Size::S32, dest, ret)?; @@ -4114,46 +4082,16 @@ impl Machine for MachineARM64 { }; let label_loop = self.assembler.get_label(); let label_exit = self.assembler.get_label(); - - // If target is apple M1 processor, then: - // ``` - // mov x1, 1 - // mov x2, 32 - // lsl x3, x1, x2 - // ``` - // result in `x3` register is not zero, - // in spite of the fact that in Aarch64 documentation the result must be 0. - #[cfg(target_vendor = "apple")] - let src_is_one_label = self.assembler.get_label(); - self.assembler - .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; - self.assembler.emit_cbz_label(Size::S64, src, label_exit)?; - - // If `src` reg value equal to `1`, then skip loop and go to `src_is_one_label`. - #[cfg(target_vendor = "apple")] - { - self.assembler.emit_cmp(Size::S64, Location::Imm64(1), src)?; // cmp `src`, 1 - self.assembler.emit_bcond_label(Condition::Eq, src_is_one_label)?; // if `src` == 1 goto src_is_one_label - } - + .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // dest <= 0 + self.assembler.emit_cbz_label(Size::S64, src, label_exit)?; // src == 0, then goto label_exit self.assembler.emit_label(label_loop)?; self.assembler - .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; - self.assembler.emit_clz(Size::S64, src, tmp)?; - self.assembler - .emit_add(Size::S32, tmp, Location::Imm8(1), tmp)?; - self.assembler.emit_lsl(Size::S64, src, tmp, src)?; - self.assembler.emit_cbnz_label(Size::S64, src, label_loop)?; - - // Write 1 to `dest`, in case `src` is equal to 1. - #[cfg(target_vendor = "apple")] - { - self.assembler.emit_b_label(label_exit)?; // if comes from loop then goto exit - self.assembler.emit_label(src_is_one_label)?; - self.assembler.emit_mov(Size::S64, Location::Imm64(1), dest)?; // `dest` <= 1 - } - + .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 + self.assembler.emit_clz(Size::S64, src, tmp)?; // clz src => tmp + self.assembler.emit_lsl(Size::S64, src, tmp, src)?; // src << tmp => src + self.assembler.emit_lsl(Size::S64, src, Location::Imm8(1), src)?; // src << 1 => src + self.assembler.emit_cbnz_label(Size::S64, src, label_loop)?; // src != 0, then goto label_loop self.assembler.emit_label(label_exit)?; if ret != dest { self.move_location(Size::S64, dest, ret)?; From f995252c555de7f0f3b387854b9ce711b773cfa8 Mon Sep 17 00:00:00 2001 From: Gregory Sobol Date: Wed, 5 Oct 2022 15:25:02 +0300 Subject: [PATCH 3/4] fix popcnt32 --- lib/compiler-singlepass/src/machine_arm64.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index 3db42bfe5ca..b8b3818c22e 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -3051,6 +3051,8 @@ impl Machine for MachineARM64 { .emit_mov(Size::S32, Location::GPR(GPR::XzrSp), dest)?; // 0 => dest self.assembler.emit_cbz_label(Size::S32, src, label_exit)?; // src==0, exit self.assembler.emit_label(label_loop)?; // loop: + self.assembler + .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 self.assembler.emit_clz(Size::S32, src, tmp)?; // clz src => tmp self.assembler.emit_lsl(Size::S32, src, tmp, src)?; // src << tmp => src self.assembler.emit_lsl(Size::S32, src, Location::Imm8(1), src)?; // src << 1 => src From 25d6a49f1c40e29378497470a14a518a7adfbf60 Mon Sep 17 00:00:00 2001 From: Gregory Sobol Date: Wed, 5 Oct 2022 18:42:38 +0300 Subject: [PATCH 4/4] fmt --- lib/compiler-singlepass/src/machine_arm64.rs | 6 ++++-- tests/compilers/issues.rs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/compiler-singlepass/src/machine_arm64.rs b/lib/compiler-singlepass/src/machine_arm64.rs index b8b3818c22e..e7aadbbfdfd 100644 --- a/lib/compiler-singlepass/src/machine_arm64.rs +++ b/lib/compiler-singlepass/src/machine_arm64.rs @@ -3055,7 +3055,8 @@ impl Machine for MachineARM64 { .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 self.assembler.emit_clz(Size::S32, src, tmp)?; // clz src => tmp self.assembler.emit_lsl(Size::S32, src, tmp, src)?; // src << tmp => src - self.assembler.emit_lsl(Size::S32, src, Location::Imm8(1), src)?; // src << 1 => src + self.assembler + .emit_lsl(Size::S32, src, Location::Imm8(1), src)?; // src << 1 => src self.assembler.emit_cbnz_label(Size::S32, src, label_loop)?; // if src!=0 goto loop self.assembler.emit_label(label_exit)?; if ret != dest { @@ -4092,7 +4093,8 @@ impl Machine for MachineARM64 { .emit_add(Size::S32, dest, Location::Imm8(1), dest)?; // dest += 1 self.assembler.emit_clz(Size::S64, src, tmp)?; // clz src => tmp self.assembler.emit_lsl(Size::S64, src, tmp, src)?; // src << tmp => src - self.assembler.emit_lsl(Size::S64, src, Location::Imm8(1), src)?; // src << 1 => src + self.assembler + .emit_lsl(Size::S64, src, Location::Imm8(1), src)?; // src << 1 => src self.assembler.emit_cbnz_label(Size::S64, src, label_loop)?; // src != 0, then goto label_loop self.assembler.emit_label(label_exit)?; if ret != dest { diff --git a/tests/compilers/issues.rs b/tests/compilers/issues.rs index aa8e572bf91..f8cd10f51e2 100644 --- a/tests/compilers/issues.rs +++ b/tests/compilers/issues.rs @@ -316,4 +316,4 @@ fn test_popcnt(mut config: crate::Config) -> Result<()> { } Ok(()) -} \ No newline at end of file +}