diff --git a/cranelift-codegen/meta/src/isa/x86/encodings.rs b/cranelift-codegen/meta/src/isa/x86/encodings.rs index b752b6bfa..f1df48f13 100644 --- a/cranelift-codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift-codegen/meta/src/isa/x86/encodings.rs @@ -1798,23 +1798,22 @@ pub(crate) fn define( } // SIMD extractlane - let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> = - HashMap::new(); - x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd))); - x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None)); - x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd))); - x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd))); + let mut x86_pextr_mapping: HashMap<u64, &'static [u8]> = HashMap::new(); + x86_pextr_mapping.insert(8, &PEXTRB); + x86_pextr_mapping.insert(16, &PEXTRW); + x86_pextr_mapping.insert(32, &PEXTR); + x86_pextr_mapping.insert(64, &PEXTR); for ty in ValueType::all_lane_types().filter(allowed_simd_type) { - if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) { + if let Some(opcode) = x86_pextr_mapping.get(&ty.lane_bits()) { let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size); let template = rec_r_ib_unsigned_gpr.opcodes(opcode); if ty.lane_bits() < 64 { - e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone()); + e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd)); } else { // It turns out the 64-bit widths have REX/W encodings and only are available on // x86_64. - e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone()); + e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd)); } } } diff --git a/cranelift-codegen/meta/src/isa/x86/opcodes.rs b/cranelift-codegen/meta/src/isa/x86/opcodes.rs index 6ae740883..706774a3d 100644 --- a/cranelift-codegen/meta/src/isa/x86/opcodes.rs +++ b/cranelift-codegen/meta/src/isa/x86/opcodes.rs @@ -269,8 +269,8 @@ pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; /// Extract byte (SSE4.1). 
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; -/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m/16. -pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5]; +/// Extract word (SSE4.1). This 4-byte form can also move to m/16; there is a 3-byte SSE2 variant that can only move to a register. +pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; /// Insert doubleword or quadword, depending on REX.W (SSE4.1). pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; diff --git a/filetests/isa/x86/extractlane-binemit.clif b/filetests/isa/x86/extractlane-binemit.clif index 86f16315c..d1478b99d 100644 --- a/filetests/isa/x86/extractlane-binemit.clif +++ b/filetests/isa/x86/extractlane-binemit.clif @@ -17,7 +17,7 @@ function %test_extractlane_i16() { ebb0: [-, %rax] v0 = iconst.i16 4 [-, %xmm1] v1 = splat.i16x8 v0 -[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04 +[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04 return }