Skip to content

Commit

Permalink
Fix 16-bit x86_pextr encoding
Browse files Browse the repository at this point in the history
The x86 ISA has (at least) two encodings for PEXTRW:
 1. in the SSE2 opcode (66 0f c5) the XMM operand uses r/m and the GPR operand uses reg
 2. in the SSE4.1 opcode (66 0f 3a 15) the XMM operand uses reg and the GPR operand uses r/m

This changes the 16-bit x86_pextr encoding from #1 to #2 so that it matches the other PEXTR* implementations, all of which already use the #2 style.
  • Loading branch information
abrown committed Sep 27, 2019
1 parent 9ec2a73 commit e2d46dd
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 12 deletions.
17 changes: 8 additions & 9 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1798,23 +1798,22 @@ pub(crate) fn define(
}

// SIMD extractlane
let mut x86_pextr_mapping: HashMap<u64, (&'static [u8], Option<SettingPredicateNumber>)> =
HashMap::new();
x86_pextr_mapping.insert(8, (&PEXTRB, Some(use_sse41_simd)));
x86_pextr_mapping.insert(16, (&PEXTRW_SSE2, None));
x86_pextr_mapping.insert(32, (&PEXTR, Some(use_sse41_simd)));
x86_pextr_mapping.insert(64, (&PEXTR, Some(use_sse41_simd)));
let mut x86_pextr_mapping: HashMap<u64, &'static [u8]> = HashMap::new();
x86_pextr_mapping.insert(8, &PEXTRB);
x86_pextr_mapping.insert(16, &PEXTRW);
x86_pextr_mapping.insert(32, &PEXTR);
x86_pextr_mapping.insert(64, &PEXTR);

for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
if let Some((opcode, isap)) = x86_pextr_mapping.get(&ty.lane_bits()) {
if let Some(opcode) = x86_pextr_mapping.get(&ty.lane_bits()) {
let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
if ty.lane_bits() < 64 {
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
} else {
// It turns out the 64-bit widths have REX/W encodings and are only available on
// x86_64.
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions cranelift-codegen/meta/src/isa/x86/opcodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
/// Extract byte (SSE4.1).
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];

/// Extract word (SSE2). There is a 4-byte SSE4.1 variant that can also move to m/16.
pub static PEXTRW_SSE2: [u8; 3] = [0x66, 0x0f, 0xc5];
/// Extract word (SSE4.1). Unlike the 3-byte SSE2 variant (`66 0f c5`), which can only move to a
/// register, this 4-byte encoding can also move to m/16.
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];

/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
Expand Down
2 changes: 1 addition & 1 deletion filetests/isa/x86/extractlane-binemit.clif
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ function %test_extractlane_i16() {
ebb0:
[-, %rax] v0 = iconst.i16 4
[-, %xmm1] v1 = splat.i16x8 v0
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f c5 c8 04
[-, %rax] v2 = x86_pextr v1, 4 ; bin: 66 0f 3a 15 c8 04
return
}

Expand Down

0 comments on commit e2d46dd

Please sign in to comment.