Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Discuss implementing constants for SIMD #1

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions cranelift-codegen/meta/src/cdsl/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ impl InstructionGroup {
pub fn by_name(&self, name: &'static str) -> &Instruction {
self.instructions
.iter()
.find(|inst| inst.name == name)
.find(|inst| &inst.name == name)
.expect(&format!("unexisting instruction with name {}", name))
}
}
Expand Down Expand Up @@ -155,7 +155,7 @@ impl ops::Deref for Instruction {

impl Instruction {
pub fn snake_name(&self) -> &str {
if self.name == "return" {
if &self.name == "return" {
"return_"
} else {
&self.name
Expand All @@ -177,8 +177,8 @@ impl Instruction {
bind(self.clone(), Some(lane_type.into()), Vec::new())
}

pub fn bind_vector(&self, lane_type: impl Into<LaneType>, num_lanes: u64) -> BoundInstruction {
bind_vector(self.clone(), lane_type.into(), num_lanes, Vec::new())
pub fn bind_vector_from_lane(&self, lane_type: impl Into<LaneType>) -> BoundInstruction {
bind_vector(self.clone(), lane_type.into(), Vec::new())
}

pub fn bind_any(&self) -> BoundInstruction {
Expand Down Expand Up @@ -406,8 +406,8 @@ impl BoundInstruction {
bind(self.inst, Some(lane_type.into()), self.value_types)
}

pub fn bind_vector(self, lane_type: impl Into<LaneType>, num_lanes: u64) -> BoundInstruction {
bind_vector(self.inst, lane_type.into(), num_lanes, self.value_types)
pub fn bind_vector_from_lane(self, lane_type: impl Into<LaneType>) -> BoundInstruction {
bind_vector(self.inst, lane_type.into(), self.value_types)
}

pub fn bind_any(self) -> BoundInstruction {
Expand Down Expand Up @@ -774,7 +774,7 @@ impl InstructionPredicateNode {
ret.extend(node.collect_leaves());
}
}
_ => ret.push(&self),
_ => ret.push(self),
}
ret
}
Expand Down Expand Up @@ -1081,9 +1081,9 @@ fn bind(
fn bind_vector(
inst: Instruction,
lane_type: LaneType,
num_lanes: u64,
mut value_types: Vec<ValueTypeOrAny>,
) -> BoundInstruction {
let num_lanes = 128 / lane_type.lane_bits();
let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
value_types.push(ValueTypeOrAny::ValueType(vector_type));
verify_polymorphic_binding(&inst, &value_types);
Expand Down
12 changes: 12 additions & 0 deletions cranelift-codegen/meta/src/cdsl/operands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ pub struct OperandKind {
/// The camel-cased name of an operand kind is also the Rust type used to represent it.
pub rust_type: String,

pub match_as_ref: bool,

pub fields: OperandKindFields,
}

Expand Down Expand Up @@ -165,6 +167,8 @@ pub struct OperandKindBuilder {
/// The camel-cased name of an operand kind is also the Rust type used to represent it.
rust_type: Option<String>,

match_as_ref: bool,

fields: OperandKindFields,
}

Expand All @@ -175,6 +179,7 @@ impl OperandKindBuilder {
doc: None,
default_member: None,
rust_type: None,
match_as_ref: false,
fields,
}
}
Expand All @@ -185,6 +190,7 @@ impl OperandKindBuilder {
doc: None,
default_member: None,
rust_type: None,
match_as_ref: false,
fields: OperandKindFields::ImmValue,
}
}
Expand All @@ -195,6 +201,7 @@ impl OperandKindBuilder {
doc: None,
default_member: None,
rust_type: None,
match_as_ref: false,
fields: OperandKindFields::ImmEnum(values),
}
}
Expand All @@ -214,6 +221,10 @@ impl OperandKindBuilder {
self.rust_type = Some(rust_type.to_string());
self
}
pub fn match_as_ref(mut self, enabled: bool) -> Self {
self.match_as_ref = enabled;
self
}

pub fn build(self) -> OperandKind {
let default_member = match self.default_member {
Expand Down Expand Up @@ -254,6 +265,7 @@ impl OperandKindBuilder {
doc,
default_member,
rust_type,
match_as_ref: self.match_as_ref,
fields: self.fields,
}
}
Expand Down
6 changes: 5 additions & 1 deletion cranelift-codegen/meta/src/gen_binemit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,11 @@ fn gen_recipe(formats: &FormatRegistry, recipe: &EncodingRecipe, fmt: &mut Forma
fmt.indent(|fmt| {
fmt.line("opcode,");
for f in &inst_format.imm_fields {
fmtln!(fmt, "{},", f.member);
if f.kind.match_as_ref {
fmtln!(fmt, "ref {},", f.member);
} else {
fmtln!(fmt, "{},", f.member);
}
}
if want_args {
if inst_format.has_value_list || num_value_ops > 1 {
Expand Down
14 changes: 9 additions & 5 deletions cranelift-codegen/meta/src/gen_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,23 +333,27 @@ fn gen_instruction_data_impl(registry: &FormatRegistry, fmt: &mut Formatter) {
fmt.indent(|fmt| {
for format in registry.iter() {
let name = format!("InstructionData::{}", format.name);
let mut members = vec!["opcode"];
let mut members: Vec<String> = vec!["opcode".into()];

let args = if format.typevar_operand.is_none() {
"&()"
} else if format.has_value_list {
members.push("ref args");
members.push("ref args".into());
"args.as_slice(pool)"
} else if format.num_value_operands == 1 {
members.push("ref arg");
members.push("ref arg".into());
"arg"
} else {
members.push("ref args");
members.push("ref args".into());
"args"
};

for field in &format.imm_fields {
members.push(field.member);
if field.kind.match_as_ref {
members.push(format!("ref {}", field.member));
} else {
members.push(field.member.into());
}
}
let members = members.join(", ");

Expand Down
119 changes: 84 additions & 35 deletions cranelift-codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,10 @@ impl PerCpuModeEncodings {

fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
if let Some(found_index) = self.recipes_inverse.get(&recipe) {
assert!(
self.recipes[*found_index].name == recipe.name,
format!(
"trying to insert different recipes with a same name ({})",
recipe.name
)
assert_eq!(
self.recipes[*found_index].name, recipe.name,
"trying to insert different recipes with a same name ({})",
recipe.name
);
*found_index
} else {
Expand Down Expand Up @@ -252,14 +250,38 @@ impl PerCpuModeEncodings {
}

/// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened
fn enc_32_64_isap(
fn enc_32_64_maybe_isap(
&mut self,
inst: BoundInstruction,
inst: impl Clone + Into<InstSpec>,
template: Template,
isap: SettingPredicateNumber,
isap: Option<SettingPredicateNumber>,
) {
self.enc32_isap(inst.clone(), template.clone(), isap);
self.enc64_isap(inst, template, isap);
self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
self.enc64_maybe_isap(inst, template, isap);
}

fn enc32_maybe_isap(
&mut self,
inst: impl Into<InstSpec>,
template: Template,
isap: Option<SettingPredicateNumber>,
) {
match isap {
None => self.enc32(inst, template),
Some(isap) => self.enc32_isap(inst, template, isap),
}
}

fn enc64_maybe_isap(
&mut self,
inst: impl Into<InstSpec>,
template: Template,
isap: Option<SettingPredicateNumber>,
) {
match isap {
None => self.enc64(inst, template),
Some(isap) => self.enc64_isap(inst, template, isap),
}
}
}

Expand Down Expand Up @@ -302,6 +324,7 @@ pub fn define(
let copy_special = shared.by_name("copy_special");
let ctz = shared.by_name("ctz");
let debugtrap = shared.by_name("debugtrap");
let extractlane = shared.by_name("extractlane");
let f32const = shared.by_name("f32const");
let f64const = shared.by_name("f64const");
let fadd = shared.by_name("fadd");
Expand Down Expand Up @@ -385,6 +408,7 @@ pub fn define(
let uload8_complex = shared.by_name("uload8_complex");
let ushr = shared.by_name("ushr");
let ushr_imm = shared.by_name("ushr_imm");
let vconst = shared.by_name("vconst");
let x86_bsf = x86.by_name("x86_bsf");
let x86_bsr = x86.by_name("x86_bsr");
let x86_cvtt2si = x86.by_name("x86_cvtt2si");
Expand Down Expand Up @@ -523,6 +547,7 @@ pub fn define(
let rec_urm = r.template("urm");
let rec_urm_noflags = r.template("urm_noflags");
let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
let rec_vconst = r.template("vconst");

// Predicates shorthands.
let all_ones_funcaddrs_and_not_is_pic =
Expand All @@ -534,7 +559,6 @@ pub fn define(
let use_popcnt = settings.predicate_by_name("use_popcnt");
let use_lzcnt = settings.predicate_by_name("use_lzcnt");
let use_bmi1 = settings.predicate_by_name("use_bmi1");
let use_sse2 = settings.predicate_by_name("use_sse2");
let use_ssse3 = settings.predicate_by_name("use_ssse3");
let use_sse41 = settings.predicate_by_name("use_sse41");

Expand Down Expand Up @@ -1598,60 +1622,78 @@ pub fn define(

// PSHUFB, 8-bit shuffle using two XMM registers
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = x86_pshufb.bind_vector(ty, number_of_lanes);
let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 0x00]);
let instruction = x86_pshufb.bind_vector_from_lane(ty);
let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]);
e.enc32_isap(instruction.clone(), template.clone(), use_ssse3);
e.enc64_isap(instruction, template, use_ssse3);
}

// PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = x86_pshufd.bind_vector(ty, number_of_lanes);
let instruction = x86_pshufd.bind_vector_from_lane(ty);
let template = rec_r_ib_unsigned.nonrex().opcodes(vec![0x66, 0x0f, 0x70]);
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
e.enc64_isap(instruction, template, use_sse2);
e.enc32(instruction.clone(), template.clone());
e.enc64(instruction, template);
}

// SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
// to the Intel manual: "When the destination operand is an XMM register, the source operand is
// written to the low doubleword of the register and the register is zero-extended to 128 bits."
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = scalar_to_vector.bind_vector(ty, number_of_lanes).bind(ty);
let instruction = scalar_to_vector.bind_vector_from_lane(ty).bind(ty);
let template = rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]); // MOVD/MOVQ
if ty.lane_bits() < 64 {
// no 32-bit encodings for 64-bit widths
e.enc32_isap(instruction.clone(), template.clone(), use_sse2);
e.enc32(instruction.clone(), template.clone());
}
e.enc_x86_64_isap(instruction, template, use_sse2);
e.enc_x86_64(instruction, template);
}

// SIMD insertlane
let mut insertlane_mapping: HashMap<u64, (Vec<u8>, SettingPredicateNumber)> = HashMap::new();
insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], use_sse41)); // PINSRB
insertlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], use_sse2)); // PINSRW
insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], use_sse41)); // PINSRD
insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], use_sse41)); // PINSRQ, only x86_64
let mut insertlane_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
HashMap::new();
insertlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x20], Some(use_sse41))); // PINSRB
insertlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc4], None)); // PINSRW from SSE2
insertlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41))); // PINSRD
insertlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x22], Some(use_sse41))); // PINSRQ, only x86_64

for ty in ValueType::all_lane_types() {
if let Some((opcode, isap)) = insertlane_mapping.get(&ty.lane_bits()) {
let number_of_lanes = 128 / ty.lane_bits();
let instruction = insertlane.bind_vector(ty, number_of_lanes);
let instruction = insertlane.bind_vector_from_lane(ty);
let template = rec_r_ib_unsigned_r.opcodes(opcode.clone());
if ty.lane_bits() < 64 {
e.enc_32_64_isap(instruction, template.nonrex(), isap.clone());
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
} else {
// turns out the 64-bit widths have REX/W encodings and only are available on x86_64
e.enc64_isap(instruction, template.rex().w(), isap.clone());
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
}
}
}

// SIMD extractlane
let mut extractlane_mapping: HashMap<u64, (Vec<u8>, Option<SettingPredicateNumber>)> =
HashMap::new();
extractlane_mapping.insert(8, (vec![0x66, 0x0f, 0x3a, 0x14], Some(use_sse41))); // PEXTRB
extractlane_mapping.insert(16, (vec![0x66, 0x0f, 0xc5], None)); // PEXTRW from SSE2, SSE4.1 has a PEXTRW that can move to reg/m16 but the opcode is four bytes
extractlane_mapping.insert(32, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41))); // PEXTRD
extractlane_mapping.insert(64, (vec![0x66, 0x0f, 0x3a, 0x16], Some(use_sse41))); // PEXTRQ, only x86_64

for ty in ValueType::all_lane_types() {
if let Some((opcode, isap)) = extractlane_mapping.get(&ty.lane_bits()) {
let instruction = extractlane.bind_vector_from_lane(ty);
let template = rec_r_ib_unsigned.opcodes(opcode.clone());
if ty.lane_bits() < 64 {
e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap.clone());
} else {
// turns out the 64-bit widths have REX/W encodings and only are available on x86_64
e.enc64_maybe_isap(instruction, template.rex().w(), isap.clone());
}
}
}

// SIMD bitcast f64 to all 8-bit-lane vectors (for legalizing splat.x8x16); assumes that f64 is stored in an XMM register
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let instruction = bitcast.bind_vector(ty, 16).bind(F64);
let instruction = bitcast.bind_vector_from_lane(ty).bind(F64);
e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
e.enc64_rec(instruction, rec_null_fpr, 0);
}
Expand All @@ -1661,12 +1703,19 @@ pub fn define(
for to_type in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8 && *t != from_type)
{
let instruction = raw_bitcast
.bind_vector(to_type, 128 / to_type.lane_bits())
.bind_vector(from_type, 128 / from_type.lane_bits());
.bind_vector_from_lane(to_type)
.bind_vector_from_lane(from_type);
e.enc32_rec(instruction.clone(), rec_null_fpr, 0);
e.enc64_rec(instruction, rec_null_fpr, 0);
}
}

// SIMD vconst using MOVUPS
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() >= 8) {
let instruction = vconst.bind_vector_from_lane(ty);
let template = rec_vconst.nonrex().opcodes(vec![0x0f, 0x10]);
e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
}

e
}
Loading