riscv64: Add VecALUImm instruction format (#6325)

* riscv64: Add `VecALUImm` instruction format
* riscv64: Add VecOpCategory struct
* riscv64: Fix `imm5_from_u64`
* riscv64: Improve instruction encoding type safety
* riscv64: Run rustfmt
* riscv64: Use `VecOpCategory` in `vcfg` encoding
bytecodealliance · May 4, 2023 · 6938a02 · 6938a02
1 parent 6b704a4
commit 6938a02
Show file tree

Hide file tree

Showing 11 changed files with 399 additions and 61 deletions.
diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle
@@ -330,6 +330,13 @@
       (vs1 Reg)
       (vstate VState))
 
+    (VecAluRRImm5
+      (op VecAluOpRRImm5)
+      (vd WritableReg)
+      (vs2 Reg)
+      (imm Imm5)
+      (vstate VState))
+
     (VecSetState
       (rd WritableReg)
       (vstate VState))
@@ -697,6 +704,7 @@
 (type OptionUimm5 (primitive OptionUimm5))
 (type Imm12 (primitive Imm12))
 (type UImm5 (primitive UImm5))
+(type Imm5 (primitive Imm5))
 (type Imm20 (primitive Imm20))
 (type Imm3 (primitive Imm3))
 (type BranchTarget (primitive BranchTarget))
@@ -1323,6 +1331,17 @@
 (extern extractor imm12_from_u64 imm12_from_u64)
 
 
+;; Imm5 Extractors
+
+(decl imm5_from_u64 (Imm5) u64)
+(extern extractor imm5_from_u64 imm5_from_u64)
+
+;; Extractor that matches a `Value` equivalent to a replicated Imm5 on all lanes.
+;; TODO: Try matching vconst here as well
+(decl replicated_imm5 (Imm5) Value)
+(extractor (replicated_imm5 n)
+  (def_inst (splat (iconst (u64_from_imm64 (imm5_from_u64 n))))))
+
 
 ;; Float Helpers
 

diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -5,6 +5,7 @@ use crate::ir::RelSourceLoc;
 use crate::ir::TrapCode;
 use crate::isa::riscv64::inst::*;
 use crate::isa::riscv64::inst::{zero_reg, AluOPRRR};
+use crate::isa::riscv64::lower::isle::generated_code::VecOpMasking;
 use crate::machinst::{AllocationConsumer, Reg, Writable};
 use cranelift_control::ControlPlane;
 use regalloc2::Allocation;
@@ -467,6 +468,7 @@ impl Inst {
             Inst::VecSetState { .. } => None,
 
             Inst::VecAluRRR { vstate, .. } |
+            Inst::VecAluRRImm5 { vstate, .. } |
             // TODO: Unit-stride loads and stores only need the AVL to be correct, not
             // the full vtype. A future optimization could be to decouple these two when
             // updating vstate. This would allow us to avoid emitting a VecSetState in
@@ -634,7 +636,7 @@ impl MachInstEmit for Inst {
 
                 sink.put4(encode_r_type(
                     alu_op.op_code(),
-                    rd.to_reg(),
+                    rd,
                     alu_op.funct3(),
                     rs1,
                     rs2,
@@ -2788,19 +2790,15 @@ impl MachInstEmit for Inst {
                 let vs2 = allocs.next(vs2);
                 let vd = allocs.next_writable(vd);
 
-                // This is the mask bit, we don't yet implement masking, so set it to 1, which means
-                // masking disabled.
-                let vm = 1;
-
-                sink.put4(encode_valu(
-                    op.opcode(),
-                    vd.to_reg(),
-                    op.funct3(),
-                    vs1,
-                    vs2,
-                    vm,
-                    op.funct6(),
-                ));
+                sink.put4(encode_valu(op, vd, vs1, vs2, VecOpMasking::Disabled));
+            }
+            &Inst::VecAluRRImm5 {
+                op, vd, imm, vs2, ..
+            } => {
+                let vs2 = allocs.next(vs2);
+                let vd = allocs.next_writable(vd);
+
+                sink.put4(encode_valu_imm(op, vd, imm, vs2, VecOpMasking::Disabled));
             }
             &Inst::VecSetState { rd, ref vstate } => {
                 let rd = allocs.next_writable(rd);
@@ -2840,17 +2838,14 @@ impl MachInstEmit for Inst {
                     sink.add_trap(TrapCode::HeapOutOfBounds);
                 }
 
-                // This is the mask bit, we don't yet implement masking, so set it to 1, which means
-                // masking disabled.
-                let vm = 1;
-
                 sink.put4(encode_vmem_load(
                     0x07,
                     to.to_reg(),
                     eew,
                     addr.to_reg(),
                     from.lumop(),
-                    vm,
+                    // We don't implement masking yet.
+                    VecOpMasking::Disabled,
                     from.mop(),
                     from.nf(),
                 ));
@@ -2880,17 +2875,14 @@ impl MachInstEmit for Inst {
                     sink.add_trap(TrapCode::HeapOutOfBounds);
                 }
 
-                // This is the mask bit, we don't yet implement masking, so set it to 1, which means
-                // masking disabled.
-                let vm = 1;
-
                 sink.put4(encode_vmem_store(
                     0x27,
                     from,
                     eew,
                     addr.to_reg(),
                     to.sumop(),
-                    vm,
+                    // We don't implement masking yet.
+                    VecOpMasking::Disabled,
                     to.mop(),
                     to.nf(),
                 ));

diff --git a/cranelift/codegen/src/isa/riscv64/inst/encode.rs b/cranelift/codegen/src/isa/riscv64/inst/encode.rs
@@ -6,27 +6,52 @@
 //! Some instructions especially in extensions have slight variations from
 //! the base RISC-V specification.
 
-use super::{UImm5, VType};
+use super::{Imm5, UImm5, VType};
 use crate::isa::riscv64::inst::reg_to_gpr_num;
-use crate::isa::riscv64::lower::isle::generated_code::VecElementWidth;
+use crate::isa::riscv64::lower::isle::generated_code::{
+    VecAluOpRRImm5, VecAluOpRRR, VecElementWidth, VecOpCategory, VecOpMasking,
+};
+use crate::machinst::isle::WritableReg;
 use crate::Reg;
 
-/// Encode an R-type instruction.
-///
+fn unsigned_field_width(value: u32, width: u8) -> u32 {
+    debug_assert_eq!(value & (!0 << width), 0);
+    value
+}
+
 /// Layout:
 /// 0-------6-7-------11-12------14-15------19-20------24-25-------31
 /// | Opcode |   rd     |  funct3  |   rs1    |   rs2    |   funct7  |
-pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, funct7: u32) -> u32 {
+fn encode_r_type_bits(opcode: u32, rd: u32, funct3: u32, rs1: u32, rs2: u32, funct7: u32) -> u32 {
     let mut bits = 0;
-    bits |= opcode & 0b1111111;
-    bits |= reg_to_gpr_num(rd) << 7;
-    bits |= (funct3 & 0b111) << 12;
-    bits |= reg_to_gpr_num(rs1) << 15;
-    bits |= reg_to_gpr_num(rs2) << 20;
-    bits |= (funct7 & 0b1111111) << 25;
+    bits |= unsigned_field_width(opcode, 7);
+    bits |= unsigned_field_width(rd, 5) << 7;
+    bits |= unsigned_field_width(funct3, 3) << 12;
+    bits |= unsigned_field_width(rs1, 5) << 15;
+    bits |= unsigned_field_width(rs2, 5) << 20;
+    bits |= unsigned_field_width(funct7, 7) << 25;
     bits
 }
 
+/// Encode an R-type instruction.
+pub fn encode_r_type(
+    opcode: u32,
+    rd: WritableReg,
+    funct3: u32,
+    rs1: Reg,
+    rs2: Reg,
+    funct7: u32,
+) -> u32 {
+    encode_r_type_bits(
+        opcode,
+        reg_to_gpr_num(rd.to_reg()),
+        funct3,
+        reg_to_gpr_num(rs1),
+        reg_to_gpr_num(rs2),
+        funct7,
+    )
+}
+
 /// Encodes a Vector ALU instruction.
 ///
 /// Fields:
@@ -40,18 +65,53 @@ pub fn encode_r_type(opcode: u32, rd: Reg, funct3: u32, rs1: Reg, rs2: Reg, func
 ///
 /// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
 pub fn encode_valu(
-    opcode: u32,
-    vd: Reg,
-    funct3: u32,
+    op: VecAluOpRRR,
+    vd: WritableReg,
     vs1: Reg,
     vs2: Reg,
-    vm: u32,
-    funct6: u32,
+    masking: VecOpMasking,
+) -> u32 {
+    let funct7 = (op.funct6() << 1) | masking.encode();
+    encode_r_type_bits(
+        op.opcode(),
+        reg_to_gpr_num(vd.to_reg()),
+        op.funct3(),
+        reg_to_gpr_num(vs1),
+        reg_to_gpr_num(vs2),
+        funct7,
+    )
+}
+
+/// Encodes a Vector ALU+Imm instruction.
+/// This is just a Vector ALU instruction with an immediate in the VS1 field.
+///
+/// Fields:
+/// - opcode (7 bits)
+/// - vd     (5 bits)
+/// - funct3 (3 bits)
+/// - imm    (5 bits)
+/// - vs2    (5 bits)
+/// - vm     (1 bit)
+/// - funct6 (6 bits)
+///
+/// See: https://github.com/riscv/riscv-v-spec/blob/master/valu-format.adoc
+pub fn encode_valu_imm(
+    op: VecAluOpRRImm5,
+    vd: WritableReg,
+    imm: Imm5,
+    vs2: Reg,
+    masking: VecOpMasking,
 ) -> u32 {
-    let funct6 = funct6 & 0b111111;
-    let vm = vm & 0b1;
-    let funct7 = (funct6 << 1) | vm;
-    encode_r_type(opcode, vd, funct3, vs1, vs2, funct7)
+    let funct7 = (op.funct6() << 1) | masking.encode();
+    let imm = imm.bits() as u32;
+    encode_r_type_bits(
+        op.opcode(),
+        reg_to_gpr_num(vd.to_reg()),
+        op.funct3(),
+        imm,
+        reg_to_gpr_num(vs2),
+        funct7,
+    )
 }
 
 /// Encodes a Vector CFG Imm instruction.
@@ -60,11 +120,11 @@ pub fn encode_valu(
 // TODO: Check if this is any of the known instruction types in the spec.
 pub fn encode_vcfg_imm(opcode: u32, rd: Reg, imm: UImm5, vtype: &VType) -> u32 {
     let mut bits = 0;
-    bits |= opcode & 0b1111111;
+    bits |= unsigned_field_width(opcode, 7);
     bits |= reg_to_gpr_num(rd) << 7;
-    bits |= 0b111 << 12;
-    bits |= (imm.bits() & 0b11111) << 15;
-    bits |= (vtype.encode() & 0b1111111111) << 20;
+    bits |= VecOpCategory::OPCFG.encode() << 12;
+    bits |= unsigned_field_width(imm.bits(), 5) << 15;
+    bits |= unsigned_field_width(vtype.encode(), 10) << 20;
     bits |= 0b11 << 30;
     bits
 }
@@ -79,7 +139,7 @@ pub fn encode_vmem_load(
     width: VecElementWidth,
     rs1: Reg,
     lumop: u32,
-    vm: u32,
+    masking: VecOpMasking,
     mop: u32,
     nf: u32,
 ) -> u32 {
@@ -92,19 +152,19 @@ pub fn encode_vmem_load(
     };
 
     let mut bits = 0;
-    bits |= opcode & 0b1111111;
+    bits |= unsigned_field_width(opcode, 7);
     bits |= reg_to_gpr_num(vd) << 7;
     bits |= width << 12;
     bits |= reg_to_gpr_num(rs1) << 15;
-    bits |= (lumop & 0b11111) << 20;
-    bits |= (vm & 0b1) << 25;
-    bits |= (mop & 0b11) << 26;
+    bits |= unsigned_field_width(lumop, 5) << 20;
+    bits |= masking.encode() << 25;
+    bits |= unsigned_field_width(mop, 2) << 26;
 
     // The mew bit (inst[28]) when set is expected to be used to encode expanded
     // memory sizes of 128 bits and above, but these encodings are currently reserved.
     bits |= 0b0 << 28;
 
-    bits |= (nf & 0b111) << 29;
+    bits |= unsigned_field_width(nf, 3) << 29;
     bits
 }
 
@@ -118,11 +178,11 @@ pub fn encode_vmem_store(
     width: VecElementWidth,
     rs1: Reg,
     sumop: u32,
-    vm: u32,
+    masking: VecOpMasking,
     mop: u32,
     nf: u32,
 ) -> u32 {
     // This is pretty much the same as the load instruction, just
     // with different names on the fields.
-    encode_vmem_load(opcode, vs3, width, rs1, sumop, vm, mop, nf)
+    encode_vmem_load(opcode, vs3, width, rs1, sumop, masking, mop, nf)
 }
diff --git a/cranelift/codegen/src/isa/riscv64/inst/imms.rs b/cranelift/codegen/src/isa/riscv64/inst/imms.rs
@@ -127,6 +127,34 @@ impl Display for UImm5 {
     }
 }
 
+/// A signed 5-bit immediate.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Imm5 {
+    value: i8,
+}
+
+impl Imm5 {
+    /// Create a signed 5-bit immediate from an i8.
+    pub fn maybe_from_i8(value: i8) -> Option<Imm5> {
+        if value >= -16 && value <= 15 {
+            Some(Imm5 { value })
+        } else {
+            None
+        }
+    }
+
+    /// Bits for encoding.
+    pub fn bits(&self) -> u8 {
+        self.value as u8 & 0x1f
+    }
+}
+
+impl Display for Imm5 {
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
+        write!(f, "{}", self.value)
+    }
+}
+
 impl Inst {
     pub(crate) fn imm_min() -> i64 {
         let imm20_max: i64 = (1 << 19) << 12;

diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -646,6 +646,10 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
             collector.reg_use(vs2);
             collector.reg_def(vd);
         }
+        &Inst::VecAluRRImm5 { vd, vs2, .. } => {
+            collector.reg_use(vs2);
+            collector.reg_def(vd);
+        }
         &Inst::VecSetState { rd, .. } => {
             collector.reg_def(rd);
         }
@@ -1585,6 +1589,18 @@ impl Inst {
                 // This is noted in Section 10.1 of the RISC-V Vector spec.
                 format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate)
             }
+            &Inst::VecAluRRImm5 {
+                op,
+                vd,
+                imm,
+                vs2,
+                ref vstate,
+            } => {
+                let vs2_s = format_vec_reg(vs2, allocs);
+                let vd_s = format_vec_reg(vd.to_reg(), allocs);
+
+                format!("{} {},{},{} {}", op, vd_s, vs2_s, imm, vstate)
+            }
             &Inst::VecSetState { rd, ref vstate } => {
                 let rd_s = format_reg(rd.to_reg(), allocs);
                 assert!(vstate.avl.is_static());