Skip to content

Commit

Permalink
[machinst x64]: lower load_splat using memory addressing
Browse files (browse the repository at this point in the history)
  • Loading branch information
abrown committed Oct 13, 2020
1 parent c9c8de6 commit c5b65b0
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3250,18 +3250,30 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}
}

Opcode::Splat => {
Opcode::Splat | Opcode::LoadSplat => {
let ty = ty.unwrap();
assert_eq!(ty.bits(), 128);
let src_ty = ctx.input_ty(insn, 0);
assert!(src_ty.bits() < 128);
let src = input_to_reg_mem(ctx, inputs[0]);

let (src, srcloc) = match op {
Opcode::Splat => (input_to_reg_mem(ctx, inputs[0]), None),
Opcode::LoadSplat => {
let offset = ctx.data(insn).load_store_offset().unwrap();
let amode = lower_to_amode(ctx, inputs[0], offset);
(RegMem::mem(amode), Some(ctx.srcloc(insn)))
}
_ => unreachable!(),
};
let dst = get_output_reg(ctx, outputs[0]);

// We know that splat will overwrite all of the lanes of `dst`, but it takes several
// instructions to do so. Because the write is spread across multiple instructions, there is
// no good way to declare `dst` a `def` except with the following pseudo-instruction.
ctx.emit(Inst::xmm_uninit_value(dst));

// TODO: eventually many of these sequences could be optimized with AVX's VBROADCAST*
// and VPBROADCAST*.
match ty.lane_bits() {
8 => {
emit_insert_lane(ctx, src, dst, 0, ty.lane_type(), srcloc);
Expand Down

0 comments on commit c5b65b0

Please sign in to comment.