From 8acefc1358ce568a8ed58767e6f741c4b8c866af Mon Sep 17 00:00:00 2001 From: Iain Ireland Date: Mon, 5 Feb 2024 19:30:47 +0000 Subject: [PATCH] Bug 1878113: Generate better code for arm64 VM wrappers r=jandem There are three parts to this patch: 1. When we're building the exit frame, instead of moving both the SP and the PSP for every push, we move the SP all at once. This lets us eliminate 3 instructions. 2. The syncStackPtr in callWithABIPre is redundant with the syncStackPtr in call. Removing it eliminates another instruction. 3. Prior to this patch, the VM wrapper epilogue looked like this: ``` mov x28, x29 // restore fp to psp mov sp, x28 A // sync sp ldr x29, [x28], #8 // restore saved fp to fp ldr x16, [x28], #32 // load return address into x16 mov sp, x28 B // sync sp ret x16 // return to x16 ``` Instructions A and B look redundant. In my first version of this patch, I removed B. However, the two ldr instructions actually do a post-index modification of the PSP, so that version of the patch left SP and PSP unsynchronized when returning from the VM wrapper. Oddly, that version of the patch still passed all jit-tests. It seems much safer, though, to just remove A instead. In total, this removes 5 instructions from each VM wrapper. There is still some room for improvement around passing arguments: we emit a sync after we finish passing arguments, even if we aren't passing anything on the stack. In theory we could also allocate space for stack arguments while we're building the exit frame, but that would involve duplicating much more of the argument passing code. Differential Revision: https://phabricator.services.mozilla.com/D200541 --- js/src/jit/arm64/MacroAssembler-arm64.cpp | 6 -- js/src/jit/arm64/Trampoline-arm64.cpp | 112 ++++++++++++++++++---- 2 files changed, 93 insertions(+), 25 deletions(-) diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp index 60c1b581213e9..95b7cb90fdf7b 100644 --- a/js/src/jit/arm64/MacroAssembler-arm64.cpp +++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp @@ -1514,12 +1514,6 @@ void MacroAssembler::callWithABIPre(uint32_t* stackAdjust, bool callFromWasm) { emitter.finish(); } - // Call boundaries communicate stack via SP. - // (jseward, 2021Mar03) This sync may well be redundant, given that all of - // the MacroAssembler::call methods generate a sync before the call. - // Removing it does not cause any failures for all of jit-tests. - syncStackPtr(); - assertStackAlignment(ABIStackAlignment); } diff --git a/js/src/jit/arm64/Trampoline-arm64.cpp b/js/src/jit/arm64/Trampoline-arm64.cpp index 5a598d4c08178..12f24877586f2 100644 --- a/js/src/jit/arm64/Trampoline-arm64.cpp +++ b/js/src/jit/arm64/Trampoline-arm64.cpp @@ -591,32 +591,104 @@ bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm, (Register::Codes::VolatileMask & ~Register::Codes::WrapperMask) == 0, "Wrapper register set must be a superset of the Volatile register set."); + // The first argument is the JSContext. + Register reg_cx = IntArgReg0; + regs.take(reg_cx); + Register temp = regs.getAny(); + + // On entry, the stack is: + // ... frame ... + // [args] + // descriptor + // + // Before we pass arguments (potentially pushing some of them on the stack), + // we want: + // ... frame ... + // [args] + // descriptor \ + // return address | <- exit frame + // saved frame pointer / + // VM id <- exit frame footer + // [space for out-param, if necessary]] + // [alignment padding, if necessary] + // + // To minimize PSP overhead, we compute the final stack size and update the + // stack pointer all in one go. Then we use the PSP to "push" the required + // values into the pre-allocated stack space. + size_t stackAdjustment = 0; + + // The descriptor was already pushed. + stackAdjustment += ExitFrameLayout::SizeWithFooter() - sizeof(uintptr_t); + stackAdjustment += f.sizeOfOutParamStackSlot(); + + masm.SetStackPointer64(sp); + + // First, update the actual stack pointer to its final aligned value. + masm.Sub(ARMRegister(temp, 64), masm.GetStackPointer64(), + Operand(stackAdjustment)); + masm.And(sp, ARMRegister(temp, 64), ~(uint64_t(JitStackAlignment) - 1)); + // On link-register platforms, it is the responsibility of the VM *callee* to // push the return address, while the caller must ensure that the address // is stored in lr on entry. This allows the VM wrapper to work with both // direct calls and tail calls. - masm.pushReturnAddress(); + masm.str(ARMRegister(lr, 64), + MemOperand(PseudoStackPointer64, -8, vixl::PreIndex)); - // First argument is the JSContext. - Register reg_cx = IntArgReg0; - regs.take(reg_cx); + // Push the frame pointer using the PSP. + masm.str(ARMRegister(FramePointer, 64), + MemOperand(PseudoStackPointer64, -8, vixl::PreIndex)); + + // Because we've been moving the PSP as we fill in the frame, we can set the + // frame pointer for this frame directly from the PSP. + masm.movePtr(PseudoStackPointer, FramePointer); - // Stack is: - // ... frame ... - // +12 [args] - // +8 descriptor - // +0 returnAddress (pushed by this function, caller sets as lr) - // - // Push the frame pointer to finish the exit frame, then link it up. - masm.Push(FramePointer); - masm.moveStackPtrTo(FramePointer); masm.loadJSContext(reg_cx); - masm.enterExitFrame(reg_cx, regs.getAny(), id); - // Reserve space for the outparameter. - masm.reserveVMFunctionOutParamSpace(f); + // Finish the exit frame. See MacroAssembler::enterExitFrame. + + // linkExitFrame + masm.loadPtr(Address(reg_cx, JSContext::offsetOfActivation()), temp); + masm.storePtr(FramePointer, + Address(temp, JitActivation::offsetOfPackedExitFP())); + + // Push `ExitFrameType::VMFunction + VMFunctionId` + uint32_t type = uint32_t(ExitFrameType::VMFunction) + uint32_t(id); + masm.move32(Imm32(type), temp); + masm.str(ARMRegister(temp, 64), + MemOperand(PseudoStackPointer64, -8, vixl::PreIndex)); + + // If the out parameter is a handle, initialize it to empty. + // See MacroAssembler::reserveVMFunctionOutParamSpace and PushEmptyRooted. + if (f.outParam == Type_Handle) { + switch (f.outParamRootType) { + case VMFunctionData::RootNone: + MOZ_CRASH("Handle must have root type"); + case VMFunctionData::RootObject: + case VMFunctionData::RootString: + case VMFunctionData::RootCell: + case VMFunctionData::RootBigInt: + masm.str(xzr, MemOperand(PseudoStackPointer64, -8, vixl::PreIndex)); + break; + case VMFunctionData::RootValue: + masm.movePtr(ImmWord(UndefinedValue().asRawBits()), temp); + masm.str(ARMRegister(temp, 64), + MemOperand(PseudoStackPointer64, -8, vixl::PreIndex)); + break; + case VMFunctionData::RootId: + masm.movePtr(ImmWord(JS::PropertyKey::Void().asRawBits()), temp); + masm.str(ARMRegister(temp, 64), + MemOperand(PseudoStackPointer64, -8, vixl::PreIndex)); + } + } - masm.setupUnalignedABICallDontSaveRestoreSP(); + // Now that we've filled in the stack frame, synchronize the PSP with the + // real stack pointer and return to PSP-mode while we pass arguments. + masm.moveStackPtrTo(PseudoStackPointer); + masm.SetStackPointer64(PseudoStackPointer64); + + MOZ_ASSERT(masm.framePushed() == 0); + masm.setupAlignedABICall(); masm.passABIArg(reg_cx); size_t argDisp = ExitFrameLayout::Size(); @@ -683,8 +755,10 @@ bool JitRuntime::generateVMWrapper(JSContext* cx, MacroAssembler& masm, masm.speculationBarrier(); } - // Pop frame and restore frame pointer. - masm.moveToStackPtr(FramePointer); + // Pop frame and restore frame pointer. We call Mov here directly instead + // of `moveToStackPtr` to avoid a syncStackPtr. The stack pointer will be + // synchronized as part of retn, after adjusting the PSP. + masm.Mov(masm.GetStackPointer64(), ARMRegister(FramePointer, 64)); masm.pop(FramePointer); // Return. Subtract sizeof(void*) for the frame pointer.