From b35ee3b0467e042621aec9af7f18a2d8c63029ad Mon Sep 17 00:00:00 2001 From: qmuntal Date: Thu, 9 Feb 2023 14:14:07 +0100 Subject: [PATCH] runtime: remove unnecessary NOFRAME flags on windows This CL removes some NOFRAME flags on Windows assembly files for several reasons: - windows/386 does not use a frame pointer - Leaf frameless functions already skip the frame pointer - Some non-leaf functions do not contain enough dragons to justify not using the frame pointer Updates #58378 Change-Id: I31e71bf7f769e1957a4adba91778da5af66ce1e4 Reviewed-on: https://go-review.googlesource.com/c/go/+/466835 Reviewed-by: Keith Randall Reviewed-by: Keith Randall Run-TryBot: Quim Muntal Reviewed-by: Dmitri Shuralyov TryBot-Result: Gopher Robot --- src/runtime/rt0_windows_amd64.s | 4 ++-- src/runtime/rt0_windows_arm64.s | 2 +- src/runtime/sys_windows_386.s | 6 +++--- src/runtime/sys_windows_amd64.s | 22 +++++++++++++--------- src/runtime/sys_windows_arm.s | 10 +++++----- src/runtime/sys_windows_arm64.s | 12 +++++------- src/runtime/time_windows_arm.s | 4 ++-- src/runtime/time_windows_arm64.s | 4 ++-- 8 files changed, 33 insertions(+), 31 deletions(-) diff --git a/src/runtime/rt0_windows_amd64.s b/src/runtime/rt0_windows_amd64.s index 9c60337ddc1d5..d5f0940540b0c 100644 --- a/src/runtime/rt0_windows_amd64.s +++ b/src/runtime/rt0_windows_amd64.s @@ -6,7 +6,7 @@ #include "go_tls.h" #include "textflag.h" -TEXT _rt0_amd64_windows(SB),NOSPLIT,$-8 +TEXT _rt0_amd64_windows(SB),NOSPLIT|NOFRAME,$-8 JMP _rt0_amd64(SB) // When building with -buildmode=(c-shared or c-archive), this @@ -24,7 +24,7 @@ TEXT _rt0_amd64_windows_lib(SB),NOSPLIT|NOFRAME,$0x20 CALL AX RET -TEXT _rt0_amd64_windows_lib_go(SB),NOSPLIT,$0 +TEXT _rt0_amd64_windows_lib_go(SB),NOSPLIT|NOFRAME,$0 MOVQ $0, DI MOVQ $0, SI MOVQ $runtime·rt0_go(SB), AX diff --git a/src/runtime/rt0_windows_arm64.s b/src/runtime/rt0_windows_arm64.s index bad85c28ac5ce..8802c2b82e0af 100644 --- a/src/runtime/rt0_windows_arm64.s +++ b/src/runtime/rt0_windows_arm64.s @@ -23,7 +23,7 @@ TEXT _rt0_arm64_windows_lib_go(SB),NOSPLIT|NOFRAME,$0 MOVD $runtime·rt0_go(SB), R2 B (R2) -TEXT main(SB),NOSPLIT,$0 +TEXT main(SB),NOSPLIT|NOFRAME,$0 MOVD $runtime·rt0_go(SB), R2 B (R2) diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s index d29049899e8f8..818f1b4d7982c 100644 --- a/src/runtime/sys_windows_386.s +++ b/src/runtime/sys_windows_386.s @@ -50,7 +50,7 @@ TEXT runtime·getlasterror(SB),NOSPLIT,$0 MOVL AX, ret+0(FP) RET -TEXT runtime·sigFetchGSafe(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·sigFetchGSafe(SB),NOSPLIT,$0 get_tls(AX) CMPL AX, $0 JE 2(PC) @@ -93,7 +93,7 @@ TEXT sigtramp<>(SB),NOSPLIT,$0-0 // It switches stacks and jumps to the continuation address. // DX and CX are set above at the end of sigtrampgo // in the context that starts executing at sigresume. -TEXT runtime·sigresume(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·sigresume(SB),NOSPLIT,$0 MOVL DX, SP JMP CX @@ -311,7 +311,7 @@ useQPC: // This is called from rt0_go, which runs on the system stack // using the initial stack allocated by the OS. -TEXT runtime·wintls(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·wintls(SB),NOSPLIT,$0 // Allocate a TLS slot to hold g across calls to external code MOVL SP, BP MOVL runtime·_TlsAlloc(SB), AX diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s index 8780c45b1aa02..5eb03b014ed37 100644 --- a/src/runtime/sys_windows_amd64.s +++ b/src/runtime/sys_windows_amd64.s @@ -12,9 +12,12 @@ #define TEB_TlsSlots 0x1480 // void runtime·asmstdcall(void *c); -TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0 - // asmcgocall will put first argument into CX. - PUSHQ CX // save for later +TEXT runtime·asmstdcall(SB),NOSPLIT,$16 + MOVQ SP, AX + ANDQ $~15, SP // alignment as per Windows requirement + MOVQ AX, 8(SP) + MOVQ CX, 0(SP) // asmcgocall will put first argument into CX. + MOVQ libcall_fn(CX), AX MOVQ libcall_args(CX), SI MOVQ libcall_n(CX), CX @@ -61,7 +64,8 @@ loadregs: ADDQ $(const_maxArgs*8), SP // Return result. - POPQ CX + MOVQ 0(SP), CX + MOVQ 8(SP), SP MOVQ AX, libcall_r1(CX) // Floating point return values are returned in XMM0. Setting r2 to this // value in case this call returned a floating point value. For details, @@ -233,7 +237,7 @@ TEXT runtime·settls(SB),NOSPLIT,$0 // g may be nil. // The function leaves room for 4 syscall parameters // (as per windows amd64 calling convention). -TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$48-4 +TEXT runtime·usleep2(SB),NOSPLIT,$48-4 MOVLQSX dt+0(FP), BX MOVQ SP, AX ANDQ $~15, SP // alignment as per Windows requirement @@ -249,7 +253,7 @@ TEXT runtime·usleep2(SB),NOSPLIT|NOFRAME,$48-4 // Runs on OS stack. duration (in -100ns units) is in dt+0(FP). // g is valid. -TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$72-4 +TEXT runtime·usleep2HighRes(SB),NOSPLIT,$72-4 MOVLQSX dt+0(FP), BX get_tls(CX) @@ -281,7 +285,7 @@ TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$72-4 RET // Runs on OS stack. -TEXT runtime·switchtothread(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·switchtothread(SB),NOSPLIT,$0 MOVQ SP, AX ANDQ $~15, SP // alignment as per Windows requirement SUBQ $(48), SP // room for SP and 4 args as per Windows requirement @@ -306,7 +310,7 @@ useQPC: // func osSetupTLS(mp *m) // Setup TLS. for use by needm on Windows. -TEXT runtime·osSetupTLS(SB),NOSPLIT|NOFRAME,$0-8 +TEXT runtime·osSetupTLS(SB),NOSPLIT,$0-8 MOVQ mp+0(FP), AX LEAQ m_tls(AX), DI CALL runtime·settls(SB) @@ -314,7 +318,7 @@ TEXT runtime·osSetupTLS(SB),NOSPLIT|NOFRAME,$0-8 // This is called from rt0_go, which runs on the system stack // using the initial stack allocated by the OS. -TEXT runtime·wintls(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·wintls(SB),NOSPLIT,$0 // Allocate a TLS slot to hold g across calls to external code MOVQ SP, AX ANDQ $~15, SP // alignment as per Windows requirement diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s index f6496b8d054db..0af19db352cea 100644 --- a/src/runtime/sys_windows_arm.s +++ b/src/runtime/sys_windows_arm.s @@ -232,12 +232,12 @@ TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0 B runtime·armPublicationBarrier(SB) // never called (this is a GOARM=7 platform) -TEXT runtime·read_tls_fallback(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·read_tls_fallback(SB),NOSPLIT,$0 MOVW $0xabcd, R0 MOVW R0, (R0) RET -TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 +TEXT runtime·nanotime1(SB),NOSPLIT,$0-8 MOVW $0, R0 MOVB runtime·useQPCTime(SB), R0 CMP $0, R0 @@ -262,7 +262,7 @@ loop: MOVW R4, ret_hi+4(FP) RET useQPC: - B runtime·nanotimeQPC(SB) // tail call + RET runtime·nanotimeQPC(SB) // tail call // save_g saves the g register (R10) into thread local memory // so that we can call externally compiled @@ -273,7 +273,7 @@ useQPC: // Save the value in the _TEB->TlsSlots array. // Effectively implements TlsSetValue(). // tls_g stores the TLS slot allocated TlsAlloc(). -TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·save_g(SB),NOSPLIT,$0 MRC 15, 0, R0, C13, C0, 2 ADD $0xe10, R0 MOVW $runtime·tls_g(SB), R11 @@ -287,7 +287,7 @@ TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0 // ARM code that overwrote those registers. // Get the value from the _TEB->TlsSlots array. // Effectively implements TlsGetValue(). -TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·load_g(SB),NOSPLIT,$0 MRC 15, 0, R0, C13, C0, 2 ADD $0xe10, R0 MOVW $runtime·tls_g(SB), g diff --git a/src/runtime/sys_windows_arm64.s b/src/runtime/sys_windows_arm64.s index c78d37314ac32..6da866ac889a7 100644 --- a/src/runtime/sys_windows_arm64.s +++ b/src/runtime/sys_windows_arm64.s @@ -19,8 +19,7 @@ // load_g and save_g (in tls_arm64.s) clobber R27 (REGTMP) and R0. // void runtime·asmstdcall(void *c); -TEXT runtime·asmstdcall(SB),NOSPLIT|NOFRAME,$0 - STP.W (R29, R30), -32(RSP) // allocate C ABI stack frame +TEXT runtime·asmstdcall(SB),NOSPLIT,$16 STP (R19, R20), 16(RSP) // save old R19, R20 MOVD R0, R19 // save libcall pointer MOVD RSP, R20 // save stack pointer @@ -96,10 +95,9 @@ _0args: // Restore callee-saved registers. LDP 16(RSP), (R19, R20) - LDP.P 32(RSP), (R29, R30) RET -TEXT runtime·getlasterror(SB),NOSPLIT|NOFRAME,$0 +TEXT runtime·getlasterror(SB),NOSPLIT,$0 MOVD TEB_error(R18_PLATFORM), R0 MOVD R0, ret+0(FP) RET @@ -245,7 +243,7 @@ TEXT runtime·usleep2(SB),NOSPLIT,$32-4 // duration (in -100ns units) is in dt+0(FP). // g is valid. // TODO: needs to be implemented properly. -TEXT runtime·usleep2HighRes(SB),NOSPLIT,$0-4 +TEXT runtime·usleep2HighRes(SB),NOSPLIT|NOFRAME,$0-4 B runtime·abort(SB) // Runs on OS stack. @@ -256,7 +254,7 @@ TEXT runtime·switchtothread(SB),NOSPLIT,$16-0 ADD $16, RSP RET -TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 +TEXT runtime·nanotime1(SB),NOSPLIT,$0-8 MOVB runtime·useQPCTime(SB), R0 CMP $0, R0 BNE useQPC @@ -267,7 +265,7 @@ TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 MOVD R0, ret+0(FP) RET useQPC: - B runtime·nanotimeQPC(SB) // tail call + RET runtime·nanotimeQPC(SB) // tail call // This is called from rt0_go, which runs on the system stack // using the initial stack allocated by the OS. diff --git a/src/runtime/time_windows_arm.s b/src/runtime/time_windows_arm.s index 711af88307207..8d4469f9933d0 100644 --- a/src/runtime/time_windows_arm.s +++ b/src/runtime/time_windows_arm.s @@ -8,7 +8,7 @@ #include "textflag.h" #include "time_windows.h" -TEXT time·now(SB),NOSPLIT|NOFRAME,$0-20 +TEXT time·now(SB),NOSPLIT,$0-20 MOVW $0, R0 MOVB runtime·useQPCTime(SB), R0 CMP $0, R0 @@ -86,5 +86,5 @@ wall: MOVW R1,nsec+8(FP) RET useQPC: - B runtime·nowQPC(SB) // tail call + RET runtime·nowQPC(SB) // tail call diff --git a/src/runtime/time_windows_arm64.s b/src/runtime/time_windows_arm64.s index e0c7d28e153f7..7943d6b46dcbc 100644 --- a/src/runtime/time_windows_arm64.s +++ b/src/runtime/time_windows_arm64.s @@ -8,7 +8,7 @@ #include "textflag.h" #include "time_windows.h" -TEXT time·now(SB),NOSPLIT|NOFRAME,$0-24 +TEXT time·now(SB),NOSPLIT,$0-24 MOVB runtime·useQPCTime(SB), R0 CMP $0, R0 BNE useQPC @@ -43,5 +43,5 @@ TEXT time·now(SB),NOSPLIT|NOFRAME,$0-24 MOVW R0, nsec+8(FP) RET useQPC: - B runtime·nowQPC(SB) // tail call + RET runtime·nowQPC(SB) // tail call