From 8f2aa9fb488f140f155cdc02b438533a5c75eeed Mon Sep 17 00:00:00 2001 From: Vaughn Iverson Date: Mon, 14 Oct 2024 15:29:07 -0700 Subject: [PATCH] cpu: conditionally re-enable AVX512 support on darwin/amd64 Darwin opmask clobbering bug was fixed in kernel version 21.3.0 as released in MacOS 12.2.0. This commit resolves issue by checking for Darwin AVX512 support via a sysctl call with the addition of a kernel minimum version check. The kernel version check is completed without adding new dependencies to x/sys/cpu. A sysctl call is accomplished by copying a minimal amount of code from x/sys/unix, to retrieve only the needed KERN_OSRELEASE value. This code is structured in the same manner as an existing analogous AIX/PPC64 syscall. The resulting dotted version string value is then parsed for numeric comparison with a dependency free function. All code in this contribution is structured to ease removal of the special darwin/amd64 codepaths when that OS/arch combination is eventually no longer supported by golang. Resolves issue: golang/go#49233, reinstates fix for issue: golang/go#43089 Change-Id: I4755fc8b3865eb6562b0959ecc910e2c46ac6cb4 Reviewed-on: https://go-review.googlesource.com/c/sys/+/620256 Reviewed-by: Keith Randall Reviewed-by: Keith Randall LUCI-TryBot-Result: Go LUCI Reviewed-by: Carlos Amedee Reviewed-by: vsivsi@yahoo.com --- cpu/asm_darwin_x86_gc.s | 17 ++++++ cpu/cpu_darwin_x86.go | 61 ++++++++++++++++++++ cpu/cpu_gc_x86.go | 4 +- cpu/{cpu_x86.s => cpu_gc_x86.s} | 2 +- cpu/cpu_gccgo_x86.go | 6 -- cpu/cpu_other_x86.go | 11 ++++ cpu/cpu_x86.go | 6 +- cpu/syscall_darwin_x86_gc.go | 98 +++++++++++++++++++++++++++++++++ 8 files changed, 192 insertions(+), 13 deletions(-) create mode 100644 cpu/asm_darwin_x86_gc.s create mode 100644 cpu/cpu_darwin_x86.go rename cpu/{cpu_x86.s => cpu_gc_x86.s} (94%) create mode 100644 cpu/cpu_other_x86.go create mode 100644 cpu/syscall_darwin_x86_gc.go diff --git a/cpu/asm_darwin_x86_gc.s b/cpu/asm_darwin_x86_gc.s new file mode 100644 index 000000000..ec2acfe54 --- /dev/null +++ b/cpu/asm_darwin_x86_gc.s @@ -0,0 +1,17 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +#include "textflag.h" + +TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctl(SB) +GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB) + +TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0 + JMP libc_sysctlbyname(SB) +GLOBL ·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8 +DATA ·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB) diff --git a/cpu/cpu_darwin_x86.go b/cpu/cpu_darwin_x86.go new file mode 100644 index 000000000..b838cb9e9 --- /dev/null +++ b/cpu/cpu_darwin_x86.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build darwin && amd64 && gc + +package cpu + +// darwinSupportsAVX512 checks Darwin kernel for AVX512 support via sysctl +// call (see issue 43089). It also restricts AVX512 support for Darwin to +// kernel version 21.3.0 (MacOS 12.2.0) or later (see issue 49233). +// +// Background: +// Darwin implements a special mechanism to economize on thread state when +// AVX512 specific registers are not in use. This scheme minimizes state when +// preempting threads that haven't yet used any AVX512 instructions, but adds +// special requirements to check for AVX512 hardware support at runtime (e.g. +// via sysctl call or commpage inspection). See issue 43089 and link below for +// full background: +// https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.1.10/osfmk/i386/fpu.c#L214-L240 +// +// Additionally, all versions of the Darwin kernel from 19.6.0 through 21.2.0 +// (corresponding to MacOS 10.15.6 - 12.1) have a bug that can cause corruption +// of the AVX512 mask registers (K0-K7) upon signal return. For this reason +// AVX512 is considered unsafe to use on Darwin for kernel versions prior to +// 21.3.0, where a fix has been confirmed. See issue 49233 for full background. +func darwinSupportsAVX512() bool { + return darwinSysctlEnabled([]byte("hw.optional.avx512f\x00")) && darwinKernelVersionCheck(21, 3, 0) +} + +// Ensure Darwin kernel version is at least major.minor.patch, avoiding dependencies +func darwinKernelVersionCheck(major, minor, patch int) bool { + var release [256]byte + err := darwinOSRelease(&release) + if err != nil { + return false + } + + var mmp [3]int + c := 0 +Loop: + for _, b := range release[:] { + switch { + case b >= '0' && b <= '9': + mmp[c] = 10*mmp[c] + int(b-'0') + case b == '.': + c++ + if c > 2 { + return false + } + case b == 0: + break Loop + default: + return false + } + } + if c != 2 { + return false + } + return mmp[0] > major || mmp[0] == major && (mmp[1] > minor || mmp[1] == minor && mmp[2] >= patch) +} diff --git a/cpu/cpu_gc_x86.go b/cpu/cpu_gc_x86.go index 910728fb1..32a44514e 100644 --- a/cpu/cpu_gc_x86.go +++ b/cpu/cpu_gc_x86.go @@ -6,10 +6,10 @@ package cpu -// cpuid is implemented in cpu_x86.s for gc compiler +// cpuid is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) -// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler +// xgetbv with ecx = 0 is implemented in cpu_gc_x86.s for gc compiler // and in cpu_gccgo.c for gccgo. func xgetbv() (eax, edx uint32) diff --git a/cpu/cpu_x86.s b/cpu/cpu_gc_x86.s similarity index 94% rename from cpu/cpu_x86.s rename to cpu/cpu_gc_x86.s index 7d7ba33ef..ce208ce6d 100644 --- a/cpu/cpu_x86.s +++ b/cpu/cpu_gc_x86.s @@ -18,7 +18,7 @@ TEXT ·cpuid(SB), NOSPLIT, $0-24 RET // func xgetbv() (eax, edx uint32) -TEXT ·xgetbv(SB),NOSPLIT,$0-8 +TEXT ·xgetbv(SB), NOSPLIT, $0-8 MOVL $0, CX XGETBV MOVL AX, eax+0(FP) diff --git a/cpu/cpu_gccgo_x86.go b/cpu/cpu_gccgo_x86.go index 99c60fe9f..170d21ddf 100644 --- a/cpu/cpu_gccgo_x86.go +++ b/cpu/cpu_gccgo_x86.go @@ -23,9 +23,3 @@ func xgetbv() (eax, edx uint32) { gccgoXgetbv(&a, &d) return a, d } - -// gccgo doesn't build on Darwin, per: -// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76 -func darwinSupportsAVX512() bool { - return false -} diff --git a/cpu/cpu_other_x86.go b/cpu/cpu_other_x86.go new file mode 100644 index 000000000..a0fd7e2f7 --- /dev/null +++ b/cpu/cpu_other_x86.go @@ -0,0 +1,11 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build 386 || amd64p32 || (amd64 && (!darwin || !gc)) + +package cpu + +func darwinSupportsAVX512() bool { + panic("only implemented for gc && amd64 && darwin") +} diff --git a/cpu/cpu_x86.go b/cpu/cpu_x86.go index c29f5e4c5..600a68078 100644 --- a/cpu/cpu_x86.go +++ b/cpu/cpu_x86.go @@ -92,10 +92,8 @@ func archInit() { osSupportsAVX = isSet(1, eax) && isSet(2, eax) if runtime.GOOS == "darwin" { - // Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers. - // Since users can't rely on mask register contents, let's not advertise AVX-512 support. - // See issue 49233. - osSupportsAVX512 = false + // Darwin requires special AVX512 checks, see cpu_darwin_x86.go + osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512() } else { // Check if OPMASK and ZMM registers have OS support. osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax) diff --git a/cpu/syscall_darwin_x86_gc.go b/cpu/syscall_darwin_x86_gc.go new file mode 100644 index 000000000..4d0888b0c --- /dev/null +++ b/cpu/syscall_darwin_x86_gc.go @@ -0,0 +1,98 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Minimal copy of x/sys/unix so the cpu package can make a +// system call on Darwin without depending on x/sys/unix. + +//go:build darwin && amd64 && gc + +package cpu + +import ( + "syscall" + "unsafe" +) + +type _C_int int32 + +// adapted from unix.Uname() at x/sys/unix/syscall_darwin.go L419 +func darwinOSRelease(release *[256]byte) error { + // from x/sys/unix/zerrors_openbsd_amd64.go + const ( + CTL_KERN = 0x1 + KERN_OSRELEASE = 0x2 + ) + + mib := []_C_int{CTL_KERN, KERN_OSRELEASE} + n := unsafe.Sizeof(*release) + + return sysctl(mib, &release[0], &n, nil, 0) +} + +type Errno = syscall.Errno + +var _zero uintptr // Single-word zero for use when we need a valid pointer to 0 bytes. + +// from x/sys/unix/zsyscall_darwin_amd64.go L791-807 +func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + var _p0 unsafe.Pointer + if len(mib) > 0 { + _p0 = unsafe.Pointer(&mib[0]) + } else { + _p0 = unsafe.Pointer(&_zero) + } + if _, _, err := syscall_syscall6( + libc_sysctl_trampoline_addr, + uintptr(_p0), + uintptr(len(mib)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + ); err != 0 { + return err + } + + return nil +} + +var libc_sysctl_trampoline_addr uintptr + +// adapted from internal/cpu/cpu_arm64_darwin.go +func darwinSysctlEnabled(name []byte) bool { + out := int32(0) + nout := unsafe.Sizeof(out) + if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil { + return false + } + return out > 0 +} + +//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib" + +var libc_sysctlbyname_trampoline_addr uintptr + +// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix +func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error { + if _, _, err := syscall_syscall6( + libc_sysctlbyname_trampoline_addr, + uintptr(unsafe.Pointer(name)), + uintptr(unsafe.Pointer(old)), + uintptr(unsafe.Pointer(oldlen)), + uintptr(unsafe.Pointer(new)), + uintptr(newlen), + 0, + ); err != 0 { + return err + } + + return nil +} + +//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib" + +// Implemented in the runtime package (runtime/sys_darwin.go) +func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) + +//go:linkname syscall_syscall6 syscall.syscall6