From d76453ac5eb1e4d6de198e3c2f35123e47cdbd87 Mon Sep 17 00:00:00 2001 From: hc-github-team-nomad-core <82989552+hc-github-team-nomad-core@users.noreply.github.com> Date: Tue, 28 Mar 2023 09:28:22 -0400 Subject: [PATCH] Backport of client/fingerprint: correctly fingerprint E/P cores of Apple Silicon chips into release/1.5.x (#16690) This pull request was automerged via backport-assistant --- .changelog/16672.txt | 3 + api/go.mod | 2 +- api/go.sum | 4 +- client/fingerprint/cpu.go | 155 +++++++++++------- client/fingerprint/cpu_darwin_test.go | 45 +++++ client/fingerprint/cpu_default.go | 5 +- .../{cpu_test.go => cpu_default_test.go} | 58 +++---- client/fingerprint/cpu_linux.go | 9 +- client/stats/cpu.go | 4 +- drivers/docker/util/stats_posix.go | 2 +- drivers/docker/util/stats_windows.go | 2 +- go.mod | 3 +- go.sum | 6 +- helper/stats/cpu.go | 108 +++++++----- helper/stats/cpu_darwin_test.go | 35 ++++ 15 files changed, 296 insertions(+), 145 deletions(-) create mode 100644 .changelog/16672.txt create mode 100644 client/fingerprint/cpu_darwin_test.go rename client/fingerprint/{cpu_test.go => cpu_default_test.go} (73%) create mode 100644 helper/stats/cpu_darwin_test.go diff --git a/.changelog/16672.txt b/.changelog/16672.txt new file mode 100644 index 00000000000..ce825678c1e --- /dev/null +++ b/.changelog/16672.txt @@ -0,0 +1,3 @@ +```release-note:improvement +fingerprint/cpu: correctly fingerprint P/E cores of Apple Silicon chips +``` diff --git a/api/go.mod b/api/go.mod index 46a26d2bc21..07878f4899a 100644 --- a/api/go.mod +++ b/api/go.mod @@ -11,7 +11,7 @@ require ( github.com/hashicorp/go-rootcerts v1.0.2 github.com/mitchellh/go-testing-interface v1.14.1 github.com/mitchellh/mapstructure v1.5.0 - github.com/shoenig/test v0.6.2 + github.com/shoenig/test v0.6.3 golang.org/x/exp v0.0.0-20230108222341-4b8118a2686a ) diff --git a/api/go.sum b/api/go.sum index 6a8d2f695ee..774b4b80682 100644 --- a/api/go.sum +++ b/api/go.sum @@ -25,8 +25,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/shoenig/test v0.6.2 h1:tdq+WGnznwE5xcOMXkqqXuudK75RkSGBazBGcP1lX6w= -github.com/shoenig/test v0.6.2/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/shoenig/test v0.6.3 h1:GVXWJFk9PiOjN0KoJ7VrJGH6uLPnqxR7/fe3HUPfE0c= +github.com/shoenig/test v0.6.3/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/client/fingerprint/cpu.go b/client/fingerprint/cpu.go index 3bc886805f5..b4943549c80 100644 --- a/client/fingerprint/cpu.go +++ b/client/fingerprint/cpu.go @@ -6,7 +6,7 @@ import ( "github.com/hashicorp/nomad/lib/cpuset" - log "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/nomad/helper/stats" "github.com/hashicorp/nomad/nomad/structs" ) @@ -22,85 +22,128 @@ const ( // CPUFingerprint is used to fingerprint the CPU type CPUFingerprint struct { StaticFingerprinter - logger log.Logger + logger hclog.Logger + + // accumulates result in these resource structs + resources *structs.Resources + nodeResources *structs.NodeResources } // NewCPUFingerprint is used to create a CPU fingerprint -func NewCPUFingerprint(logger log.Logger) Fingerprint { - f := &CPUFingerprint{logger: logger.Named("cpu")} - return f +func NewCPUFingerprint(logger hclog.Logger) Fingerprint { + return &CPUFingerprint{ + logger: logger.Named("cpu"), + resources: new(structs.Resources), // COMPAT (to be removed after 0.10) + nodeResources: new(structs.NodeResources), + } } -func (f *CPUFingerprint) Fingerprint(req *FingerprintRequest, resp *FingerprintResponse) error { - cfg := req.Config - setResourcesCPU := func(totalCompute int, totalCores uint16, reservableCores []uint16) { - // COMPAT(0.10): Remove in 0.10 - resp.Resources = &structs.Resources{ - CPU: totalCompute, - } +func (f *CPUFingerprint) Fingerprint(request *FingerprintRequest, response *FingerprintResponse) error { + f.initialize() - resp.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: int64(totalCompute), - TotalCpuCores: totalCores, - ReservableCpuCores: reservableCores, - }, - } - } + f.setModelName(response) + + f.setFrequency(response) + + f.setCoreCount(response) + + f.setReservableCores(request, response) + + f.setTotalCompute(request, response) + + f.setResponseResources(response) + + response.Detected = true + + return nil +} +func (f *CPUFingerprint) initialize() { if err := stats.Init(); err != nil { f.logger.Warn("failed initializing stats collector", "error", err) } +} +func (f *CPUFingerprint) setModelName(response *FingerprintResponse) { if modelName := stats.CPUModelName(); modelName != "" { - resp.AddAttribute("cpu.modelname", modelName) + response.AddAttribute("cpu.modelname", modelName) + f.logger.Debug("detected CPU model", "name", modelName) } +} + +func (*CPUFingerprint) frequency(mhz uint64) string { + return fmt.Sprintf("%.0f", float64(mhz)) +} - if mhz := stats.CPUMHzPerCore(); mhz > 0 { - resp.AddAttribute("cpu.frequency", fmt.Sprintf("%.0f", mhz)) - f.logger.Debug("detected cpu frequency", "MHz", log.Fmt("%.0f", mhz)) +func (f *CPUFingerprint) setFrequency(response *FingerprintResponse) { + power, efficiency := stats.CPUMHzPerCore() + switch { + case efficiency > 0: + response.AddAttribute("cpu.frequency.efficiency", f.frequency(efficiency)) + response.AddAttribute("cpu.frequency.power", f.frequency(power)) + f.logger.Debug("detected CPU efficiency core speed", "mhz", efficiency) + f.logger.Debug("detected CPU power core speed", "mhz", power) + case power > 0: + response.AddAttribute("cpu.frequency", f.frequency(power)) + f.logger.Debug("detected CPU frequency", "mhz", power) } +} + +func (*CPUFingerprint) cores(count int) string { + return strconv.Itoa(count) +} - var numCores int - if numCores = stats.CPUNumCores(); numCores > 0 { - resp.AddAttribute("cpu.numcores", strconv.Itoa(numCores)) - f.logger.Debug("detected core count", "cores", numCores) +func (f *CPUFingerprint) setCoreCount(response *FingerprintResponse) { + power, efficiency := stats.CPUNumCores() + switch { + case efficiency > 0: + response.AddAttribute("cpu.numcores.efficiency", f.cores(efficiency)) + response.AddAttribute("cpu.numcores.power", f.cores(power)) + f.logger.Debug("detected CPU efficiency core count", "cores", efficiency) + f.logger.Debug("detected CPU power core count", "cores", power) + case power > 0: + response.AddAttribute("cpu.numcores", f.cores(power)) + f.logger.Debug("detected CPU core count", power) } + f.nodeResources.Cpu.TotalCpuCores = uint16(power + efficiency) +} - var reservableCores []uint16 - if req.Config.ReservableCores != nil { - reservableCores = req.Config.ReservableCores - f.logger.Debug("reservable cores set by config", "cpuset", reservableCores) +func (f *CPUFingerprint) setReservableCores(request *FingerprintRequest, response *FingerprintResponse) { + reservable := request.Config.ReservableCores + if len(reservable) > 0 { + f.logger.Debug("reservable cores set by config", "cpuset", reservable) } else { - if cores, err := f.deriveReservableCores(req); err != nil { - f.logger.Warn("failed to detect set of reservable cores", "error", err) - } else { - if req.Node.ReservedResources != nil { - reservableCores = cpuset.New(cores...).Difference(cpuset.New(req.Node.ReservedResources.Cpu.ReservedCpuCores...)).ToSlice() + cgroupParent := request.Config.CgroupParent + if reservable = f.deriveReservableCores(cgroupParent); reservable != nil { + if request.Node.ReservedResources != nil { + forNode := request.Node.ReservedResources.Cpu.ReservedCpuCores + reservable = cpuset.New(reservable...).Difference(cpuset.New(forNode...)).ToSlice() + f.logger.Debug("client configuration reserves these cores for node", "cores", forNode) } - f.logger.Debug("detected reservable cores", "cpuset", reservableCores) + f.logger.Debug("set of reservable cores available for tasks", "cores", reservable) } } - resp.AddAttribute("cpu.reservablecores", strconv.Itoa(len(reservableCores))) - tt := int(stats.TotalTicksAvailable()) - if cfg.CpuCompute > 0 { - f.logger.Debug("using user specified cpu compute", "cpu_compute", cfg.CpuCompute) - tt = cfg.CpuCompute - } + response.AddAttribute("cpu.reservablecores", strconv.Itoa(len(reservable))) + f.nodeResources.Cpu.ReservableCpuCores = reservable +} - // If we cannot detect the cpu total compute, fallback to a very low default - // value and log a message about configuring cpu_total_compute. This happens - // on Graviton instances where CPU information is unavailable. In that case, - // the env_aws fingerprinter updates the value with correct information. - if tt == 0 { - f.logger.Info("fallback to default cpu total compute, set client config option cpu_total_compute to override") - tt = defaultCPUTicks +func (f *CPUFingerprint) setTotalCompute(request *FingerprintRequest, response *FingerprintResponse) { + var ticks uint64 + switch { + case request.Config.CpuCompute > 0: + ticks = uint64(request.Config.CpuCompute) + case stats.TotalTicksAvailable() > 0: + ticks = stats.TotalTicksAvailable() + default: + ticks = defaultCPUTicks } + response.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", ticks)) + f.resources.CPU = int(ticks) + f.nodeResources.Cpu.CpuShares = int64(ticks) +} - resp.AddAttribute("cpu.totalcompute", fmt.Sprintf("%d", tt)) - setResourcesCPU(tt, uint16(numCores), reservableCores) - resp.Detected = true - - return nil +func (f *CPUFingerprint) setResponseResources(response *FingerprintResponse) { + response.Resources = f.resources + response.NodeResources = f.nodeResources } diff --git a/client/fingerprint/cpu_darwin_test.go b/client/fingerprint/cpu_darwin_test.go new file mode 100644 index 00000000000..aa5ce13ae89 --- /dev/null +++ b/client/fingerprint/cpu_darwin_test.go @@ -0,0 +1,45 @@ +//go:build darwin && arm64 && cgo + +package fingerprint + +import ( + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/config" + "github.com/hashicorp/nomad/helper/testlog" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func TestCPUFingerprint_AppleSilicon(t *testing.T) { + ci.Parallel(t) + + f := NewCPUFingerprint(testlog.HCLogger(t)) + node := &structs.Node{Attributes: make(map[string]string)} + + request := &FingerprintRequest{Config: new(config.Config), Node: node} + var response FingerprintResponse + + err := f.Fingerprint(request, &response) + must.NoError(t, err) + + must.True(t, response.Detected) + + attributes := response.Attributes + must.NotNil(t, attributes) + must.MapContainsKey(t, attributes, "cpu.modelname") + must.MapContainsKey(t, attributes, "cpu.numcores.power") + must.MapContainsKey(t, attributes, "cpu.numcores.efficiency") + must.MapContainsKey(t, attributes, "cpu.frequency.power") + must.MapContainsKey(t, attributes, "cpu.frequency.efficiency") + must.MapContainsKey(t, attributes, "cpu.totalcompute") + must.Positive(t, response.Resources.CPU) + must.Positive(t, response.NodeResources.Cpu.CpuShares) + must.Positive(t, response.NodeResources.Cpu.SharesPerCore()) + must.SliceEmpty(t, response.NodeResources.Cpu.ReservableCpuCores) + + // not included for mixed core types (that we can detect) + must.MapNotContainsKey(t, attributes, "cpu.numcores") + must.MapNotContainsKey(t, attributes, "cpu.frequency") +} diff --git a/client/fingerprint/cpu_default.go b/client/fingerprint/cpu_default.go index 1b266c3c671..40bcf43cdca 100644 --- a/client/fingerprint/cpu_default.go +++ b/client/fingerprint/cpu_default.go @@ -1,8 +1,7 @@ //go:build !linux -// +build !linux package fingerprint -func (f *CPUFingerprint) deriveReservableCores(req *FingerprintRequest) ([]uint16, error) { - return nil, nil +func (_ *CPUFingerprint) deriveReservableCores(string) []uint16 { + return nil } diff --git a/client/fingerprint/cpu_test.go b/client/fingerprint/cpu_default_test.go similarity index 73% rename from client/fingerprint/cpu_test.go rename to client/fingerprint/cpu_default_test.go index bc378595550..7e0bbf48c00 100644 --- a/client/fingerprint/cpu_test.go +++ b/client/fingerprint/cpu_default_test.go @@ -1,3 +1,5 @@ +//go:build !darwin || !arm64 || !cgo + package fingerprint import ( @@ -8,54 +10,36 @@ import ( "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" ) -func TestCPUFingerprint(t *testing.T) { +func TestCPUFingerprint_Classic(t *testing.T) { ci.Parallel(t) f := NewCPUFingerprint(testlog.HCLogger(t)) - node := &structs.Node{ - Attributes: make(map[string]string), - } + node := &structs.Node{Attributes: make(map[string]string)} request := &FingerprintRequest{Config: &config.Config{}, Node: node} var response FingerprintResponse - err := f.Fingerprint(request, &response) - if err != nil { - t.Fatalf("err: %v", err) - } - if !response.Detected { - t.Fatalf("expected response to be applicable") - } + err := f.Fingerprint(request, &response) + must.NoError(t, err) - // CPU info + must.True(t, response.Detected) attributes := response.Attributes - if attributes == nil { - t.Fatalf("expected attributes to be initialized") - } - if attributes["cpu.numcores"] == "" { - t.Fatalf("Missing Num Cores") - } - if attributes["cpu.modelname"] == "" { - t.Fatalf("Missing Model Name") - } - - if attributes["cpu.frequency"] == "" { - t.Fatalf("Missing CPU Frequency") - } - if attributes["cpu.totalcompute"] == "" { - t.Fatalf("Missing CPU Total Compute") - } - - // COMPAT(0.10): Remove in 0.10 - if response.Resources == nil || response.Resources.CPU == 0 { - t.Fatalf("Expected to find CPU Resources") - } - - if response.NodeResources == nil || response.NodeResources.Cpu.CpuShares == 0 { - t.Fatalf("Expected to find CPU Resources") - } + must.NotNil(t, attributes) + must.MapContainsKey(t, attributes, "cpu.numcores") + must.MapContainsKey(t, attributes, "cpu.modelname") + must.MapContainsKey(t, attributes, "cpu.frequency") + must.MapContainsKey(t, attributes, "cpu.totalcompute") + must.Positive(t, response.Resources.CPU) + must.Positive(t, response.NodeResources.Cpu.CpuShares) + must.Positive(t, response.NodeResources.Cpu.SharesPerCore()) + must.SliceNotEmpty(t, response.NodeResources.Cpu.ReservableCpuCores) + + // asymetric core detection currently only works with apple silicon + must.MapNotContainsKey(t, attributes, "cpu.numcores.power") + must.MapNotContainsKey(t, attributes, "cpu.numcores.efficiency") } // TestCPUFingerprint_OverrideCompute asserts that setting cpu_total_compute in diff --git a/client/fingerprint/cpu_linux.go b/client/fingerprint/cpu_linux.go index 14c80faa3aa..aea8c347273 100644 --- a/client/fingerprint/cpu_linux.go +++ b/client/fingerprint/cpu_linux.go @@ -4,9 +4,14 @@ import ( "github.com/hashicorp/nomad/client/lib/cgutil" ) -func (f *CPUFingerprint) deriveReservableCores(req *FingerprintRequest) ([]uint16, error) { +func (f *CPUFingerprint) deriveReservableCores(cgroupParent string) []uint16 { // The cpuset cgroup manager is initialized (on linux), but not accessible // from the finger-printer. So we reach in and grab the information manually. // We may assume the hierarchy is already setup. - return cgutil.GetCPUsFromCgroup(req.Config.CgroupParent) + cpuset, err := cgutil.GetCPUsFromCgroup(cgroupParent) + if err != nil { + f.logger.Warn("failed to detect set of reservable cores", "error", err) + return nil + } + return cpuset } diff --git a/client/stats/cpu.go b/client/stats/cpu.go index a1393aeb773..1986b05887f 100644 --- a/client/stats/cpu.go +++ b/client/stats/cpu.go @@ -48,7 +48,7 @@ func (c *CpuStats) Percent(cpuTime float64) float64 { // TicksConsumed calculates the total ticks consumes by the process across all // cpu cores func (c *CpuStats) TicksConsumed(percent float64) float64 { - return (percent / 100) * shelpers.TotalTicksAvailable() / float64(c.totalCpus) + return (percent / 100) * float64(shelpers.TotalTicksAvailable()) / float64(c.totalCpus) } func (c *CpuStats) calculatePercent(t1, t2 float64, timeDelta int64) float64 { @@ -83,7 +83,7 @@ func (h *HostStatsCollector) collectCPUStats() (cpus []*CPUStats, totalTicks flo Idle: idle, Total: total, } - ticksConsumed += (total / 100.0) * (shelpers.TotalTicksAvailable() / float64(len(cpuStats))) + ticksConsumed += (total / 100.0) * (float64(shelpers.TotalTicksAvailable()) / float64(len(cpuStats))) } return cs, ticksConsumed, nil diff --git a/drivers/docker/util/stats_posix.go b/drivers/docker/util/stats_posix.go index 3a9f03f8f53..ae086d475ba 100644 --- a/drivers/docker/util/stats_posix.go +++ b/drivers/docker/util/stats_posix.go @@ -53,7 +53,7 @@ func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage cs.UserMode = CalculateCPUPercent( s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode, s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, runtime.NumCPU()) - cs.TotalTicks = (cs.Percent / 100) * stats.TotalTicksAvailable() / float64(runtime.NumCPU()) + cs.TotalTicks = (cs.Percent / 100) * float64(stats.TotalTicksAvailable()) / float64(runtime.NumCPU()) return &cstructs.TaskResourceUsage{ ResourceUsage: &cstructs.ResourceUsage{ diff --git a/drivers/docker/util/stats_windows.go b/drivers/docker/util/stats_windows.go index 057edd399dc..9d4b5e81ce7 100644 --- a/drivers/docker/util/stats_windows.go +++ b/drivers/docker/util/stats_windows.go @@ -42,7 +42,7 @@ func DockerStatsToTaskResourceUsage(s *docker.Stats) *cstructs.TaskResourceUsage ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods, ThrottledTime: s.CPUStats.ThrottlingData.ThrottledTime, Percent: cpuPercent, - TotalTicks: (cpuPercent / 100) * stats.TotalTicksAvailable() / float64(runtime.NumCPU()), + TotalTicks: (cpuPercent / 100) * float64(stats.TotalTicksAvailable()) / float64(runtime.NumCPU()), Measured: DockerMeasuredCPUStats, } diff --git a/go.mod b/go.mod index 76178e8b452..77ece916e31 100644 --- a/go.mod +++ b/go.mod @@ -108,7 +108,8 @@ require ( github.com/ryanuber/go-glob v1.0.0 github.com/shirou/gopsutil/v3 v3.23.1 github.com/shoenig/go-landlock v0.1.5 - github.com/shoenig/test v0.6.2 + github.com/shoenig/go-m1cpu v0.1.4 + github.com/shoenig/test v0.6.3 github.com/skratchdot/open-golang v0.0.0-20160302144031-75fb7ed4208c github.com/stretchr/testify v1.8.1 github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 diff --git a/go.sum b/go.sum index ad1890960df..e7e9ccee149 100644 --- a/go.sum +++ b/go.sum @@ -1310,8 +1310,10 @@ github.com/shirou/gopsutil/v3 v3.23.1 h1:a9KKO+kGLKEvcPIs4W62v0nu3sciVDOOOPUD0Hz github.com/shirou/gopsutil/v3 v3.23.1/go.mod h1:NN6mnm5/0k8jw4cBfCnJtr5L7ErOTg18tMNpgFkn0hA= github.com/shoenig/go-landlock v0.1.5 h1:0a/YjKzqdbll7f/iztN/6pKRSHJEmm8olFWD8xSM86A= github.com/shoenig/go-landlock v0.1.5/go.mod h1:CxztF/8LRAUKUMguGxGTQBJIBiiawxx/BKYVGrHg1/0= -github.com/shoenig/test v0.6.2 h1:tdq+WGnznwE5xcOMXkqqXuudK75RkSGBazBGcP1lX6w= -github.com/shoenig/test v0.6.2/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= +github.com/shoenig/go-m1cpu v0.1.4 h1:SZPIgRM2sEF9NJy50mRHu9PKGwxyyTTJIWvCtgVbozs= +github.com/shoenig/go-m1cpu v0.1.4/go.mod h1:Wwvst4LR89UxjeFtLRMrpgRiyY4xPsejnVZym39dbAQ= +github.com/shoenig/test v0.6.3 h1:GVXWJFk9PiOjN0KoJ7VrJGH6uLPnqxR7/fe3HUPfE0c= +github.com/shoenig/test v0.6.3/go.mod h1:byHiCGXqrVaflBLAMq/srcZIHynQPQgeyvkvXnjqq0k= github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= diff --git a/helper/stats/cpu.go b/helper/stats/cpu.go index 5e182bc1c7a..59cb6d12d02 100644 --- a/helper/stats/cpu.go +++ b/helper/stats/cpu.go @@ -2,13 +2,13 @@ package stats import ( "context" + "errors" "fmt" - "math" "sync" "time" - multierror "github.com/hashicorp/go-multierror" "github.com/shirou/gopsutil/v3/cpu" + "github.com/shoenig/go-m1cpu" ) const ( @@ -19,63 +19,97 @@ const ( ) var ( - cpuMhzPerCore float64 - cpuModelName string - cpuNumCores int - cpuTotalTicks float64 + cpuPowerCoreCount int + cpuPowerCoreMHz uint64 + cpuEfficiencyCoreCount int + cpuEfficiencyCoreMHz uint64 + cpuTotalTicks uint64 + cpuModelName string +) +var ( initErr error onceLer sync.Once ) func Init() error { onceLer.Do(func() { - var merrs *multierror.Error - var err error - if cpuNumCores, err = cpu.Counts(true); err != nil { - merrs = multierror.Append(merrs, fmt.Errorf("Unable to determine the number of CPU cores available: %v", err)) - } + switch { + case m1cpu.IsAppleSilicon(): + cpuModelName = m1cpu.ModelName() + cpuPowerCoreCount = m1cpu.PCoreCount() + cpuPowerCoreMHz = m1cpu.PCoreHz() / 1_000_000 + cpuEfficiencyCoreCount = m1cpu.ECoreCount() + cpuEfficiencyCoreMHz = m1cpu.ECoreHz() / 1_000_000 + bigTicks := uint64(cpuPowerCoreCount) * cpuPowerCoreMHz + littleTicks := uint64(cpuEfficiencyCoreCount) * cpuEfficiencyCoreMHz + cpuTotalTicks = bigTicks + littleTicks + default: + // for now, all other cpu types assume only power cores + // todo: this is already not true for Intel 13th generation - var cpuInfo []cpu.InfoStat - ctx, cancel := context.WithTimeout(context.Background(), cpuInfoTimeout) - defer cancel() - if cpuInfo, err = cpu.InfoWithContext(ctx); err != nil { - merrs = multierror.Append(merrs, fmt.Errorf("Unable to obtain CPU information: %v", err)) - } + var err error + if cpuPowerCoreCount, err = cpu.Counts(true); err != nil { + initErr = errors.Join(initErr, fmt.Errorf("failed to detect number of CPU cores: %w", err)) + } - for _, cpu := range cpuInfo { - cpuModelName = cpu.ModelName - cpuMhzPerCore = cpu.Mhz - break - } + ctx, cancel := context.WithTimeout(context.Background(), cpuInfoTimeout) + defer cancel() + + var cpuInfoStats []cpu.InfoStat + if cpuInfoStats, err = cpu.InfoWithContext(ctx); err != nil { + initErr = errors.Join(initErr, fmt.Errorf("Unable to obtain CPU information: %w", err)) + } - // Floor all of the values such that small difference don't cause the - // node to fall into a unique computed node class - cpuMhzPerCore = math.Floor(cpuMhzPerCore) - cpuTotalTicks = math.Floor(float64(cpuNumCores) * cpuMhzPerCore) + for _, infoStat := range cpuInfoStats { + cpuModelName = infoStat.ModelName + cpuPowerCoreMHz = uint64(infoStat.Mhz) + break + } - // Set any errors that occurred - initErr = merrs.ErrorOrNil() + // compute ticks using only power core, until we add support for + // detecting little cores on non-apple platforms + cpuTotalTicks = uint64(cpuPowerCoreCount) * cpuPowerCoreMHz + + initErr = err + } }) return initErr } -// CPUNumCores returns the number of CPU cores available -func CPUNumCores() int { - return cpuNumCores +// CPUNumCores returns the number of CPU cores available. +// +// This is represented with two values - (Power (P), Efficiency (E)) so we can +// correctly compute total compute for processors with asymetric cores such as +// Apple Silicon. +// +// For platforms with symetric cores (or where we do not correcly detect asymetric +// cores), all cores are presented as P cores. +func CPUNumCores() (int, int) { + return cpuPowerCoreCount, cpuEfficiencyCoreCount } -// CPUMHzPerCore returns the MHz per CPU core -func CPUMHzPerCore() float64 { - return cpuMhzPerCore +// CPUMHzPerCore returns the MHz per CPU (P, E) core type. +// +// As with CPUNumCores, asymetric core detection currently only works with +// Apple Silicon CPUs. +func CPUMHzPerCore() (uint64, uint64) { + return cpuPowerCoreMHz, cpuEfficiencyCoreMHz } -// CPUModelName returns the model name of the CPU +// CPUModelName returns the model name of the CPU. func CPUModelName() string { return cpuModelName } -// TotalTicksAvailable calculates the total Mhz available across all cores -func TotalTicksAvailable() float64 { +// TotalTicksAvailable calculates the total MHz available across all cores. +// +// Where asymetric cores are correctly detected, the total ticks is the sum of +// the performance across both core types. +// +// Where asymetric cores are not correctly detected (such as Intel 13th gen), +// the total ticks available is over-estimated, as we assume all cores are P +// cores. +func TotalTicksAvailable() uint64 { return cpuTotalTicks } diff --git a/helper/stats/cpu_darwin_test.go b/helper/stats/cpu_darwin_test.go new file mode 100644 index 00000000000..34a99a65e8f --- /dev/null +++ b/helper/stats/cpu_darwin_test.go @@ -0,0 +1,35 @@ +//go:build darwin && arm64 && cgo + +package stats + +import ( + "testing" + + "github.com/shoenig/test/must" +) + +func TestCPU_Init(t *testing.T) { + must.NoError(t, Init()) +} + +func TestCPU_CPUNumCores(t *testing.T) { + big, little := CPUNumCores() + must.Between(t, 4, big, 32) + must.Between(t, 2, little, 8) +} + +func TestCPU_CPUMHzPerCore(t *testing.T) { + big, little := CPUMHzPerCore() + must.Between(t, 3_000, big, 6_000) + must.Between(t, 2_000, little, 4_000) +} + +func TestCPU_CPUModelName(t *testing.T) { + name := CPUModelName() + must.NotEq(t, "", name) +} + +func TestCPU_CPUTotalTicksAvailable(t *testing.T) { + ticks := TotalTicksAvailable() + must.Positive(t, ticks) +}