Skip to content

Commit

Permalink
Release 0.15.2 (#751)
Browse files Browse the repository at this point in the history
* cpu: Support processor-less (memory-only) NUMA nodes (#734)

* cpu: Support processor-less (memory-only) NUMA nodes

Processor-less (memory-only) NUMA nodes exist, for example, in systems
that use Intel Optane drives for RAM expansion via Intel Memory Drive
Technology (IMDT).

IMDT RAM expansion supports two modes:

* "Unify Remote Memory domains" (the default): presents a processor-less
  (memory-only) NUMA domain
* "Expand local memory domains": expands each processor’s memory domain
  with a portion of the memory made available by Optane and IMDT

This commit fixes a crash in the first case (when "cpulist" is empty).

Here's an example of such a system:

$ numastat -m|head -n5

Per-node system memory usage (in MBs):
                          Node 0          Node 1          Node 2           Total
                 --------------- --------------- --------------- ---------------
MemTotal               118239.56       130816.00       464384.00       713439.56

$ for i in {0..2}; do echo -n "$i: " ; cat /sys/bus/node/devices/node$i/cpulist ; done
0: 0-7,16-23
1: 8-15,24-31
2:

$ /opt/vsmp/bin/vsmpversion -vvv
Memory Drive Technology: 8.2.1455.74 (Sep 28 2017 13:09:59)
System configuration:
    Boards:      3
       1 x Proc. + I/O + Memory
       2 x NVM devices (Intel SSDPED1K375GAQ)
    Processors:  2, Cores: 16, Threads: 32
        Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz Stepping 01
    Memory (MB): 713472 (of 977450), Cache: 251416, Private: 12562
       1 x 249088MB   [262036/   678/12270]
       1 x 232192MB   [357707/125369/  146]  82:00.0#1
       1 x 232192MB   [357707/125369/  146]  83:00.0#1

* cpu: rename some variables (pkg => node)

* cpu: Use %v not %q in log.Debugf() format strings

* Release v0.15.2
  • Loading branch information
SuperQ authored Dec 6, 2017
1 parent 4a6d2cd commit 9a5e842
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 15 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## v0.15.2 / 2017-12-06

* [BUGFIX] cpu: Support processor-less (memory-only) NUMA nodes #734

## v0.15.1 / 2017-11-07

* [BUGFIX] xfs: expose correct fields, fix metric names #708
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.15.1
0.15.2
35 changes: 21 additions & 14 deletions collector/cpu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
_, cpuname := filepath.Split(cpu)

if _, err := os.Stat(filepath.Join(cpu, "cpufreq")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing cpufreq", cpu)
log.Debugf("CPU %v is missing cpufreq", cpu)
} else {
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz).
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
Expand All @@ -132,7 +132,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
}

if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing thermal_throttle", cpu)
log.Debugf("CPU %v is missing thermal_throttle", cpu)
continue
}
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
Expand All @@ -141,36 +141,43 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
}

pkgs, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
nodes, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
if err != nil {
return err
}

// package/node loop
for _, pkg := range pkgs {
if _, err := os.Stat(filepath.Join(pkg, "cpulist")); os.IsNotExist(err) {
log.Debugf("package %q is missing cpulist", pkg)
// package / NUMA node loop
for _, node := range nodes {
if _, err := os.Stat(filepath.Join(node, "cpulist")); os.IsNotExist(err) {
log.Debugf("NUMA node %v is missing cpulist", node)
continue
}
cpulist, err := ioutil.ReadFile(filepath.Join(pkg, "cpulist"))
cpulist, err := ioutil.ReadFile(filepath.Join(node, "cpulist"))
if err != nil {
log.Debugf("could not read cpulist of package %q", pkg)
log.Debugf("could not read cpulist of NUMA node %v", node)
return err
}
// cpulist example of one package/node with HT: "0-11,24-35"
line := strings.Split(string(cpulist), "\n")[0]
if line == "" {
// Skip processor-less (memory-only) NUMA nodes.
// E.g. RAM expansion with Intel Optane Drive(s) using
// Intel Memory Drive Technology (IMDT).
log.Debugf("skipping processor-less (memory-only) NUMA node %v", node)
continue
}
firstCPU := strings.FieldsFunc(line, func(r rune) bool {
return r == '-' || r == ','
})[0]
if _, err := os.Stat(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
log.Debugf("Package %q CPU %q is missing package_throttle", pkg, firstCPU)
if _, err := os.Stat(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
log.Debugf("Node %v CPU %v is missing package_throttle", node, firstCPU)
continue
}
if value, err = readUintFromFile(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
if value, err = readUintFromFile(filepath.Join(node, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
return err
}
pkgno := digitRegexp.FindAllString(pkg, 1)[0]
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), pkgno)
nodeno := digitRegexp.FindAllString(node, 1)[0]
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), nodeno)
}

return nil
Expand Down
8 changes: 8 additions & 0 deletions collector/fixtures/sys.ttar
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ Mode: 644
Path: sys/bus/node/devices/node0/cpulist
Lines: 1
0-3
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node1
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/bus/node/devices/node1/cpulist
Lines: 1

Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class
Expand Down

0 comments on commit 9a5e842

Please sign in to comment.