-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ipmi.v2.schema #4450
Ipmi.v2.schema #4450
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,10 @@ If no servers are specified, the plugin will query the local machine sensor stat | |
``` | ||
ipmitool sdr | ||
``` | ||
or with the version 2 schema: | ||
``` | ||
ipmitool sdr elist | ||
``` | ||
|
||
When one or more servers are specified, the plugin will use the following command to collect remote host sensor stats: | ||
|
||
|
@@ -41,19 +45,36 @@ ipmitool -I lan -H SERVER -U USERID -P PASSW0RD sdr | |
|
||
## Timeout for the ipmitool command to complete. Default is 20 seconds. | ||
timeout = "20s" | ||
|
||
## Schema Version: (Optional, defaults to version 1) | ||
schemaVersion = 2 | ||
``` | ||
|
||
### Measurements | ||
|
||
Version 1 schema: | ||
- ipmi_sensor: | ||
- tags: | ||
- name | ||
- unit | ||
- host | ||
- server (only when retrieving stats from remote servers) | ||
- fields: | ||
- status (int) | ||
- status (int, 1=ok status_code/0=anything else) | ||
- value (float) | ||
|
||
Version 2 schema: | ||
- ipmi_sensor: | ||
- tags: | ||
- name | ||
- entity_id (can help uniquify duplicate names) | ||
- status_code (two letter code from IPMI documentation) | ||
- status_desc (extended status description field) | ||
- unit (only on analog values) | ||
- host | ||
- server (only when retrieving stats from remote) | ||
- fields: | ||
- value (float) | ||
|
||
#### Permissions | ||
|
||
|
@@ -68,24 +89,36 @@ KERNEL=="ipmi*", MODE="660", GROUP="telegraf" | |
|
||
### Example Output | ||
|
||
#### Version 1 Schema | ||
When retrieving stats from a remote server: | ||
``` | ||
ipmi_sensor,server=10.20.2.203,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455 | ||
ipmi_sensor,server=10.20.2.203,unit=feet,name=altitude status=1i,value=80 1458488465012688613 | ||
ipmi_sensor,server=10.20.2.203,unit=watts,name=avg_power status=1i,value=220 1458488465012776511 | ||
ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875 | ||
ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508 | ||
ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932 | ||
ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896 | ||
ipmi_sensor,server=10.20.2.203,name=uid_light,host=foo.bar.com value=0,status=1i 1517125513000000000 | ||
ipmi_sensor,server=10.20.2.203,name=sys._health_led,host=foo.bar.com status=1i,value=0 1517125513000000000 | ||
ipmi_sensor,server=10.20.2.203,unit=watts,host=foo.bar.com,name=power_supply_1 status=1i,value=110 1517125513000000000 | ||
ipmi_sensor,server=10.20.2.203,host=foo.bar.com,name=power_supply_2,unit=watts status=1i,value=120 1517125513000000000 | ||
ipmi_sensor,server=10.20.2.203,name=power_supplies,host=foo.bar.com value=0,status=1i 1517125513000000000 | ||
ipmi_sensor,server=10.20.2.203,name=fan_1,unit=percent,host=foo.bar.com status=1i,value=43.12 1517125513000000000 | ||
``` | ||
|
||
|
||
When retrieving stats from the local machine (no server specified): | ||
``` | ||
ipmi_sensor,name=uid_light,host=foo.bar.com value=0,status=1i 1517125513000000000 | ||
ipmi_sensor,name=sys._health_led,host=foo.bar.com status=1i,value=0 1517125513000000000 | ||
ipmi_sensor,unit=watts,host=foo.bar.com,name=power_supply_1 status=1i,value=110 1517125513000000000 | ||
ipmi_sensor,host=foo.bar.com,name=power_supply_2,unit=watts status=1i,value=120 1517125513000000000 | ||
ipmi_sensor,name=power_supplies,host=foo.bar.com value=0,status=1i 1517125513000000000 | ||
ipmi_sensor,name=fan_1,unit=percent,host=foo.bar.com status=1i,value=43.12 1517125513000000000 | ||
``` | ||
|
||
#### Version 2 Schema | ||
|
||
When retrieving stats from the local machine (no server specified): | ||
``` | ||
ipmi_sensor,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455 | ||
ipmi_sensor,unit=feet,name=altitude status=1i,value=80 1458488465012688613 | ||
ipmi_sensor,unit=watts,name=avg_power status=1i,value=220 1458488465012776511 | ||
ipmi_sensor,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875 | ||
ipmi_sensor,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508 | ||
ipmi_sensor,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932 | ||
ipmi_sensor,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896 | ||
ipmi_sensor,name=uid_light,entity_id=23.1,status_code=ok,status_desc=ok,host=foo.bar.com value=0 1517125474000000000 | ||
ipmi_sensor,host=foo.bar.com,name=sys._health_led,entity_id=23.2,status_code=ok,status_desc=ok value=0 1517125474000000000 | ||
ipmi_sensor,status_code=ok,unit=watts,status_desc=presence_detected,host=foo.bar.com,name=power_supply_1,entity_id=10.1 value=110 1517125474000000000 | ||
ipmi_sensor,host=foo.bar.com,name=power_supply_2,entity_id=10.2,status_code=ok,unit=watts,status_desc=presence_detected value=125 1517125474000000000 | ||
ipmi_sensor,name=power_supplies,entity_id=10.3,status_code=ok,status_desc=fully_redundant,host=foo.bar.com value=0 1517125474000000000 | ||
ipmi_sensor,unit=percent,status_desc=transition_to_running,host=foo.bar.com,name=fan_1,entity_id=7.1,status_code=ok value=43.12 1517125474000000000 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you regen this output with Telegraf 1.7, it should sort the tags. You can leave out the host tag since it is added everywhere. |
||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ package ipmi_sensor | |
import ( | ||
"fmt" | ||
"os/exec" | ||
"regexp" | ||
"strconv" | ||
"strings" | ||
"sync" | ||
|
@@ -17,11 +18,13 @@ var ( | |
execCommand = exec.Command // execCommand is used to mock commands in tests. | ||
) | ||
|
||
// Ipmi stores the configuration values for the ipmi_sensor input plugin | ||
type Ipmi struct { | ||
Path string | ||
Privilege string | ||
Servers []string | ||
Timeout internal.Duration | ||
Path string | ||
Privilege string | ||
Servers []string | ||
Timeout internal.Duration | ||
SchemaVersion int | ||
} | ||
|
||
var sampleConfig = ` | ||
|
@@ -46,16 +49,22 @@ var sampleConfig = ` | |
|
||
## Timeout for the ipmitool command to complete | ||
timeout = "20s" | ||
|
||
## Schema Version: (Optional, defaults to version 1) | ||
schemaVersion = 2 | ||
` | ||
|
||
// SampleConfig returns the documentation about the sample configuration | ||
func (m *Ipmi) SampleConfig() string { | ||
return sampleConfig | ||
} | ||
|
||
// Description returns a basic description for the plugin functions | ||
func (m *Ipmi) Description() string { | ||
return "Read metrics from the bare metal servers via IPMI" | ||
} | ||
|
||
// Gather is the main execution function for the plugin | ||
func (m *Ipmi) Gather(acc telegraf.Accumulator) error { | ||
if len(m.Path) == 0 { | ||
return fmt.Errorf("ipmitool not found: verify that ipmitool is installed and that ipmitool is in your PATH") | ||
|
@@ -93,23 +102,32 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { | |
opts = conn.options() | ||
} | ||
opts = append(opts, "sdr") | ||
if m.SchemaVersion == 2 { | ||
opts = append(opts, "elist") | ||
} | ||
cmd := execCommand(m.Path, opts...) | ||
out, err := internal.CombinedOutputTimeout(cmd, m.Timeout.Duration) | ||
if err != nil { | ||
return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out)) | ||
} | ||
if m.SchemaVersion == 2 { | ||
return parseV2(acc, hostname, string(out)) | ||
} | ||
return parseV1(acc, hostname, string(out)) | ||
} | ||
|
||
func parseV1(acc telegraf.Accumulator, hostname string, cmdOut string) error { | ||
// each line will look something like | ||
// Planar VBAT | 3.05 Volts | ok | ||
lines := strings.Split(string(out), "\n") | ||
lines := strings.Split(cmdOut, "\n") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll bet this was an existing bug on Windows due to only allowing CR line endings. Ideally we would switch to bufio.Scanner here. |
||
for i := 0; i < len(lines); i++ { | ||
vals := strings.Split(lines[i], "|") | ||
if len(vals) != 3 { | ||
ipmiFields := extractFieldsFromRegex(`^(?P<name>[^|]*)\|(?P<description>[^|]*)\|(?P<status_code>.*)`, lines[i]) | ||
if len(ipmiFields) != 3 { | ||
continue | ||
} | ||
|
||
tags := map[string]string{ | ||
"name": transform(vals[0]), | ||
"name": transform(ipmiFields["name"]), | ||
} | ||
|
||
// tag the server if we have one | ||
|
@@ -118,18 +136,16 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { | |
} | ||
|
||
fields := make(map[string]interface{}) | ||
if strings.EqualFold("ok", trim(vals[2])) { | ||
if strings.EqualFold("ok", trim(ipmiFields["status_code"])) { | ||
fields["status"] = 1 | ||
} else { | ||
fields["status"] = 0 | ||
} | ||
|
||
val1 := trim(vals[1]) | ||
|
||
if strings.Index(val1, " ") > 0 { | ||
if strings.Index(ipmiFields["description"], " ") > 0 { | ||
// split middle column into value and unit | ||
valunit := strings.SplitN(val1, " ", 2) | ||
fields["value"] = Atofloat(valunit[0]) | ||
valunit := strings.SplitN(ipmiFields["description"], " ", 2) | ||
fields["value"] = aToFloat(valunit[0]) | ||
if len(valunit) > 1 { | ||
tags["unit"] = transform(valunit[1]) | ||
} | ||
|
@@ -143,13 +159,76 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error { | |
return nil | ||
} | ||
|
||
func Atofloat(val string) float64 { | ||
func parseV2(acc telegraf.Accumulator, hostname string, cmdOut string) error { | ||
// each line will look something like | ||
// CMOS Battery | 65h | ok | 7.1 | | ||
// Temp | 0Eh | ok | 3.1 | 55 degrees C | ||
// Drive 0 | A0h | ok | 7.1 | Drive Present | ||
lines := strings.Split(cmdOut, "\n") | ||
for i := 0; i < len(lines); i++ { | ||
ipmiFields := extractFieldsFromRegex(`^(?P<name>[^|]*)\|[^|]+\|(?P<status_code>[^|]*)\|(?P<entity_id>[^|]*)\|(?:(?P<description>[^|]+))?`, lines[i]) | ||
if len(ipmiFields) < 3 || len(ipmiFields) > 4 { | ||
continue | ||
} | ||
|
||
tags := map[string]string{ | ||
"name": transform(ipmiFields["name"]), | ||
} | ||
|
||
// tag the server is we have one | ||
if hostname != "" { | ||
tags["server"] = hostname | ||
} | ||
tags["entity_id"] = transform(ipmiFields["entity_id"]) | ||
tags["status_code"] = trim(ipmiFields["status_code"]) | ||
fields := make(map[string]interface{}) | ||
descriptionResults := extractFieldsFromRegex(`^(?P<analogValue>[0-9.]+)\s(?P<analogUnit>.*)|(?P<status>.+)|^$`, trim(ipmiFields["description"])) | ||
// This is an analog value with a unit | ||
if descriptionResults["analogValue"] != "" && len(descriptionResults["analogUnit"]) >= 1 { | ||
fields["value"] = aToFloat(descriptionResults["analogValue"]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if the value cannot be converted to a float then we should skip the line. |
||
// Some implementations add an extra status to their analog units | ||
unitResults := extractFieldsFromRegex(`^(?P<realAnalogUnit>[^,]+)(?:,\s*(?P<statusDesc>.*))?`, descriptionResults["analogUnit"]) | ||
tags["unit"] = transform(unitResults["realAnalogUnit"]) | ||
if unitResults["statusDesc"] != "" { | ||
tags["status_desc"] = transform(unitResults["statusDesc"]) | ||
} | ||
} else { | ||
// This is a status value | ||
fields["value"] = 0.0 | ||
// Extended status descriptions aren't required, in which case for consistency re-use the status code | ||
if descriptionResults["status"] != "" { | ||
tags["status_desc"] = transform(descriptionResults["status"]) | ||
} else { | ||
tags["status_desc"] = transform(ipmiFields["status_code"]) | ||
} | ||
} | ||
|
||
acc.AddFields("ipmi_sensor", fields, tags, time.Now()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Call time.Now() once before the loop, then pass the same time in for all metrics. Usually doesn't matter due to timestamp rounding but it is still a good idea. |
||
} | ||
|
||
return nil | ||
} | ||
|
||
// extractFieldsFromRegex consumes a regex with named capture groups and returns a kvp map of strings with the results | ||
func extractFieldsFromRegex(regex string, input string) map[string]string { | ||
re := regexp.MustCompile(regex) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should compile all the regex once at the package level, then reuse the compiled regex here. |
||
submatches := re.FindStringSubmatch(input) | ||
results := make(map[string]string) | ||
for i, name := range re.SubexpNames() { | ||
if name != input && name != "" && input != "" { | ||
results[name] = trim(submatches[i]) | ||
} | ||
} | ||
return results | ||
} | ||
|
||
// aToFloat converts the string representation of a number to a float64.
//
// It returns 0.0 when val cannot be parsed, so callers cannot tell a genuine
// zero reading apart from a parse failure.
func aToFloat(val string) float64 {
	f, err := strconv.ParseFloat(val, 64)
	if err != nil {
		return 0.0
	}
	return f
}
|
||
func trim(s string) string { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you change this to be `metric_version`? This will match the option I used in the `mysql` plugin for similar functionality. Make sure to use snake_case too.