From 90e524a7f06da7f52dc6573a411d0c541cf8f303 Mon Sep 17 00:00:00 2001 From: Alexander Shepelin Date: Tue, 27 Feb 2018 00:08:06 +0300 Subject: [PATCH 1/3] Add regex processor plugin --- plugins/processors/all/all.go | 1 + plugins/processors/regex/README.md | 46 +++++ plugins/processors/regex/regex.go | 105 +++++++++++ plugins/processors/regex/regex_test.go | 249 +++++++++++++++++++++++++ 4 files changed, 401 insertions(+) create mode 100644 plugins/processors/regex/README.md create mode 100644 plugins/processors/regex/regex.go create mode 100644 plugins/processors/regex/regex_test.go diff --git a/plugins/processors/all/all.go b/plugins/processors/all/all.go index 462298f6bbbd1..8c666bd742e79 100644 --- a/plugins/processors/all/all.go +++ b/plugins/processors/all/all.go @@ -2,4 +2,5 @@ package all import ( _ "github.com/influxdata/telegraf/plugins/processors/printer" + _ "github.com/influxdata/telegraf/plugins/processors/regex" ) diff --git a/plugins/processors/regex/README.md b/plugins/processors/regex/README.md new file mode 100644 index 0000000000000..c9eec037b5161 --- /dev/null +++ b/plugins/processors/regex/README.md @@ -0,0 +1,46 @@ +# Regex Processor Plugin + +The `regex` plugin transforms tag and field values with a regex pattern. If the `result_key` parameter is present, it can produce new tags and fields from existing ones. 
+ +### Configuration: + +```toml +[[processors.regex]] + namepass = ["nginx_requests"] + + # Tag and field conversions are defined in separate sub-tables + [[processors.regex.tags]] + ## Tag to change + key = "resp_code" + ## Regular expression to match on a tag value + pattern = "^(\\d)\\d\\d$" + ## Pattern for constructing a new value (${1} represents first subgroup) + replacement = "${1}xx" + + [[processors.regex.fields]] + key = "request" + ## All the power of Go regular expressions is available here + ## For example, named subgroups + pattern = "^/api(?P<method>/[\\w/]+)\\S*" + replacement = "${method}" + ## If result_key is present, a new field will be created + ## instead of changing existing field + result_key = "method" + + # Multiple conversions may be applied for one field sequentially + # Let's extract one more value + [[processors.regex.fields]] + key = "request" + pattern = ".*category=(\\w+).*" + replacement = "${1}" + result_key = "search_category" +``` + +### Tags: + +No tags are applied by this processor. 
+ +### Example Output: +``` +nginx_requests,verb=GET,resp_code=2xx request="/api/search/?category=plugins&q=regex&sort=asc",method="/search/",search_category="plugins",referrer="-",ident="-",http_version=1.1,agent="UserAgent",client_ip="127.0.0.1",auth="-",resp_bytes=270i 1519652321000000000 +``` diff --git a/plugins/processors/regex/regex.go b/plugins/processors/regex/regex.go new file mode 100644 index 0000000000000..5420cf710a4eb --- /dev/null +++ b/plugins/processors/regex/regex.go @@ -0,0 +1,105 @@ +package regex + +import ( + "regexp" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/processors" +) + +type Regex struct { + Tags []converter + Fields []converter +} + +type converter struct { + Key string + Pattern string + Replacement string + ResultKey string +} + +const sampleConfig = ` + ## Tag and field conversions defined in a separate sub-tables + # [[processors.regex.tags]] + # ## Tag to change + # key = "resp_code" + # ## Regular expression to match on a tag value + # pattern = "^(\\d)\\d\\d$" + # ## Pattern for constructing a new value (${1} represents first subgroup) + # replacement = "${1}xx" + + # [[processors.regex.fields]] + # key = "request" + # ## All the power of the Go regular expressions available here + # ## For example, named subgroups + # pattern = "^/api(?P/[\\w/]+)\\S*" + # replacement = "${method}" + # ## If result_key is present, a new field will be created + # ## instead of changing existing field + # result_key = "method" + + ## Multiple conversions may be applied for one field sequentially + ## Let's extract one more value + # [[processors.regex.fields]] + # key = "request" + # pattern = ".*category=(\\w+).*" + # replacement = "${1}" + # result_key = "search_category" +` + +func (r *Regex) SampleConfig() string { + return sampleConfig +} + +func (r *Regex) Description() string { + return "Transforms tag and field values with regex pattern" +} + +func (r *Regex) Apply(in ...telegraf.Metric) 
[]telegraf.Metric { + for _, metric := range in { + for _, converter := range r.Tags { + if value, ok := metric.Tags()[converter.Key]; ok { + metric.AddTag( + getKey(converter), + getValue(converter, value), + ) + } + } + + for _, converter := range r.Fields { + if fieldValue, ok := metric.Fields()[converter.Key]; ok { + switch fieldValue := fieldValue.(type) { + case string: + metric.AddField( + getKey(converter), + getValue(converter, fieldValue), + ) + } + } + } + } + + return in +} + +func getKey(c converter) string { + if c.ResultKey != "" { + return c.ResultKey + } + return c.Key +} + +func getValue(c converter, value string) string { + regex := regexp.MustCompile(c.Pattern) + if c.ResultKey != "" && !regex.MatchString(value) { + return "" + } + return regex.ReplaceAllString(value, c.Replacement) +} + +func init() { + processors.Add("regex", func() telegraf.Processor { + return &Regex{} + }) +} diff --git a/plugins/processors/regex/regex_test.go b/plugins/processors/regex/regex_test.go new file mode 100644 index 0000000000000..9ff70b35dc494 --- /dev/null +++ b/plugins/processors/regex/regex_test.go @@ -0,0 +1,249 @@ +package regex + +import ( + "testing" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" + "github.com/stretchr/testify/assert" +) + +func newM1() telegraf.Metric { + m1, _ := metric.New("access_log", + map[string]string{ + "verb": "GET", + "resp_code": "200", + }, + map[string]interface{}{ + "request": "/users/42/", + }, + time.Now(), + ) + return m1 +} + +func newM2() telegraf.Metric { + m2, _ := metric.New("access_log", + map[string]string{ + "verb": "GET", + "resp_code": "200", + }, + map[string]interface{}{ + "request": "/api/search/?category=plugins&q=regex&sort=asc", + "ignore_number": int64(200), + "ignore_bool": true, + }, + time.Now(), + ) + return m2 +} + +func TestFieldConversions(t *testing.T) { + tests := []struct { + message string + converter converter + expectedFields map[string]interface{} + 
}{ + { + message: "Should change existing field", + converter: converter{ + Key: "request", + Pattern: "^/users/\\d+/$", + Replacement: "/users/{id}/", + }, + expectedFields: map[string]interface{}{ + "request": "/users/{id}/", + }, + }, + { + message: "Should add new field", + converter: converter{ + Key: "request", + Pattern: "^/users/\\d+/$", + Replacement: "/users/{id}/", + ResultKey: "normalized_request", + }, + expectedFields: map[string]interface{}{ + "request": "/users/42/", + "normalized_request": "/users/{id}/", + }, + }, + } + + for _, test := range tests { + regex := &Regex{} + regex.Fields = []converter{ + test.converter, + } + + processed := regex.Apply(newM1()) + + expectedTags := map[string]string{ + "verb": "GET", + "resp_code": "200", + } + + assert.Equal(t, test.expectedFields, processed[0].Fields(), test.message) + assert.Equal(t, expectedTags, processed[0].Tags(), "Should not change tags") + assert.Equal(t, "access_log", processed[0].Name(), "Should not change name") + } +} + +func TestTagConversions(t *testing.T) { + tests := []struct { + message string + converter converter + expectedTags map[string]string + }{ + { + message: "Should change existing tag", + converter: converter{ + Key: "resp_code", + Pattern: "^(\\d)\\d\\d$", + Replacement: "${1}xx", + }, + expectedTags: map[string]string{ + "verb": "GET", + "resp_code": "2xx", + }, + }, + { + message: "Should add new tag", + converter: converter{ + Key: "resp_code", + Pattern: "^(\\d)\\d\\d$", + Replacement: "${1}xx", + ResultKey: "resp_code_group", + }, + expectedTags: map[string]string{ + "verb": "GET", + "resp_code": "200", + "resp_code_group": "2xx", + }, + }, + } + + for _, test := range tests { + regex := &Regex{} + regex.Tags = []converter{ + test.converter, + } + + processed := regex.Apply(newM1()) + + expectedFields := map[string]interface{}{ + "request": "/users/42/", + } + + assert.Equal(t, expectedFields, processed[0].Fields(), test.message, "Should not change fields") + 
assert.Equal(t, test.expectedTags, processed[0].Tags(), test.message) + assert.Equal(t, "access_log", processed[0].Name(), "Should not change name") + } +} + +func TestMultipleConversions(t *testing.T) { + regex := &Regex{} + regex.Tags = []converter{ + { + Key: "resp_code", + Pattern: "^(\\d)\\d\\d$", + Replacement: "${1}xx", + ResultKey: "resp_code_group", + }, + { + Key: "resp_code_group", + Pattern: "2xx", + Replacement: "OK", + ResultKey: "resp_code_text", + }, + } + regex.Fields = []converter{ + { + Key: "request", + Pattern: "^/api(?P/[\\w/]+)\\S*", + Replacement: "${method}", + ResultKey: "method", + }, + { + Key: "request", + Pattern: ".*category=(\\w+).*", + Replacement: "${1}", + ResultKey: "search_category", + }, + } + + processed := regex.Apply(newM2()) + + expectedFields := map[string]interface{}{ + "request": "/api/search/?category=plugins&q=regex&sort=asc", + "method": "/search/", + "search_category": "plugins", + "ignore_number": int64(200), + "ignore_bool": true, + } + expectedTags := map[string]string{ + "verb": "GET", + "resp_code": "200", + "resp_code_group": "2xx", + "resp_code_text": "OK", + } + + assert.Equal(t, expectedFields, processed[0].Fields()) + assert.Equal(t, expectedTags, processed[0].Tags()) +} + +func TestNoMatches(t *testing.T) { + tests := []struct { + message string + converter converter + expectedFields map[string]interface{} + }{ + { + message: "Should not change anything if there is no field with given key", + converter: converter{ + Key: "not_exists", + Pattern: "\\.*", + Replacement: "x", + }, + expectedFields: map[string]interface{}{ + "request": "/users/42/", + }, + }, + { + message: "Should not change anything if regex doesn't match", + converter: converter{ + Key: "request", + Pattern: "not_match", + Replacement: "x", + }, + expectedFields: map[string]interface{}{ + "request": "/users/42/", + }, + }, + { + message: "Should emit empty string when result_key given but regex doesn't match", + converter: converter{ + Key: 
"request", + Pattern: "not_match", + Replacement: "x", + ResultKey: "new_field", + }, + expectedFields: map[string]interface{}{ + "request": "/users/42/", + "new_field": "", + }, + }, + } + + for _, test := range tests { + regex := &Regex{} + regex.Fields = []converter{ + test.converter, + } + + processed := regex.Apply(newM1()) + + assert.Equal(t, test.expectedFields, processed[0].Fields(), test.message) + } +} From 0978bb2371fbe47ad6e5f4a57e1f75cefc5f1726 Mon Sep 17 00:00:00 2001 From: Alexander Shepelin Date: Tue, 27 Feb 2018 09:21:50 +0300 Subject: [PATCH 2/3] Rename var --- plugins/processors/regex/regex.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/processors/regex/regex.go b/plugins/processors/regex/regex.go index 5420cf710a4eb..32afbe49d12c1 100644 --- a/plugins/processors/regex/regex.go +++ b/plugins/processors/regex/regex.go @@ -68,12 +68,12 @@ func (r *Regex) Apply(in ...telegraf.Metric) []telegraf.Metric { } for _, converter := range r.Fields { - if fieldValue, ok := metric.Fields()[converter.Key]; ok { - switch fieldValue := fieldValue.(type) { + if value, ok := metric.Fields()[converter.Key]; ok { + switch value := value.(type) { case string: metric.AddField( getKey(converter), - getValue(converter, fieldValue), + getValue(converter, value), ) } } From 143e3adcd131acd9f1cce31e1ba1f1bb4e1300fa Mon Sep 17 00:00:00 2001 From: Alexander Shepelin Date: Sun, 20 May 2018 19:33:46 +0300 Subject: [PATCH 3/3] Compile regexp once, use GetTag/GetField to reduce allocations --- plugins/processors/regex/regex.go | 51 ++++++++++++++------------ plugins/processors/regex/regex_test.go | 32 ++++++++++++++-- 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/plugins/processors/regex/regex.go b/plugins/processors/regex/regex.go index 32afbe49d12c1..3282406c8599b 100644 --- a/plugins/processors/regex/regex.go +++ b/plugins/processors/regex/regex.go @@ -8,8 +8,9 @@ import ( ) type Regex struct { - Tags []converter - Fields 
[]converter + Tags []converter + Fields []converter + regexCache map[string]*regexp.Regexp } type converter struct { @@ -48,6 +49,12 @@ const sampleConfig = ` # result_key = "search_category" ` +func NewRegex() *Regex { + return &Regex{ + regexCache: make(map[string]*regexp.Regexp), + } +} + func (r *Regex) SampleConfig() string { return sampleConfig } @@ -59,22 +66,16 @@ func (r *Regex) Description() string { func (r *Regex) Apply(in ...telegraf.Metric) []telegraf.Metric { for _, metric := range in { for _, converter := range r.Tags { - if value, ok := metric.Tags()[converter.Key]; ok { - metric.AddTag( - getKey(converter), - getValue(converter, value), - ) + if value, ok := metric.GetTag(converter.Key); ok { + metric.AddTag(r.convert(converter, value)) } } for _, converter := range r.Fields { - if value, ok := metric.Fields()[converter.Key]; ok { + if value, ok := metric.GetField(converter.Key); ok { switch value := value.(type) { case string: - metric.AddField( - getKey(converter), - getValue(converter, value), - ) + metric.AddField(r.convert(converter, value)) } } } @@ -83,23 +84,27 @@ func (r *Regex) Apply(in ...telegraf.Metric) []telegraf.Metric { return in } -func getKey(c converter) string { - if c.ResultKey != "" { - return c.ResultKey +func (r *Regex) convert(c converter, src string) (string, string) { + regex, compiled := r.regexCache[c.Pattern] + if !compiled { + regex = regexp.MustCompile(c.Pattern) + r.regexCache[c.Pattern] = regex } - return c.Key -} -func getValue(c converter, value string) string { - regex := regexp.MustCompile(c.Pattern) - if c.ResultKey != "" && !regex.MatchString(value) { - return "" + value := "" + if c.ResultKey == "" || regex.MatchString(src) { + value = regex.ReplaceAllString(src, c.Replacement) } - return regex.ReplaceAllString(value, c.Replacement) + + if c.ResultKey != "" { + return c.ResultKey, value + } + + return c.Key, value } func init() { processors.Add("regex", func() telegraf.Processor { - return &Regex{} + return 
NewRegex() }) } diff --git a/plugins/processors/regex/regex_test.go b/plugins/processors/regex/regex_test.go index 9ff70b35dc494..e7c15e5aaef52 100644 --- a/plugins/processors/regex/regex_test.go +++ b/plugins/processors/regex/regex_test.go @@ -72,7 +72,7 @@ func TestFieldConversions(t *testing.T) { } for _, test := range tests { - regex := &Regex{} + regex := NewRegex() regex.Fields = []converter{ test.converter, } @@ -125,7 +125,7 @@ func TestTagConversions(t *testing.T) { } for _, test := range tests { - regex := &Regex{} + regex := NewRegex() regex.Tags = []converter{ test.converter, } @@ -143,7 +143,7 @@ func TestTagConversions(t *testing.T) { } func TestMultipleConversions(t *testing.T) { - regex := &Regex{} + regex := NewRegex() regex.Tags = []converter{ { Key: "resp_code", @@ -237,7 +237,7 @@ func TestNoMatches(t *testing.T) { } for _, test := range tests { - regex := &Regex{} + regex := NewRegex() regex.Fields = []converter{ test.converter, } @@ -247,3 +247,27 @@ func TestNoMatches(t *testing.T) { assert.Equal(t, test.expectedFields, processed[0].Fields(), test.message) } } + +func BenchmarkConversions(b *testing.B) { + regex := NewRegex() + regex.Tags = []converter{ + { + Key: "resp_code", + Pattern: "^(\\d)\\d\\d$", + Replacement: "${1}xx", + ResultKey: "resp_code_group", + }, + } + regex.Fields = []converter{ + { + Key: "request", + Pattern: "^/users/\\d+/$", + Replacement: "/users/{id}/", + }, + } + + for n := 0; n < b.N; n++ { + processed := regex.Apply(newM1()) + _ = processed + } +}