From 16f5c205081fa14cb247dae3b7a1319028c46c5c Mon Sep 17 00:00:00 2001 From: Max U Date: Fri, 27 Jul 2018 13:37:39 -0700 Subject: [PATCH] address some of greg+chris's comments. includes config for trimspace and comment --- docs/DATA_FORMATS_INPUT.md | 8 +++++ internal/config/config.go | 27 +++++++++++++++ plugins/parsers/csv/parser.go | 10 ++++++ plugins/parsers/csv/parser_test.go | 53 +++++++++++++++++++++++++++--- plugins/parsers/registry.go | 8 +++++ 5 files changed, 102 insertions(+), 4 deletions(-) diff --git a/docs/DATA_FORMATS_INPUT.md b/docs/DATA_FORMATS_INPUT.md index cf23c5c1daee3..ac4157bac8dd2 100644 --- a/docs/DATA_FORMATS_INPUT.md +++ b/docs/DATA_FORMATS_INPUT.md @@ -810,6 +810,14 @@ or an error will be thrown. ## By default, the parser assumes a comma (",") # csv_delimiter = "," + ## The character reserved for marking a row as a comment row + ## Commented rows are skipped and not parsed + # csv_comment = "" + + ## If set to true, the parser will remove leading whitespace from fields + ## By default, this is false + # csv_trim_space = false + ## For assigning custom names to columns ## If this is specified, all columns must have a name ## ie there should be the same number of names listed diff --git a/internal/config/config.go b/internal/config/config.go index eb2d21bde3ae9..f1238b5cb9252 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -1444,6 +1444,14 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } + if node, ok := tbl.Fields["csv_comment"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if str, ok := kv.Value.(*ast.String); ok { + c.CSVComment = str.Value + } + } + } + if node, ok := tbl.Fields["csv_name_column"]; ok { if kv, ok := node.(*ast.KeyValue); ok { if str, ok := kv.Value.(*ast.String); ok { @@ -1487,6 +1495,25 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) { } } + if node, ok := tbl.Fields["csv_trim_space"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + 
if str, ok := kv.Value.(*ast.Boolean); ok { + //for config with no quotes + val, _ := strconv.ParseBool(str.Value) + c.CSVTrimSpace = val + } else if strVal, ok := kv.Value.(*ast.String); ok { + //for config with quotes; any other value type is + //ignored instead of panicking on a failed assertion + val, err := strconv.ParseBool(strVal.Value) + if err != nil { + log.Printf("E! parsing to bool: %v", err) + } else { + c.CSVTrimSpace = val + } + } + } + } + c.MetricName = name delete(tbl.Fields, "data_format") diff --git a/plugins/parsers/csv/parser.go b/plugins/parsers/csv/parser.go index 6d1dd10798a92..56c8eea8c3b3a 100644 --- a/plugins/parsers/csv/parser.go +++ b/plugins/parsers/csv/parser.go @@ -15,6 +15,8 @@ type CSVParser struct { MetricName string Header bool Delimiter string + Comment string + TrimSpace bool DataColumns []string TagColumns []string FieldColumns []string @@ -34,6 +36,14 @@ func (p *CSVParser) compile(r *bytes.Reader) (*csv.Reader, error) { } csvReader.Comma = runeStr[0] } + if p.Comment != "" { + runeStr := []rune(p.Comment) + if len(runeStr) > 1 { + return csvReader, fmt.Errorf("comment must be a single character, got: %s", p.Comment) + } + csvReader.Comment = runeStr[0] + } + csvReader.TrimLeadingSpace = p.TrimSpace return csvReader, nil } diff --git a/plugins/parsers/csv/parser_test.go b/plugins/parsers/csv/parser_test.go index 0762537c5e108..1de96be8d8d3b 100644 --- a/plugins/parsers/csv/parser_test.go +++ b/plugins/parsers/csv/parser_test.go @@ -140,9 +140,54 @@ func TestValueConversion(t *testing.T) { require.NoError(t, err2) //deep equal fields - for k := range goodMetric.Fields() { - log.Printf("expected field: %v, %T", goodMetric.Fields()[k], goodMetric.Fields()[k]) - log.Printf("returned field: %v, %T", returnedMetric.Fields()[k], returnedMetric.Fields()[k]) - } require.True(t, reflect.DeepEqual(goodMetric.Fields(), returnedMetric.Fields())) } + +func TestSkipComment(t *testing.T) { + p := CSVParser{ + Header: false, + Comment: "#", + DataColumns: []string{"first", "second", "third", "fourth"}, + FieldColumns: 
[]string{"second", "first", "third", "fourth"}, + MetricName: "test_value", + } + testCSV := `#3.3,4,true,hello +4,9.9,true,name_this` + + expectedFields := map[string]interface{}{ + "first": int64(4), + "second": 9.9, + "third": true, + "fourth": "name_this", + } + + metrics, err := p.Parse([]byte(testCSV)) + require.NoError(t, err) + require.Equal(t, true, reflect.DeepEqual(expectedFields, metrics[0].Fields())) +} + +func TestTrimSpace(t *testing.T) { + p := CSVParser{ + Header: false, + TrimSpace: true, + DataColumns: []string{"first", "second", "third", "fourth"}, + FieldColumns: []string{"second", "first", "third", "fourth"}, + MetricName: "test_value", + } + testCSV := ` 3.3, 4, true,hello` + + expectedFields := map[string]interface{}{ + "first": 3.3, + "second": int64(4), + "third": true, + "fourth": "hello", + } + + metrics, err := p.Parse([]byte(testCSV)) + require.NoError(t, err) + for k := range metrics[0].Fields() { + log.Printf("want: %v, %T", expectedFields[k], expectedFields[k]) + log.Printf("got: %v, %T", metrics[0].Fields()[k], metrics[0].Fields()[k]) + } + require.Equal(t, true, reflect.DeepEqual(expectedFields, metrics[0].Fields())) +} diff --git a/plugins/parsers/registry.go b/plugins/parsers/registry.go index aa0c5495c4775..454990d531f2c 100644 --- a/plugins/parsers/registry.go +++ b/plugins/parsers/registry.go @@ -103,6 +103,8 @@ type Config struct { //csv configuration CSVHeader bool CSVDelimiter string + CSVComment string + CSVTrimSpace bool CSVDataColumns []string CSVTagColumns []string CSVFieldColumns []string @@ -154,6 +156,8 @@ func NewParser(config *Config) (Parser, error) { parser, err = newCSVParser(config.MetricName, config.CSVHeader, config.CSVDelimiter, + config.CSVComment, + config.CSVTrimSpace, config.CSVDataColumns, config.CSVTagColumns, config.CSVFieldColumns, @@ -170,6 +174,8 @@ func NewParser(config *Config) (Parser, error) { func newCSVParser(metricName string, header bool, delimiter string, + comment string, + trimSpace 
bool, dataColumns []string, tagColumns []string, fieldColumns []string, @@ -181,6 +187,8 @@ func newCSVParser(metricName string, MetricName: metricName, Header: header, Delimiter: delimiter, + Comment: comment, + TrimSpace: trimSpace, DataColumns: dataColumns, TagColumns: tagColumns, FieldColumns: fieldColumns,