diff --git a/autocomplete/autocomplete.go b/autocomplete/autocomplete.go index 313467ba6..19fa449ca 100644 --- a/autocomplete/autocomplete.go +++ b/autocomplete/autocomplete.go @@ -73,7 +73,7 @@ func (h *Handler) requestExpr(r *http.Request) (*where.Where, *where.Where, map[ return wr, pw, usedTags, nil } - terms, err := finder.ParseTaggedConditions(expr) + terms, err := finder.ParseTaggedConditions(expr, h.config.ClickHouse.TaggedCosts) if err != nil { return wr, pw, usedTags, err } diff --git a/config/config.go b/config/config.go index 7ae6e058a..f9c11b360 100644 --- a/config/config.go +++ b/config/config.go @@ -43,6 +43,11 @@ type IndexReverseRule struct { Reverse string `toml:"reverse" json:"reverse" comment:"same as index-reverse"` } +type Costs struct { + Cost int `toml:"cost" json:"cost" comment:"default cost (for wildcarded equalence or matched with regex, or if no value cost set)"` + ValuesCost map[string]int `toml:"values-cost" json:"values-cost" comment:"cost with some value (for equalence without wildcards) (additional tuning, usually not needed)"` +} + // IndexReverses is a slise of ptrs to IndexReverseRule type IndexReverses []*IndexReverseRule @@ -64,23 +69,24 @@ var IndexReverseNames = []string{"auto", "direct", "reversed"} // ClickHouse config type ClickHouse struct { - URL string `toml:"url" json:"url" comment:"see https://clickhouse.tech/docs/en/interfaces/http"` - DataTimeout time.Duration `toml:"data-timeout" json:"data-timeout" comment:"total timeout to fetch data"` - IndexTable string `toml:"index-table" json:"index-table" comment:"see doc/index-table.md"` - IndexUseDaily bool `toml:"index-use-daily" json:"index-use-daily"` - IndexReverse string `toml:"index-reverse" json:"index-reverse" comment:"see doc/config.md"` - IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"` - IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch 
series list from index"` - TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"` - TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"` - TreeTable string `toml:"tree-table" json:"tree-table" comment:"old index table, DEPRECATED, see description in doc/config.md" commented:"true"` - ReverseTreeTable string `toml:"reverse-tree-table" json:"reverse-tree-table" commented:"true"` - DateTreeTable string `toml:"date-tree-table" json:"date-tree-table" commented:"true"` - DateTreeTableVersion int `toml:"date-tree-table-version" json:"date-tree-table-version" commented:"true"` - TreeTimeout time.Duration `toml:"tree-timeout" json:"tree-timeout" commented:"true"` - TagTable string `toml:"tag-table" json:"tag-table" comment:"is not recommended to use, https://github.com/lomik/graphite-clickhouse/wiki/TagsRU" commented:"true"` - ExtraPrefix string `toml:"extra-prefix" json:"extra-prefix" comment:"add extra prefix (directory in graphite) for all metrics, w/o trailing dot"` - ConnectTimeout time.Duration `toml:"connect-timeout" json:"connect-timeout" comment:"TCP connection timeout"` + URL string `toml:"url" json:"url" comment:"see https://clickhouse.tech/docs/en/interfaces/http"` + DataTimeout time.Duration `toml:"data-timeout" json:"data-timeout" comment:"total timeout to fetch data"` + IndexTable string `toml:"index-table" json:"index-table" comment:"see doc/index-table.md"` + IndexUseDaily bool `toml:"index-use-daily" json:"index-use-daily"` + IndexReverse string `toml:"index-reverse" json:"index-reverse" comment:"see doc/config.md"` + IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"` + IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch series list from index"` + 
TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"` + TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"` + TaggedCosts map[string]*Costs `toml:"tagged-costs" json:"tagged-costs" commented:"true" comment:"costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags"` + TreeTable string `toml:"tree-table" json:"tree-table" comment:"old index table, DEPRECATED, see description in doc/config.md" commented:"true"` + ReverseTreeTable string `toml:"reverse-tree-table" json:"reverse-tree-table" commented:"true"` + DateTreeTable string `toml:"date-tree-table" json:"date-tree-table" commented:"true"` + DateTreeTableVersion int `toml:"date-tree-table-version" json:"date-tree-table-version" commented:"true"` + TreeTimeout time.Duration `toml:"tree-timeout" json:"tree-timeout" commented:"true"` + TagTable string `toml:"tag-table" json:"tag-table" comment:"is not recommended to use, https://github.com/lomik/graphite-clickhouse/wiki/TagsRU" commented:"true"` + ExtraPrefix string `toml:"extra-prefix" json:"extra-prefix" comment:"add extra prefix (directory in graphite) for all metrics, w/o trailing dot"` + ConnectTimeout time.Duration `toml:"connect-timeout" json:"connect-timeout" comment:"TCP connection timeout"` // TODO: remove in v0.14 DataTableLegacy string `toml:"data-table" json:"data-table" comment:"will be removed in 0.14" commented:"true"` // TODO: remove in v0.14 diff --git a/deploy/doc/config.md b/deploy/doc/config.md index d6b065020..62cd0dbb3 100644 --- a/deploy/doc/config.md +++ b/deploy/doc/config.md @@ -63,6 +63,20 @@ When `reverse = true` is set for data-table, there are two possibles cases for [ Depends on it for having a proper retention and aggregation you must additionally set `rollup-use-reverted = true` 
for the first case and `rollup-use-reverted = false` for the second. +#### Additional tuning of tagged find for seriesByTag and autocomplete +Only one tag is used as the filter for the index field Tag1, see graphite_tagged table [structure](https://github.com/lomik/carbon-clickhouse#clickhouse-configuration) + +So, if the first tag in filter is costly (poor selectivity), like environment (with several possible values), query performance will be degraded. +Tune this with `tagged-costs` options: + +` +tagged-costs = { + "environment" = { cost = 100 }, + "project" = { values-cost = { "HugeProject" = 90 } } # overwrite tag value cost for some value only +}` + +Default cost is 0 and positive or negative numbers can be used. So if environment is the first tag filter in a query, it will be used as primary only if there are no other filters with an equality operation. Costs from values-cost are also applied to regex match or wildcarded equality. + ## Carbonlink `[carbonlink]` The configuration to get metrics from carbon-cache. See details in [graphite-web](https://graphite.readthedocs.io/en/latest/carbon-daemons.html#carbon-relay-py) documentation. diff --git a/doc/config.md b/doc/config.md index 5289ed72e..d632a964f 100644 --- a/doc/config.md +++ b/doc/config.md @@ -66,6 +66,20 @@ When `reverse = true` is set for data-table, there are two possibles cases for [ Depends on it for having a proper retention and aggregation you must additionally set `rollup-use-reverted = true` for the first case and `rollup-use-reverted = false` for the second. +#### Additional tuning of tagged find for seriesByTag and autocomplete +Only one tag is used as the filter for the index field Tag1, see graphite_tagged table [structure](https://github.com/lomik/carbon-clickhouse#clickhouse-configuration) + +So, if the first tag in filter is costly (poor selectivity), like environment (with several possible values), query performance will be degraded.
+Tune this with `tagged-costs` options: + +` +tagged-costs = { + "environment" = { cost = 100 }, + "project" = { values-cost = { "HugeProject" = 90 } } # overwrite tag value cost for some value only +}` + +Default cost is 0 and positive or negative numbers can be used. So if environment is the first tag filter in a query, it will be used as primary only if there are no other filters with an equality operation. Costs from values-cost are also applied to regex match or wildcarded equality. + ## Carbonlink `[carbonlink]` The configuration to get metrics from carbon-cache. See details in [graphite-web](https://graphite.readthedocs.io/en/latest/carbon-daemons.html#carbon-relay-py) documentation. @@ -131,6 +145,9 @@ It's possible to set multiple loggers. See `Config` description in [config.go](h tagged-table = "graphite_tagged" # or how long the daemon will query tags during autocomplete tagged-autocomplete-days = 7 + + # costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags + # [clickhouse.tagged-costs] # old index table, DEPRECATED, see description in doc/config.md # tree-table = "" # reverse-tree-table = "" diff --git a/finder/finder.go b/finder/finder.go index afefa76e6..bec527e1b 100644 --- a/finder/finder.go +++ b/finder/finder.go @@ -29,7 +29,7 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr var f Finder if config.ClickHouse.TaggedTable != "" && strings.HasPrefix(strings.TrimSpace(query), "seriesByTag") { - f = NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, false, opts) + f = NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, false, opts, config.ClickHouse.TaggedCosts) if len(config.Common.Blacklist) > 0 { f = WrapBlacklist(f, config.Common.Blacklist) @@ -112,7 +112,7 @@ func FindTagged(config *config.Config, ctx context.Context, terms []TaggedTerm, return Result(plain), nil } - fnd := NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, true,
opts) + fnd := NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, true, opts, config.ClickHouse.TaggedCosts) err := fnd.ExecutePrepared(ctx, terms, from, until) if err != nil { diff --git a/finder/tagged.go b/finder/tagged.go index f6206310e..9c5d8c230 100644 --- a/finder/tagged.go +++ b/finder/tagged.go @@ -10,6 +10,7 @@ import ( "time" "github.com/go-graphite/carbonapi/pkg/parser" + "github.com/lomik/graphite-clickhouse/config" "github.com/lomik/graphite-clickhouse/helper/clickhouse" "github.com/lomik/graphite-clickhouse/pkg/scope" "github.com/lomik/graphite-clickhouse/pkg/where" @@ -29,6 +30,10 @@ type TaggedTerm struct { Op TaggedTermOp Value string HasWildcard bool // only for TaggedTermEq + + Cost int // tag cost for use ad primary filter (use tag with maximal selectivity). 0 by default, minimal is better. + // __name__ tag is prefered, if some tag has better selectivity than name, set it cost to < 0 + // values with wildcards or regex matching also has lower priority, set if needed it cost to < 0 } type TaggedTermList []TaggedTerm @@ -59,19 +64,22 @@ func (s TaggedTermList) Less(i, j int) bool { } type TaggedFinder struct { - url string // clickhouse dsn - table string // graphite_tag table - absKeepEncoded bool // Abs returns url encoded value. For queries from prometheus - opts clickhouse.Options // clickhouse query timeout - body []byte // clickhouse response + url string // clickhouse dsn + table string // graphite_tag table + absKeepEncoded bool // Abs returns url encoded value. 
For queries from prometheus + opts clickhouse.Options // clickhouse query timeout + taggedCosts map[string]*config.Costs // costs for taggs (sor tune index search) + + body []byte // clickhouse response } -func NewTagged(url string, table string, absKeepEncoded bool, opts clickhouse.Options) *TaggedFinder { +func NewTagged(url string, table string, absKeepEncoded bool, opts clickhouse.Options, taggedCosts map[string]*config.Costs) *TaggedFinder { return &TaggedFinder{ url: url, table: table, absKeepEncoded: absKeepEncoded, opts: opts, + taggedCosts: taggedCosts, } } @@ -180,7 +188,35 @@ func TaggedTermWhereN(term *TaggedTerm) (string, error) { } } -func ParseTaggedConditions(conditions []string) ([]TaggedTerm, error) { +func setCost(term *TaggedTerm, costs *config.Costs) { + if len(costs.ValuesCost) > 0 { + if cost, ok := costs.ValuesCost[term.Value]; ok { + term.Cost = cost + return + } + } + if term.Op == TaggedTermEq && !term.HasWildcard { + term.Cost = costs.Cost // only for non-wildcared eq + } +} + +func lessCosts(terms []TaggedTerm, i, j int) (bool, bool) { + if terms[i].Cost != terms[j].Cost { + if terms[i].Cost == 0 && (terms[i].Op != TaggedTermEq || terms[i].HasWildcard) { + return false, false + } + if terms[j].Cost == 0 && (terms[j].Op != TaggedTermEq || terms[j].HasWildcard) { + return false, false + } + + // compare taggs costs + return terms[i].Cost < terms[j].Cost, true + } + + return false, false +} + +func ParseTaggedConditions(conditions []string, taggedCosts map[string]*config.Costs) ([]TaggedTerm, error) { terms := make([]TaggedTerm, len(conditions)) for i := 0; i < len(conditions); i++ { @@ -226,14 +262,54 @@ func ParseTaggedConditions(conditions []string) ([]TaggedTerm, error) { default: return nil, fmt.Errorf("wrong seriesByTag expr: %#v", s) } + if len(taggedCosts) > 0 { + if costs, ok := taggedCosts[terms[i].Key]; ok { + setCost(&terms[i], costs) + } + } } - sort.Sort(TaggedTermList(terms)) + if len(taggedCosts) == 0 { + 
sort.Sort(TaggedTermList(terms)) + } else { + // compare with taggs costs + sort.Slice(terms, func(i, j int) bool { + eq, comparable := lessCosts(terms, i, j) + if comparable { + return eq + } + + if terms[i].Op < terms[j].Op { + return true + } + if terms[i].Op > terms[j].Op { + return false + } + + if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[j].HasWildcard { + // globs as fist eq might be have a bad perfomance + return true + } + + if terms[i].Key == "__name__" && terms[j].Key != "__name__" { + return true + } + + if (terms[i].Cost >= 0 || terms[j].Cost >= 0) && terms[i].HasWildcard == terms[j].HasWildcard { + // compare taggs costs + if terms[i].Cost < terms[j].Cost { + return true + } + } + + return false + }) + } return terms, nil } -func ParseSeriesByTag(query string) ([]TaggedTerm, error) { +func ParseSeriesByTag(query string, tagCosts map[string]*config.Costs) ([]TaggedTerm, error) { expr, _, err := parser.ParseExpr(query) if err != nil { return nil, err @@ -269,7 +345,7 @@ func ParseSeriesByTag(query string) ([]TaggedTerm, error) { conditions = append(conditions, s) } - return ParseTaggedConditions(conditions) + return ParseTaggedConditions(conditions, tagCosts) } func TaggedWhere(terms []TaggedTerm) (*where.Where, *where.Where, error) { @@ -296,7 +372,7 @@ func TaggedWhere(terms []TaggedTerm) (*where.Where, *where.Where, error) { } func (t *TaggedFinder) Execute(ctx context.Context, query string, from int64, until int64) error { - terms, err := ParseSeriesByTag(query) + terms, err := ParseSeriesByTag(query, t.taggedCosts) if err != nil { return err } diff --git a/finder/tagged_test.go b/finder/tagged_test.go index 034236e8c..34a9b76b1 100644 --- a/finder/tagged_test.go +++ b/finder/tagged_test.go @@ -4,6 +4,7 @@ import ( "fmt" "testing" + "github.com/lomik/graphite-clickhouse/config" "github.com/stretchr/testify/assert" ) @@ -50,7 +51,7 @@ func TestTaggedWhere(t *testing.T) { for _, test := range table { testName := 
fmt.Sprintf("query: %#v", test.query) - terms, err := ParseSeriesByTag(test.query) + terms, err := ParseSeriesByTag(test.query, nil) if !test.isErr { assert.NoError(err, testName+", err") @@ -74,34 +75,139 @@ func TestParseSeriesByTag(t *testing.T) { assert := assert.New(t) ok := func(query string, expected []TaggedTerm) { - p, err := ParseSeriesByTag(query) + p, err := ParseSeriesByTag(query, nil) assert.NoError(err) - assert.Equal(expected, p) + length := len(expected) + if length < len(p) { + length = len(p) + } + for i := 0; i < length; i++ { + if i >= len(p) { + t.Errorf("%s\n- [%d]=%+v", query, i, expected[i]) + } else if i >= len(expected) { + t.Errorf("%s\n+ [%d]=%+v", query, i, p[i]) + } else if p[i] != expected[i] { + t.Errorf("%s\n- [%d]=%+v\n+ [%d]=%+v", query, i, expected[i], i, p[i]) + } + } } ok(`seriesByTag('key=value')`, []TaggedTerm{ - TaggedTerm{Op: TaggedTermEq, Key: "key", Value: "value"}, + {Op: TaggedTermEq, Key: "key", Value: "value"}, }) ok(`seriesByTag('name=rps')`, []TaggedTerm{ - TaggedTerm{Op: TaggedTermEq, Key: "__name__", Value: "rps"}, + {Op: TaggedTermEq, Key: "__name__", Value: "rps"}, }) ok(`seriesByTag('name=~cpu.usage')`, []TaggedTerm{ - TaggedTerm{Op: TaggedTermMatch, Key: "__name__", Value: "cpu.usage"}, + {Op: TaggedTermMatch, Key: "__name__", Value: "cpu.usage"}, }) ok(`seriesByTag('name!=cpu.usage')`, []TaggedTerm{ - TaggedTerm{Op: TaggedTermNe, Key: "__name__", Value: "cpu.usage"}, + {Op: TaggedTermNe, Key: "__name__", Value: "cpu.usage"}, }) ok(`seriesByTag('name!=~cpu.usage')`, []TaggedTerm{ - TaggedTerm{Op: TaggedTermNotMatch, Key: "__name__", Value: "cpu.usage"}, + {Op: TaggedTermNotMatch, Key: "__name__", Value: "cpu.usage"}, }) ok(`seriesByTag('cpu=cpu-total','host=~Vladimirs-MacBook-Pro\.local')`, []TaggedTerm{ - TaggedTerm{Op: TaggedTermEq, Key: "cpu", Value: "cpu-total"}, - TaggedTerm{Op: TaggedTermMatch, Key: "host", Value: `Vladimirs-MacBook-Pro\.local`}, + {Op: TaggedTermEq, Key: "cpu", Value: "cpu-total"}, + 
{Op: TaggedTermMatch, Key: "host", Value: `Vladimirs-MacBook-Pro\.local`}, }) } + +func TestParseSeriesByTagWithCosts(t *testing.T) { + assert := assert.New(t) + + taggedCosts := map[string]*config.Costs{ + "environment": {Cost: 100}, + "dc": {Cost: 60}, + "project": {Cost: 50}, + "__name__": {Cost: 0, ValuesCost: map[string]int{"high_cost": 70}}, + "key": {ValuesCost: map[string]int{"value2": 70, "value3": -1, "val*4": -1, "^val.*4$": -1}}, + } + + ok := func(query string, expected []TaggedTerm) { + p, err := ParseSeriesByTag(query, taggedCosts) + assert.NoError(err) + length := len(expected) + if length < len(p) { + length = len(p) + } + for i := 0; i < length; i++ { + if i >= len(p) { + t.Errorf("%s\n- [%d]=%+v", query, i, expected[i]) + } else if i >= len(expected) { + t.Errorf("%s\n+ [%d]=%+v", query, i, p[i]) + } else if p[i] != expected[i] { + t.Errorf("%s\n- [%d]=%+v\n+ [%d]=%+v", query, i, expected[i], i, p[i]) + } + } + } + + ok(`seriesByTag('environment=production', 'dc=west', 'key=value')`, []TaggedTerm{ + {Op: TaggedTermEq, Key: "key", Value: "value"}, + {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + }) + + // Check for values cost (key=value2) + ok(`seriesByTag('environment=production', 'dc=west', 'key=value2')`, []TaggedTerm{ + {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60}, + {Op: TaggedTermEq, Key: "key", Value: "value2", Cost: 70}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + }) + + // Check for __name_ preference + ok(`seriesByTag('environment=production', 'dc=west', 'key=value', 'name=cpu.load_avg')`, []TaggedTerm{ + {Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"}, + {Op: TaggedTermEq, Key: "key", Value: "value"}, + {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + }) + + // Check for __name_ preference overrided + 
ok(`seriesByTag('environment=production', 'dc=west', 'name=cpu.load_avg', 'key=value3')`, []TaggedTerm{ + {Op: TaggedTermEq, Key: "key", Value: "value3", Cost: -1}, + {Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"}, + {Op: TaggedTermEq, Key: "dc", Value: "west", Cost: 60}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + }) + + // wildcard (dc=west*) + ok(`seriesByTag('environment=production', 'dc=west*', 'name=cpu.load_avg', 'key=value3')`, []TaggedTerm{ + {Op: TaggedTermEq, Key: "key", Value: "value3", Cost: -1}, + {Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + {Op: TaggedTermEq, Key: "dc", Value: "west*", HasWildcard: true}, + }) + + // wildcard cost -1 + ok(`seriesByTag('dc=west*', 'environment=production', 'name=cpu.load_avg', 'key=val*4')`, []TaggedTerm{ + {Op: TaggedTermEq, Key: "key", Value: "val*4", Cost: -1, HasWildcard: true}, + {Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + {Op: TaggedTermEq, Key: "dc", Value: "west*", HasWildcard: true}, + }) + + // match cost -1 - not as wildcard + ok(`seriesByTag('dc=~west.*', 'environment=production', 'name=cpu.load_avg', 'key=~^val.*4$')`, []TaggedTerm{ + {Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1}, + {Op: TaggedTermEq, Key: "__name__", Value: "cpu.load_avg"}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + {Op: TaggedTermMatch, Key: "dc", Value: "west.*"}, + }) + + // reduce cost for __name__ + ok(`seriesByTag('dc=~west.*', 'environment=production', 'name=high_cost', 'key=~^val.*4$', 'key2=~^val.*4$', 'key3=val.*4')`, []TaggedTerm{ + {Op: TaggedTermMatch, Key: "key", Value: "^val.*4$", Cost: -1}, + {Op: TaggedTermEq, Key: "__name__", Value: "high_cost", Cost: 70}, + {Op: TaggedTermEq, Key: "environment", Value: "production", Cost: 100}, + {Op: 
TaggedTermEq, Key: "key3", Value: "val.*4", HasWildcard: true}, + {Op: TaggedTermMatch, Key: "dc", Value: "west.*"}, + {Op: TaggedTermMatch, Key: "key2", Value: "^val.*4$"}, + }) +}