Skip to content

Commit

Permalink
finder: tagged search tune (allow for costly tags set cost to be less…
Browse files Browse the repository at this point in the history
… prefered
  • Loading branch information
msaf1980 committed Jan 29, 2022
1 parent 950f05d commit 7f4c461
Show file tree
Hide file tree
Showing 7 changed files with 260 additions and 41 deletions.
2 changes: 1 addition & 1 deletion autocomplete/autocomplete.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (h *Handler) requestExpr(r *http.Request) (*where.Where, *where.Where, map[
return wr, pw, usedTags, nil
}

terms, err := finder.ParseTaggedConditions(expr)
terms, err := finder.ParseTaggedConditions(expr, h.config.ClickHouse.TaggedCosts)
if err != nil {
return wr, pw, usedTags, err
}
Expand Down
40 changes: 23 additions & 17 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ type IndexReverseRule struct {
Reverse string `toml:"reverse" json:"reverse" comment:"same as index-reverse"`
}

// Costs describes the cost settings for a single tag. It is used as the value
// type of the ClickHouse.TaggedCosts map.
//
// NOTE(review): the comment tag on Cost says the default also covers
// wildcarded equality and regex matches, while setCost in finder/tagged.go
// applies Cost only to equality terms without wildcards — confirm which one
// is intended.
type Costs struct {
// Cost is the default cost for the tag.
Cost int `toml:"cost" json:"cost" comment:"default cost (for wildcarded equalence or matched with regex, or if no value cost set)"`
// ValuesCost holds per-value costs, keyed by the tag value.
ValuesCost map[string]int `toml:"values-cost" json:"values-cost" comment:"cost with some value (for equalence without wildcards) (additional tuning, usually not needed)"`
}

// IndexReverses is a slise of ptrs to IndexReverseRule
type IndexReverses []*IndexReverseRule

Expand All @@ -64,23 +69,24 @@ var IndexReverseNames = []string{"auto", "direct", "reversed"}

// ClickHouse config
type ClickHouse struct {
URL string `toml:"url" json:"url" comment:"see https://clickhouse.tech/docs/en/interfaces/http"`
DataTimeout time.Duration `toml:"data-timeout" json:"data-timeout" comment:"total timeout to fetch data"`
IndexTable string `toml:"index-table" json:"index-table" comment:"see doc/index-table.md"`
IndexUseDaily bool `toml:"index-use-daily" json:"index-use-daily"`
IndexReverse string `toml:"index-reverse" json:"index-reverse" comment:"see doc/config.md"`
IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"`
IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch series list from index"`
TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"`
TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"`
TreeTable string `toml:"tree-table" json:"tree-table" comment:"old index table, DEPRECATED, see description in doc/config.md" commented:"true"`
ReverseTreeTable string `toml:"reverse-tree-table" json:"reverse-tree-table" commented:"true"`
DateTreeTable string `toml:"date-tree-table" json:"date-tree-table" commented:"true"`
DateTreeTableVersion int `toml:"date-tree-table-version" json:"date-tree-table-version" commented:"true"`
TreeTimeout time.Duration `toml:"tree-timeout" json:"tree-timeout" commented:"true"`
TagTable string `toml:"tag-table" json:"tag-table" comment:"is not recommended to use, https://github.com/lomik/graphite-clickhouse/wiki/TagsRU" commented:"true"`
ExtraPrefix string `toml:"extra-prefix" json:"extra-prefix" comment:"add extra prefix (directory in graphite) for all metrics, w/o trailing dot"`
ConnectTimeout time.Duration `toml:"connect-timeout" json:"connect-timeout" comment:"TCP connection timeout"`
URL string `toml:"url" json:"url" comment:"see https://clickhouse.tech/docs/en/interfaces/http"`
DataTimeout time.Duration `toml:"data-timeout" json:"data-timeout" comment:"total timeout to fetch data"`
IndexTable string `toml:"index-table" json:"index-table" comment:"see doc/index-table.md"`
IndexUseDaily bool `toml:"index-use-daily" json:"index-use-daily"`
IndexReverse string `toml:"index-reverse" json:"index-reverse" comment:"see doc/config.md"`
IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"`
IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch series list from index"`
TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"`
TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"`
TaggedCosts map[string]*Costs `toml:"tagged-costs" json:"tagged-costs" commented:"true" comment:"costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags"`
TreeTable string `toml:"tree-table" json:"tree-table" comment:"old index table, DEPRECATED, see description in doc/config.md" commented:"true"`
ReverseTreeTable string `toml:"reverse-tree-table" json:"reverse-tree-table" commented:"true"`
DateTreeTable string `toml:"date-tree-table" json:"date-tree-table" commented:"true"`
DateTreeTableVersion int `toml:"date-tree-table-version" json:"date-tree-table-version" commented:"true"`
TreeTimeout time.Duration `toml:"tree-timeout" json:"tree-timeout" commented:"true"`
TagTable string `toml:"tag-table" json:"tag-table" comment:"is not recommended to use, https://github.com/lomik/graphite-clickhouse/wiki/TagsRU" commented:"true"`
ExtraPrefix string `toml:"extra-prefix" json:"extra-prefix" comment:"add extra prefix (directory in graphite) for all metrics, w/o trailing dot"`
ConnectTimeout time.Duration `toml:"connect-timeout" json:"connect-timeout" comment:"TCP connection timeout"`
// TODO: remove in v0.14
DataTableLegacy string `toml:"data-table" json:"data-table" comment:"will be removed in 0.14" commented:"true"`
// TODO: remove in v0.14
Expand Down
14 changes: 14 additions & 0 deletions deploy/doc/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ When `reverse = true` is set for data-table, there are two possibles cases for [

Depends on it for having a proper retention and aggregation you must additionally set `rollup-use-reverted = true` for the first case and `rollup-use-reverted = false` for the second.

#### Additional tuning of tagged find for seriesByTag and autocomplete
Only one tag is used as the filter on the index field Tag1, see the graphite_tagged table [structure](https://github.com/lomik/carbon-clickhouse#clickhouse-configuration)

So, if the first tag in the filter is costly (has poor selectivity), like environment (with several possible values), query performance will be degraded.
Tune this with the `tagged-costs` option:

```toml
[clickhouse.tagged-costs]
environment = { cost = 100 }
project = { values-cost = { HugeProject = 90 } } # overwrite the tag cost for this value only
```

The default cost is 0; both positive and negative numbers can be used. So if environment is the first tag filter in a query, it will be used as the primary filter only if there are no other filters with an equality operation. Costs from `values-cost` are also applied to regex matches and wildcarded equality.

## Carbonlink `[carbonlink]`
The configuration to get metrics from carbon-cache. See details in [graphite-web](https://graphite.readthedocs.io/en/latest/carbon-daemons.html#carbon-relay-py) documentation.

Expand Down
17 changes: 17 additions & 0 deletions doc/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@ When `reverse = true` is set for data-table, there are two possibles cases for [

Depends on it for having a proper retention and aggregation you must additionally set `rollup-use-reverted = true` for the first case and `rollup-use-reverted = false` for the second.

#### Additional tuning of tagged find for seriesByTag and autocomplete
Only one tag is used as the filter on the index field Tag1, see the graphite_tagged table [structure](https://github.com/lomik/carbon-clickhouse#clickhouse-configuration)

So, if the first tag in the filter is costly (has poor selectivity), like environment (with several possible values), query performance will be degraded.
Tune this with the `tagged-costs` option:

```toml
[clickhouse.tagged-costs]
environment = { cost = 100 }
project = { values-cost = { HugeProject = 90 } } # overwrite the tag cost for this value only
```

The default cost is 0; both positive and negative numbers can be used. So if environment is the first tag filter in a query, it will be used as the primary filter only if there are no other filters with an equality operation. Costs from `values-cost` are also applied to regex matches and wildcarded equality.

## Carbonlink `[carbonlink]`
The configuration to get metrics from carbon-cache. See details in [graphite-web](https://graphite.readthedocs.io/en/latest/carbon-daemons.html#carbon-relay-py) documentation.

Expand Down Expand Up @@ -131,6 +145,9 @@ It's possible to set multiple loggers. See `Config` description in [config.go](h
tagged-table = "graphite_tagged"
# or how long the daemon will query tags during autocomplete
tagged-autocomplete-days = 7

# costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags
# [clickhouse.tagged-costs]
# old index table, DEPRECATED, see description in doc/config.md
# tree-table = ""
# reverse-tree-table = ""
Expand Down
4 changes: 2 additions & 2 deletions finder/finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr
var f Finder

if config.ClickHouse.TaggedTable != "" && strings.HasPrefix(strings.TrimSpace(query), "seriesByTag") {
f = NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, false, opts)
f = NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, false, opts, config.ClickHouse.TaggedCosts)

if len(config.Common.Blacklist) > 0 {
f = WrapBlacklist(f, config.Common.Blacklist)
Expand Down Expand Up @@ -112,7 +112,7 @@ func FindTagged(config *config.Config, ctx context.Context, terms []TaggedTerm,
return Result(plain), nil
}

fnd := NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, true, opts)
fnd := NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, true, opts, config.ClickHouse.TaggedCosts)

err := fnd.ExecutePrepared(ctx, terms, from, until)
if err != nil {
Expand Down
98 changes: 87 additions & 11 deletions finder/tagged.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

"github.com/go-graphite/carbonapi/pkg/parser"
"github.com/lomik/graphite-clickhouse/config"
"github.com/lomik/graphite-clickhouse/helper/clickhouse"
"github.com/lomik/graphite-clickhouse/pkg/scope"
"github.com/lomik/graphite-clickhouse/pkg/where"
Expand All @@ -29,6 +30,10 @@ type TaggedTerm struct {
Op TaggedTermOp
Value string
HasWildcard bool // only for TaggedTermEq

Cost int // tag cost for use ad primary filter (use tag with maximal selectivity). 0 by default, minimal is better.
// the __name__ tag is preferred; if some tag has better selectivity than name, set its cost to < 0
// values with wildcards or regex matching also have lower priority; if needed, set their cost to < 0
}

type TaggedTermList []TaggedTerm
Expand Down Expand Up @@ -59,19 +64,22 @@ func (s TaggedTermList) Less(i, j int) bool {
}

// TaggedFinder keeps the ClickHouse connection settings, the per-tag costs and
// the raw ClickHouse response used when searching series by tags.
type TaggedFinder struct {
url string // clickhouse dsn
table string // graphite_tag table
absKeepEncoded bool // Abs returns url encoded value. For queries from prometheus
opts clickhouse.Options // clickhouse query timeout
body []byte // clickhouse response
url string // clickhouse dsn
table string // graphite_tag table
absKeepEncoded bool // Abs returns url encoded value. For queries from prometheus
opts clickhouse.Options // clickhouse query timeout
taggedCosts map[string]*config.Costs // costs for tags (to tune index search)

body []byte // clickhouse response
}

func NewTagged(url string, table string, absKeepEncoded bool, opts clickhouse.Options) *TaggedFinder {
// NewTagged builds a TaggedFinder from the ClickHouse url, the tagged table
// name, the absKeepEncoded flag, the query options and the per-tag costs.
// The response body is left empty.
func NewTagged(url string, table string, absKeepEncoded bool, opts clickhouse.Options, taggedCosts map[string]*config.Costs) *TaggedFinder {
	finder := new(TaggedFinder)

	finder.url = url
	finder.table = table
	finder.absKeepEncoded = absKeepEncoded
	finder.opts = opts
	finder.taggedCosts = taggedCosts

	return finder
}

Expand Down Expand Up @@ -180,7 +188,35 @@ func TaggedTermWhereN(term *TaggedTerm) (string, error) {
}
}

func ParseTaggedConditions(conditions []string) ([]TaggedTerm, error) {
// setCost stores the configured cost for term in term.Cost.
//
// A cost registered in costs.ValuesCost for exactly term.Value wins, whatever
// the term operator is. Otherwise costs.Cost is applied, but only to equality
// terms without wildcards; every other term keeps its current cost.
func setCost(term *TaggedTerm, costs *config.Costs) {
	// Looking up a nil or empty map simply reports "not found".
	if valueCost, found := costs.ValuesCost[term.Value]; found {
		term.Cost = valueCost
		return
	}

	if term.Op != TaggedTermEq || term.HasWildcard {
		// the default cost is only for non-wildcarded eq
		return
	}

	term.Cost = costs.Cost
}

// lessCosts compares terms[i] and terms[j] by their costs.
//
// The second result reports whether the costs are comparable; when it is
// false the first result is always false and must be ignored. Costs are not
// comparable when they are equal, or when either term has a zero cost and is
// not an equality term without wildcards. When comparable, the first result is
// true if terms[i] has the lower cost.
func lessCosts(terms []TaggedTerm, i, j int) (bool, bool) {
	left, right := &terms[i], &terms[j]

	if left.Cost == right.Cost {
		return false, false
	}

	// zeroCostInexact reports a zero-cost term that is not a plain
	// (non-wildcarded) equality.
	zeroCostInexact := func(t *TaggedTerm) bool {
		plainEq := t.Op == TaggedTermEq && !t.HasWildcard
		return t.Cost == 0 && !plainEq
	}

	if zeroCostInexact(left) || zeroCostInexact(right) {
		return false, false
	}

	// compare tag costs
	return left.Cost < right.Cost, true
}

func ParseTaggedConditions(conditions []string, taggedCosts map[string]*config.Costs) ([]TaggedTerm, error) {
terms := make([]TaggedTerm, len(conditions))

for i := 0; i < len(conditions); i++ {
Expand Down Expand Up @@ -226,14 +262,54 @@ func ParseTaggedConditions(conditions []string) ([]TaggedTerm, error) {
default:
return nil, fmt.Errorf("wrong seriesByTag expr: %#v", s)
}
if len(taggedCosts) > 0 {
if costs, ok := taggedCosts[terms[i].Key]; ok {
setCost(&terms[i], costs)
}
}
}

sort.Sort(TaggedTermList(terms))
if len(taggedCosts) == 0 {
sort.Sort(TaggedTermList(terms))
} else {
// compare with taggs costs
sort.Slice(terms, func(i, j int) bool {
eq, comparable := lessCosts(terms, i, j)
if comparable {
return eq
}

if terms[i].Op < terms[j].Op {
return true
}
if terms[i].Op > terms[j].Op {
return false
}

if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[j].HasWildcard {
// globs as fist eq might be have a bad perfomance
return true
}

if terms[i].Key == "__name__" && terms[j].Key != "__name__" {
return true
}

if (terms[i].Cost >= 0 || terms[j].Cost >= 0) && terms[i].HasWildcard == terms[j].HasWildcard {
// compare taggs costs
if terms[i].Cost < terms[j].Cost {
return true
}
}

return false
})
}

return terms, nil
}

func ParseSeriesByTag(query string) ([]TaggedTerm, error) {
func ParseSeriesByTag(query string, tagCosts map[string]*config.Costs) ([]TaggedTerm, error) {
expr, _, err := parser.ParseExpr(query)
if err != nil {
return nil, err
Expand Down Expand Up @@ -269,7 +345,7 @@ func ParseSeriesByTag(query string) ([]TaggedTerm, error) {
conditions = append(conditions, s)
}

return ParseTaggedConditions(conditions)
return ParseTaggedConditions(conditions, tagCosts)
}

func TaggedWhere(terms []TaggedTerm) (*where.Where, *where.Where, error) {
Expand All @@ -296,7 +372,7 @@ func TaggedWhere(terms []TaggedTerm) (*where.Where, *where.Where, error) {
}

func (t *TaggedFinder) Execute(ctx context.Context, query string, from int64, until int64) error {
terms, err := ParseSeriesByTag(query)
terms, err := ParseSeriesByTag(query, t.taggedCosts)
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit 7f4c461

Please sign in to comment.