Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

finder: tagged search tune (allow to set cost for tags to be less prefered) #179

Merged
merged 2 commits into from
Feb 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion autocomplete/autocomplete.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (h *Handler) requestExpr(r *http.Request) (*where.Where, *where.Where, map[
return wr, pw, usedTags, nil
}

terms, err := finder.ParseTaggedConditions(expr)
terms, err := finder.ParseTaggedConditions(expr, h.config.ClickHouse.TaggedCosts)
if err != nil {
return wr, pw, usedTags, err
}
Expand Down
40 changes: 23 additions & 17 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ type IndexReverseRule struct {
Reverse string `toml:"reverse" json:"reverse" comment:"same as index-reverse"`
}

// Costs holds the cost settings of a single tag. It is the value type of the
// ClickHouse.TaggedCosts map (config key "tagged-costs"), which is keyed by
// tag name.
//
// NOTE(review): the tag comments on the fields below and setCost in
// finder/tagged.go appear to disagree on which operators each cost applies
// to (setCost uses ValuesCost for both equality and regex-match terms and
// Cost only for equality without wildcards) — confirm which is intended.
type Costs struct {
// Cost is the default cost of the tag; a nil pointer means no default cost is set.
Cost *int `toml:"cost" json:"cost" comment:"default cost (for wildcarded equalence or matched with regex, or if no value cost set)"`
// ValuesCost maps a tag value to the cost used when a term has exactly that value.
ValuesCost map[string]int `toml:"values-cost" json:"values-cost" comment:"cost with some value (for equalence without wildcards) (additional tuning, usually not needed)"`
}

// IndexReverses is a slice of pointers to IndexReverseRule
type IndexReverses []*IndexReverseRule

Expand All @@ -64,23 +69,24 @@ var IndexReverseNames = []string{"auto", "direct", "reversed"}

// ClickHouse config
type ClickHouse struct {
URL string `toml:"url" json:"url" comment:"see https://clickhouse.tech/docs/en/interfaces/http"`
DataTimeout time.Duration `toml:"data-timeout" json:"data-timeout" comment:"total timeout to fetch data"`
IndexTable string `toml:"index-table" json:"index-table" comment:"see doc/index-table.md"`
IndexUseDaily bool `toml:"index-use-daily" json:"index-use-daily"`
IndexReverse string `toml:"index-reverse" json:"index-reverse" comment:"see doc/config.md"`
IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"`
IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch series list from index"`
TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"`
TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"`
TreeTable string `toml:"tree-table" json:"tree-table" comment:"old index table, DEPRECATED, see description in doc/config.md" commented:"true"`
ReverseTreeTable string `toml:"reverse-tree-table" json:"reverse-tree-table" commented:"true"`
DateTreeTable string `toml:"date-tree-table" json:"date-tree-table" commented:"true"`
DateTreeTableVersion int `toml:"date-tree-table-version" json:"date-tree-table-version" commented:"true"`
TreeTimeout time.Duration `toml:"tree-timeout" json:"tree-timeout" commented:"true"`
TagTable string `toml:"tag-table" json:"tag-table" comment:"is not recommended to use, https://github.com/lomik/graphite-clickhouse/wiki/TagsRU" commented:"true"`
ExtraPrefix string `toml:"extra-prefix" json:"extra-prefix" comment:"add extra prefix (directory in graphite) for all metrics, w/o trailing dot"`
ConnectTimeout time.Duration `toml:"connect-timeout" json:"connect-timeout" comment:"TCP connection timeout"`
URL string `toml:"url" json:"url" comment:"see https://clickhouse.tech/docs/en/interfaces/http"`
DataTimeout time.Duration `toml:"data-timeout" json:"data-timeout" comment:"total timeout to fetch data"`
IndexTable string `toml:"index-table" json:"index-table" comment:"see doc/index-table.md"`
IndexUseDaily bool `toml:"index-use-daily" json:"index-use-daily"`
IndexReverse string `toml:"index-reverse" json:"index-reverse" comment:"see doc/config.md"`
IndexReverses IndexReverses `toml:"index-reverses" json:"index-reverses" comment:"see doc/config.md" commented:"true"`
IndexTimeout time.Duration `toml:"index-timeout" json:"index-timeout" comment:"total timeout to fetch series list from index"`
TaggedTable string `toml:"tagged-table" json:"tagged-table" comment:"'tagged' table from carbon-clickhouse, required for seriesByTag"`
TaggedAutocompleDays int `toml:"tagged-autocomplete-days" json:"tagged-autocomplete-days" comment:"or how long the daemon will query tags during autocomplete"`
TaggedCosts map[string]*Costs `toml:"tagged-costs" json:"tagged-costs" commented:"true" comment:"costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags"`
TreeTable string `toml:"tree-table" json:"tree-table" comment:"old index table, DEPRECATED, see description in doc/config.md" commented:"true"`
ReverseTreeTable string `toml:"reverse-tree-table" json:"reverse-tree-table" commented:"true"`
DateTreeTable string `toml:"date-tree-table" json:"date-tree-table" commented:"true"`
DateTreeTableVersion int `toml:"date-tree-table-version" json:"date-tree-table-version" commented:"true"`
TreeTimeout time.Duration `toml:"tree-timeout" json:"tree-timeout" commented:"true"`
TagTable string `toml:"tag-table" json:"tag-table" comment:"is not recommended to use, https://github.com/lomik/graphite-clickhouse/wiki/TagsRU" commented:"true"`
ExtraPrefix string `toml:"extra-prefix" json:"extra-prefix" comment:"add extra prefix (directory in graphite) for all metrics, w/o trailing dot"`
ConnectTimeout time.Duration `toml:"connect-timeout" json:"connect-timeout" comment:"TCP connection timeout"`
// TODO: remove in v0.14
DataTableLegacy string `toml:"data-table" json:"data-table" comment:"will be removed in 0.14" commented:"true"`
// TODO: remove in v0.14
Expand Down
14 changes: 14 additions & 0 deletions deploy/doc/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,20 @@ When `reverse = true` is set for data-table, there are two possibles cases for [

Depends on it for having a proper retention and aggregation you must additionally set `rollup-use-reverted = true` for the first case and `rollup-use-reverted = false` for the second.

#### Additional tuning tagged find for seriesByTag and autocomplete
Only one tag is used as the filter on the index field `Tag1`; see the graphite_tagged table [structure](https://github.com/lomik/carbon-clickhouse#clickhouse-configuration).

So, if the first tag in the filter is costly (has poor selectivity), like environment (with only several possible values), query performance will be degraded.
Tune this with the `tagged-costs` option:

```toml
[clickhouse.tagged-costs]
"environment" = { cost = 100 }
"project" = { values-cost = { "HugeProject" = 90 } } # override the cost for this tag value only
```

The default cost is 0; both positive and negative numbers can be used. With the settings above, if environment is the first tag filter in a query, it will be used as the primary filter only if there are no other filters with the equality operation. Costs from values-cost are also applied to regex matches and wildcarded equality.

## Carbonlink `[carbonlink]`
The configuration to get metrics from carbon-cache. See details in [graphite-web](https://graphite.readthedocs.io/en/latest/carbon-daemons.html#carbon-relay-py) documentation.

Expand Down
17 changes: 17 additions & 0 deletions doc/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@ When `reverse = true` is set for data-table, there are two possibles cases for [

Depends on it for having a proper retention and aggregation you must additionally set `rollup-use-reverted = true` for the first case and `rollup-use-reverted = false` for the second.

#### Additional tuning tagged find for seriesByTag and autocomplete
Only one tag is used as the filter on the index field `Tag1`; see the graphite_tagged table [structure](https://github.com/lomik/carbon-clickhouse#clickhouse-configuration).

So, if the first tag in the filter is costly (has poor selectivity), like environment (with only several possible values), query performance will be degraded.
Tune this with the `tagged-costs` option:

```toml
[clickhouse.tagged-costs]
"environment" = { cost = 100 }
"project" = { values-cost = { "HugeProject" = 90 } } # override the cost for this tag value only
```

The default cost is 0; both positive and negative numbers can be used. With the settings above, if environment is the first tag filter in a query, it will be used as the primary filter only if there are no other filters with the equality operation. Costs from values-cost are also applied to regex matches and wildcarded equality.

## Carbonlink `[carbonlink]`
The configuration to get metrics from carbon-cache. See details in [graphite-web](https://graphite.readthedocs.io/en/latest/carbon-daemons.html#carbon-relay-py) documentation.

Expand Down Expand Up @@ -131,6 +145,9 @@ It's possible to set multiple loggers. See `Config` description in [config.go](h
tagged-table = "graphite_tagged"
# or how long the daemon will query tags during autocomplete
tagged-autocomplete-days = 7

# costs for tags (for tune which tag will be used as primary), by default is 0, increase for costly (with poor selectivity) tags
# [clickhouse.tagged-costs]
# old index table, DEPRECATED, see description in doc/config.md
# tree-table = ""
# reverse-tree-table = ""
Expand Down
4 changes: 2 additions & 2 deletions finder/finder.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func newPlainFinder(ctx context.Context, config *config.Config, query string, fr
var f Finder

if config.ClickHouse.TaggedTable != "" && strings.HasPrefix(strings.TrimSpace(query), "seriesByTag") {
f = NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, false, opts)
f = NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, false, opts, config.ClickHouse.TaggedCosts)

if len(config.Common.Blacklist) > 0 {
f = WrapBlacklist(f, config.Common.Blacklist)
Expand Down Expand Up @@ -112,7 +112,7 @@ func FindTagged(config *config.Config, ctx context.Context, terms []TaggedTerm,
return Result(plain), nil
}

fnd := NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, true, opts)
fnd := NewTagged(config.ClickHouse.URL, config.ClickHouse.TaggedTable, true, opts, config.ClickHouse.TaggedCosts)

err := fnd.ExecutePrepared(ctx, terms, from, until)
if err != nil {
Expand Down
87 changes: 76 additions & 11 deletions finder/tagged.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

"github.com/go-graphite/carbonapi/pkg/parser"
"github.com/lomik/graphite-clickhouse/config"
"github.com/lomik/graphite-clickhouse/helper/clickhouse"
"github.com/lomik/graphite-clickhouse/pkg/scope"
"github.com/lomik/graphite-clickhouse/pkg/where"
Expand All @@ -29,6 +30,11 @@ type TaggedTerm struct {
Op TaggedTermOp
Value string
HasWildcard bool // only for TaggedTermEq

NonDefaultCost bool
Cost int // tag cost for use ad primary filter (use tag with maximal selectivity). 0 by default, minimal is better.
// __name__ tag is prefered, if some tag has better selectivity than name, set it cost to < 0
// values with wildcards or regex matching also has lower priority, set if needed it cost to < 0
}

type TaggedTermList []TaggedTerm
Expand Down Expand Up @@ -59,19 +65,22 @@ func (s TaggedTermList) Less(i, j int) bool {
}

type TaggedFinder struct {
url string // clickhouse dsn
table string // graphite_tag table
absKeepEncoded bool // Abs returns url encoded value. For queries from prometheus
opts clickhouse.Options // clickhouse query timeout
body []byte // clickhouse response
url string // clickhouse dsn
table string // graphite_tag table
absKeepEncoded bool // Abs returns url encoded value. For queries from prometheus
opts clickhouse.Options // clickhouse query timeout
taggedCosts map[string]*config.Costs // costs for taggs (sor tune index search)

body []byte // clickhouse response
}

func NewTagged(url string, table string, absKeepEncoded bool, opts clickhouse.Options) *TaggedFinder {
// NewTagged returns a TaggedFinder initialised with the given ClickHouse url,
// tagged table name, absKeepEncoded flag, query options and per-tag costs.
// The response body is left empty.
func NewTagged(url string, table string, absKeepEncoded bool, opts clickhouse.Options, taggedCosts map[string]*config.Costs) *TaggedFinder {
	tf := new(TaggedFinder)
	tf.url = url
	tf.table = table
	tf.absKeepEncoded = absKeepEncoded
	tf.opts = opts
	tf.taggedCosts = taggedCosts
	return tf
}

Expand Down Expand Up @@ -180,7 +189,23 @@ func TaggedTermWhereN(term *TaggedTerm) (string, error) {
}
}

func ParseTaggedConditions(conditions []string) ([]TaggedTerm, error) {
// setCost assigns the configured cost to term, if one applies.
//
// Only equality (TaggedTermEq) and match (TaggedTermMatch) terms are
// considered. A cost configured in costs.ValuesCost for the exact term value
// takes precedence and is applied to both operators, with or without
// wildcards. Otherwise the default costs.Cost, when set, is applied, but only
// to an equality term without wildcards. Whenever a cost is applied,
// term.NonDefaultCost is set to true.
//
// A nil costs leaves term untouched.
func setCost(term *TaggedTerm, costs *config.Costs) {
	if costs == nil {
		// The costs map holds pointers, so a tag may be present with a nil
		// entry; treat that as "nothing configured" instead of panicking.
		return
	}
	if term.Op != TaggedTermEq && term.Op != TaggedTermMatch {
		return
	}

	// Indexing a nil or empty map is safe, so no length check is needed.
	if cost, ok := costs.ValuesCost[term.Value]; ok {
		term.Cost = cost
		term.NonDefaultCost = true
		return
	}

	if term.Op == TaggedTermEq && !term.HasWildcard && costs.Cost != nil {
		term.Cost = *costs.Cost // only for non-wildcarded eq
		term.NonDefaultCost = true
	}
}

func ParseTaggedConditions(conditions []string, taggedCosts map[string]*config.Costs) ([]TaggedTerm, error) {
terms := make([]TaggedTerm, len(conditions))

for i := 0; i < len(conditions); i++ {
Expand Down Expand Up @@ -226,14 +251,54 @@ func ParseTaggedConditions(conditions []string) ([]TaggedTerm, error) {
default:
return nil, fmt.Errorf("wrong seriesByTag expr: %#v", s)
}
if len(taggedCosts) > 0 {
if costs, ok := taggedCosts[terms[i].Key]; ok {
setCost(&terms[i], costs)
}
}
}

sort.Sort(TaggedTermList(terms))
if len(taggedCosts) == 0 {
sort.Sort(TaggedTermList(terms))
} else {
// compare with taggs costs
sort.Slice(terms, func(i, j int) bool {
// compare taggs costs, if all of TaggegTerms has custom cost.
// this is allow overwrite operators order (Eq with or without wildcards/Match), use with carefully
if terms[i].Cost != terms[j].Cost {
if terms[i].NonDefaultCost && terms[j].NonDefaultCost ||
(terms[i].NonDefaultCost && terms[j].Op == TaggedTermEq && !terms[j].HasWildcard) ||
(terms[j].NonDefaultCost && terms[i].Op == TaggedTermEq && !terms[i].HasWildcard) {
return terms[i].Cost < terms[j].Cost
}
}

if terms[i].Op == terms[j].Op {
if terms[i].Op == TaggedTermEq && !terms[i].HasWildcard && terms[j].HasWildcard {
// globs as fist eq might be have a bad perfomance
return true
}

if terms[i].Key == "__name__" && terms[j].Key != "__name__" {
return true
}

if terms[i].Cost != terms[j].Cost && terms[i].HasWildcard == terms[j].HasWildcard {
// compare taggs costs
return terms[i].Cost < terms[j].Cost
}

return false
} else {
return terms[i].Op < terms[j].Op
}
})
}

return terms, nil
}

func ParseSeriesByTag(query string) ([]TaggedTerm, error) {
func ParseSeriesByTag(query string, tagCosts map[string]*config.Costs) ([]TaggedTerm, error) {
expr, _, err := parser.ParseExpr(query)
if err != nil {
return nil, err
Expand Down Expand Up @@ -269,7 +334,7 @@ func ParseSeriesByTag(query string) ([]TaggedTerm, error) {
conditions = append(conditions, s)
}

return ParseTaggedConditions(conditions)
return ParseTaggedConditions(conditions, tagCosts)
}

func TaggedWhere(terms []TaggedTerm) (*where.Where, *where.Where, error) {
Expand All @@ -296,7 +361,7 @@ func TaggedWhere(terms []TaggedTerm) (*where.Where, *where.Where, error) {
}

func (t *TaggedFinder) Execute(ctx context.Context, query string, from int64, until int64) error {
terms, err := ParseSeriesByTag(query)
terms, err := ParseSeriesByTag(query, t.taggedCosts)
if err != nil {
return err
}
Expand Down
Loading