Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbnode] Add configurability for regexp DFA and FSA limits #2926

Merged
merged 6 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ require (
github.com/m3db/stackmurmur3/v2 v2.0.2
github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878
github.com/m3dbx/pilosa v1.4.1
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69
github.com/mauricelam/genny v0.0.0-20180903214747-eb2c5232c885
github.com/mjibson/esc v0.1.0
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -556,8 +556,8 @@ github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878 h1:kww0LtVVfGrXR7Ofpbi/
github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878/go.mod h1:TxroQUZzb1wzOsq+4+TfVtT7z89YTz3v2UJAYfLNfLE=
github.com/m3dbx/pilosa v1.4.1 h1:/Cpp1XAHSd6orpjceXGiKpCoDdYBP5BD/6NoqGG9eVg=
github.com/m3dbx/pilosa v1.4.1/go.mod h1:Jt0+w9O08sa7qWDeRC58VBjb4OeOTDMOhfvVmyeVCO8=
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de h1:C4DpCfTNzJf5RhJqxOtfWAnD2d6ls7KDnK1boBGUnVg=
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI=
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69 h1:dANuca0xuYlZR7qWdPIIAZKG0YHvsbLTzdenj53yQvc=
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
27 changes: 27 additions & 0 deletions src/cmd/services/m3dbnode/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
xlog "github.com/m3db/m3/src/x/log"
"github.com/m3db/m3/src/x/opentracing"

"github.com/m3dbx/vellum/regexp"
"go.etcd.io/etcd/embed"
"go.etcd.io/etcd/pkg/transport"
"go.etcd.io/etcd/pkg/types"
Expand Down Expand Up @@ -379,6 +380,14 @@ type IndexConfiguration struct {
// as they are very CPU-intensive (regex and FST matching).
MaxQueryIDsConcurrency int `yaml:"maxQueryIDsConcurrency" validate:"min=0"`

// RegexpDFALimit is the limit on the max number of states used by a
// regexp deterministic finite automaton. Default is 10,000 states.
RegexpDFALimit *int `yaml:"regexpDFALimit"`

// RegexpFSALimit is the limit on the max number of bytes used by the
// finite state automaton. Default is 10MB (10,000,000 bytes).
RegexpFSALimit *uint `yaml:"regexpFSALimit"`

// ForwardIndexProbability determines the likelihood that an incoming write is
// written to the next block, when arriving close to the block boundary.
//
Expand All @@ -396,6 +405,24 @@ type IndexConfiguration struct {
ForwardIndexThreshold float64 `yaml:"forwardIndexThreshold" validate:"min=0.0,max=1.0"`
}

// RegexpDFALimitOrDefault reports the configured cap on the number of states
// a regexp deterministic finite automaton may use. When RegexpDFALimit is not
// set, the value reported by regexp.StateLimit() is returned instead.
func (c IndexConfiguration) RegexpDFALimitOrDefault() int {
	if limit := c.RegexpDFALimit; limit != nil {
		return *limit
	}
	return regexp.StateLimit()
}

// RegexpFSALimitOrDefault reports the configured size limit for the finite
// state automaton. When RegexpFSALimit is not set, the value reported by
// regexp.DefaultLimit() is returned instead.
func (c IndexConfiguration) RegexpFSALimitOrDefault() uint {
	if limit := c.RegexpFSALimit; limit != nil {
		return *limit
	}
	return regexp.DefaultLimit()
}

// TransformConfiguration contains configuration options that can transform
// incoming writes.
type TransformConfiguration struct {
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/services/m3dbnode/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ func TestConfiguration(t *testing.T) {
expected := `db:
index:
maxQueryIDsConcurrency: 0
regexpDFALimit: null
regexpFSALimit: null
forwardIndexProbability: 0
forwardIndexThreshold: 0
transforms:
Expand Down
13 changes: 13 additions & 0 deletions src/dbnode/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ import (
xsync "github.com/m3db/m3/src/x/sync"

apachethrift "github.com/apache/thrift/lib/go/thrift"
"github.com/m3dbx/vellum/levenshtein"
"github.com/m3dbx/vellum/levenshtein2"
"github.com/m3dbx/vellum/regexp"
opentracing "github.com/opentracing/opentracing-go"
"github.com/uber-go/tally"
"github.com/uber/tchannel-go"
Expand Down Expand Up @@ -371,6 +374,16 @@ func Run(runOpts RunOptions) {
logger.Warn("max index query IDs concurrency was not set, falling back to default value")
}

// Set global index options.
if n := cfg.Index.RegexpDFALimitOrDefault(); n > 0 {
regexp.SetStateLimit(n)
levenshtein.SetStateLimit(n)
levenshtein2.SetStateLimit(n)
}
if n := cfg.Index.RegexpFSALimitOrDefault(); n > 0 {
regexp.SetDefaultLimit(n)
robskillington marked this conversation as resolved.
Show resolved Hide resolved
}

buildReporter := instrument.NewBuildReporter(iOpts)
if err := buildReporter.Start(); err != nil {
logger.Fatal("unable to start build reporter", zap.Error(err))
Expand Down
2 changes: 1 addition & 1 deletion src/m3ninx/index/segment/fst/regexp/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func ParseRegexp(pattern string) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte
// ParsedRegexp uses the pre-parsed regexp pattern and creates an equivalent matching automaton, and
// corresponding keys to bound prefix beginning and end during the FST search.
func ParsedRegexp(pattern string, parsed *syntax.Regexp) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit)
re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit())
if err != nil {
return nil, nil, nil, err
}
Expand Down