Skip to content

Commit

Permalink
[dbnode] Add configurability for regexp DFA and FSA limits (#2926)
Browse files Browse the repository at this point in the history
  • Loading branch information
robskillington committed Dec 24, 2020
1 parent f668a84 commit ab67c57
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 4 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ require (
github.com/m3db/stackmurmur3/v2 v2.0.2
github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878
github.com/m3dbx/pilosa v1.4.2-0.20201109081833-6c9df43642fd
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69
github.com/mauricelam/genny v0.0.0-20180903214747-eb2c5232c885
github.com/mjibson/esc v0.1.0
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e // indirect
Expand Down
5 changes: 3 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ github.com/dgraph-io/ristretto v0.0.3 h1:jh22xisGBjrEVnRZ1DVTpBVQm0Xndu8sMl0CWDz
github.com/dgraph-io/ristretto v0.0.3/go.mod h1:KPxhHT9ZxKefz+PCeOGsrHpl1qZ7i70dGTu2u+Ahh6E=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
github.com/dgryski/go-sip13 v0.0.0-20190329191031-25c5027a8c7b/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
Expand Down Expand Up @@ -559,8 +560,8 @@ github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878 h1:kww0LtVVfGrXR7Ofpbi/
github.com/m3db/tools v0.0.0-20181008195521-c6ded3f34878/go.mod h1:TxroQUZzb1wzOsq+4+TfVtT7z89YTz3v2UJAYfLNfLE=
github.com/m3dbx/pilosa v1.4.2-0.20201109081833-6c9df43642fd h1:C+RCSMuplTpLH8Fiwb87XIkbYEabVf9itroR9+u4RYo=
github.com/m3dbx/pilosa v1.4.2-0.20201109081833-6c9df43642fd/go.mod h1:Jt0+w9O08sa7qWDeRC58VBjb4OeOTDMOhfvVmyeVCO8=
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de h1:C4DpCfTNzJf5RhJqxOtfWAnD2d6ls7KDnK1boBGUnVg=
github.com/m3dbx/vellum v0.0.0-20200826162549-f94c029903de/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI=
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69 h1:dANuca0xuYlZR7qWdPIIAZKG0YHvsbLTzdenj53yQvc=
github.com/m3dbx/vellum v0.0.0-20201119082309-5b47f7a70f69/go.mod h1:DOTAUfV4bzK6Nrb0dboT/oCG0DnQuX+/n0jfZPh6xxI=
github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
github.com/magiconair/properties v1.8.1 h1:ZC2Vc7/ZFkGmsVC9KvOjumD+G5lXy2RtTKyzRKO2BQ4=
github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
Expand Down
27 changes: 27 additions & 0 deletions src/cmd/services/m3dbnode/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
xlog "github.com/m3db/m3/src/x/log"
"github.com/m3db/m3/src/x/opentracing"

"github.com/m3dbx/vellum/regexp"
"go.etcd.io/etcd/embed"
"go.etcd.io/etcd/pkg/transport"
"go.etcd.io/etcd/pkg/types"
Expand Down Expand Up @@ -379,6 +380,14 @@ type IndexConfiguration struct {
// as they are very CPU-intensive (regex and FST matching).
MaxQueryIDsConcurrency int `yaml:"maxQueryIDsConcurrency" validate:"min=0"`

// RegexpDFALimit is the limit on the max number of states used by a
// regexp deterministic finite automaton. Default is 10,000 states.
RegexpDFALimit *int `yaml:"regexpDFALimit"`

// RegexpFSALimit is the limit on the max number of bytes used by the
// finite state automaton. Default is 10 MB (10 million, as a uint).
RegexpFSALimit *uint `yaml:"regexpFSALimit"`

// ForwardIndexProbability determines the likelihood that an incoming write is
// written to the next block, when arriving close to the block boundary.
//
Expand All @@ -396,6 +405,24 @@ type IndexConfiguration struct {
ForwardIndexThreshold float64 `yaml:"forwardIndexThreshold" validate:"min=0.0,max=1.0"`
}

// RegexpDFALimitOrDefault reports the deterministic finite automaton states
// limit to use: the configured RegexpDFALimit when it is set, otherwise the
// value returned by regexp.StateLimit().
func (c IndexConfiguration) RegexpDFALimitOrDefault() int {
	// A nil pointer means the option was not provided in the configuration.
	if limit := c.RegexpDFALimit; limit != nil {
		return *limit
	}
	return regexp.StateLimit()
}

// RegexpFSALimitOrDefault reports the finite state automaton size limit to
// use: the configured RegexpFSALimit when it is set, otherwise the value
// returned by regexp.DefaultLimit().
func (c IndexConfiguration) RegexpFSALimitOrDefault() uint {
	// A nil pointer means the option was not provided in the configuration.
	if limit := c.RegexpFSALimit; limit != nil {
		return *limit
	}
	return regexp.DefaultLimit()
}

// TransformConfiguration contains configuration options that can transform
// incoming writes.
type TransformConfiguration struct {
Expand Down
2 changes: 2 additions & 0 deletions src/cmd/services/m3dbnode/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ func TestConfiguration(t *testing.T) {
expected := `db:
index:
maxQueryIDsConcurrency: 0
regexpDFALimit: null
regexpFSALimit: null
forwardIndexProbability: 0
forwardIndexThreshold: 0
transforms:
Expand Down
13 changes: 13 additions & 0 deletions src/dbnode/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ import (
xsync "github.com/m3db/m3/src/x/sync"

apachethrift "github.com/apache/thrift/lib/go/thrift"
"github.com/m3dbx/vellum/levenshtein"
"github.com/m3dbx/vellum/levenshtein2"
"github.com/m3dbx/vellum/regexp"
opentracing "github.com/opentracing/opentracing-go"
"github.com/uber-go/tally"
"github.com/uber/tchannel-go"
Expand Down Expand Up @@ -368,6 +371,16 @@ func Run(runOpts RunOptions) {

opentracing.SetGlobalTracer(tracer)

// Set global index options.
if n := cfg.Index.RegexpDFALimitOrDefault(); n > 0 {
regexp.SetStateLimit(n)
levenshtein.SetStateLimit(n)
levenshtein2.SetStateLimit(n)
}
if n := cfg.Index.RegexpFSALimitOrDefault(); n > 0 {
regexp.SetDefaultLimit(n)
}

buildReporter := instrument.NewBuildReporter(iopts)
if err := buildReporter.Start(); err != nil {
logger.Fatal("unable to start build reporter", zap.Error(err))
Expand Down
2 changes: 1 addition & 1 deletion src/m3ninx/index/segment/fst/regexp/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func ParseRegexp(pattern string) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte
// ParsedRegexp uses the pre-parsed regexp pattern and creates an equivalent matching automaton, and
// corresponding keys to bound prefix beginning and end during the FST search.
func ParsedRegexp(pattern string, parsed *syntax.Regexp) (a *vregexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit)
re, err := vregexp.NewParsedWithLimit(pattern, parsed, vregexp.DefaultLimit())
if err != nil {
return nil, nil, nil, err
}
Expand Down

0 comments on commit ab67c57

Please sign in to comment.