Skip to content

Commit

Permalink
Fix panic parsing regular expressions
Browse files Browse the repository at this point in the history
- Fix parsing regular expression matching patterns for log lines starting with
  a date, by taking an optional literal string/character suffix into account
  (panic with index out of error, due to expecting a digit-pattern)
- add more test cases for regression
- add support to flatten nested repetitions like `(\d{2}){2}` into `\d{4}`, so
  more date-like patterns can be captured. This adds support for capturing
  `\d\d\d\d` as simplified `\d{4}` pattern.
  • Loading branch information
urso committed Apr 3, 2017
1 parent 76f5873 commit 4808f1c
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff]
- Add `_id`, `_type`, `_index` and `_score` fields in the generated index pattern. {pull}3282[3282]
- Fix potential elasticsearch output URL parsing error if protocol scheme is missing. {pull}3671[3671]
- Improve error message when downloading the dashboards fails. {pull}3805[3805]
- Fix panic when testing regex-AST to match against date patterns. {issue}3889[3889]

*Filebeat*
- Always use absolute path for event and registry. {pull}3328[3328]
Expand Down
7 changes: 6 additions & 1 deletion libbeat/common/match/cmp.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func isPrefixNumDate(r *syntax.Regexp) bool {
i++
}

// check digits
// check starts with digits `\d{n}` or `[0-9]{n}`
if !isMultiDigits(r.Sub[i]) {
return false
}
Expand All @@ -103,6 +103,11 @@ func isPrefixNumDate(r *syntax.Regexp) bool {
}
i++

// regex has 'OpLiteral' suffix, without any more digits/patterns following
if i == len(r.Sub) {
return true
}

// check digits
if !isMultiDigits(r.Sub[i]) {
return false
Expand Down
11 changes: 9 additions & 2 deletions libbeat/common/match/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,14 +88,21 @@ func compilePrefixNumDate(r *syntax.Regexp) (stringMatcher, error) {
i++

for i < len(r.Sub) {
seps = append(seps, []byte(string(r.Sub[i].Rune)))
lit := []byte(string(r.Sub[i].Rune))
i++

// capture literal suffix
if i == len(r.Sub) {
m.suffix = lit
break
}

seps = append(seps, lit)
digits = append(digits, digitLen(r.Sub[i]))
i++
}

minLen := len(m.prefix)
minLen := len(m.prefix) + len(m.suffix)
for _, d := range digits {
minLen += d
}
Expand Down
4 changes: 3 additions & 1 deletion libbeat/common/match/matcher_bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ func BenchmarkPatterns(b *testing.B) {
{"startsWith ' '", `^ `},
{"startsWithDate", `^\d{2}-\d{2}-\d{4}`},
{"startsWithDate2", `^\d{4}-\d{2}-\d{2}`},
{"startsWithDate3", `^20\d{2}-\d{2}-\d{2}`},
{"startsWithDate3", `^\d\d\d\d-\d\d-\d\d`},
{"startsWithDate4", `^20\d{2}-\d{2}-\d{2}`},
{"startsWithDateAndSpace", `^\d{4}-\d{2}-\d{2} `},
{"startsWithLevel", `^(DEBUG|INFO|WARN|ERR|CRIT)`},
{"hasLevel", `(DEBUG|INFO|WARN|ERR|CRIT)`},
{"contains 'PATTERN'", `PATTERN`},
Expand Down
36 changes: 36 additions & 0 deletions libbeat/common/match/matcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,18 @@ func TestMatchers(t *testing.T) {
"This should not match",
},
},
{
`^\d\d\d\d-\d\d-\d\d`,
typeOf((*prefixNumDate)(nil)),
[]string{
"2017-01-02 should match",
"2017-01-03 should also match",
},
[]string{
"- 2017-01-02 should not match",
"fail",
},
},
{
`^\d{4}-\d{2}-\d{2}`,
typeOf((*prefixNumDate)(nil)),
Expand All @@ -132,6 +144,30 @@ func TestMatchers(t *testing.T) {
"fail",
},
},
{
`^(\d{2}){2}-\d{2}-\d{2}`,
typeOf((*prefixNumDate)(nil)),
[]string{
"2017-01-02 should match",
"2017-01-03 should also match",
},
[]string{
"- 2017-01-02 should not match",
"fail",
},
},
{
`^\d{4}-\d{2}-\d{2} - `,
typeOf((*prefixNumDate)(nil)),
[]string{
"2017-01-02 - should match",
"2017-01-03 - should also match",
},
[]string{
"- 2017-01-02 should not match",
"fail",
},
},
{
`^20\d{2}-\d{2}-\d{2}`,
typeOf((*prefixNumDate)(nil)),
Expand Down
9 changes: 8 additions & 1 deletion libbeat/common/match/matchers.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ type altPrefixMatcher struct {

type prefixNumDate struct {
minLen int
digits []int
prefix []byte
digits []int
seps [][]byte
suffix []byte
}

type emptyStringMatcher struct{}
Expand Down Expand Up @@ -182,6 +183,12 @@ func (m *prefixNumDate) Match(in []byte) bool {
}
}

if sfx := m.suffix; len(sfx) > 0 {
if !bytes.HasPrefix(in[pos:], sfx) {
return false
}
}

return true
}

Expand Down
56 changes: 54 additions & 2 deletions libbeat/common/match/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ var transformations = []trans{
trimRight,
unconcat,
concatRepetition,
flattenRepetition,
}

// optimize runs minimal regular expression optimizations
Expand Down Expand Up @@ -112,8 +113,8 @@ func unconcat(r *syntax.Regexp) (bool, *syntax.Regexp) {
return false, r
}

// concatRepetition concatenates multiple repeated sub-patterns into
// a repetition of exactly N.
// concatRepetition concatenates 2 consecutive repeated sub-patterns into a
// repetition of length 2.
func concatRepetition(r *syntax.Regexp) (bool, *syntax.Regexp) {

if r.Op != syntax.OpConcat {
Expand Down Expand Up @@ -204,3 +205,54 @@ func concatRepetition(r *syntax.Regexp) (bool, *syntax.Regexp) {
}
return changed, r
}

// flattenRepetition flattens nested repetitions
func flattenRepetition(r *syntax.Regexp) (bool, *syntax.Regexp) {
if r.Op != syntax.OpConcat {
// don't iterate sub-expressions if top-level is no OpConcat
return false, r
}

sub := r.Sub
inRepetition := false
if isConcatRepetition(r) {
sub = sub[:1]
inRepetition = true

// create flattened regex repetition mulitplying count
// if nexted expression is also a repetition
if s := sub[0]; isConcatRepetition(s) {
count := len(s.Sub) * len(r.Sub)
return true, &syntax.Regexp{
Op: syntax.OpRepeat,
Sub: s.Sub[:1],
Min: count,
Max: count,
Flags: r.Flags | s.Flags,
}
}
}

// recursively check if we can flatten sub-expressions
changed := false
for i, s := range sub {
upd, tmp := flattenRepetition(s)
changed = changed || upd
sub[i] = tmp
}

if !changed {
return false, r
}

// fix up top-level repetition with modified one
tmp := *r
if inRepetition {
for i := range r.Sub {
tmp.Sub[i] = sub[0]
}
} else {
tmp.Sub = sub
}
return changed, &tmp
}

0 comments on commit 4808f1c

Please sign in to comment.