Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize cases with long potential simple_keys #555

Merged
merged 3 commits into from
Jan 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions limit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ var limitTests = []struct {
{name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)},
{name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))},
{name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))},
{name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))},
}

func (s *S) TestLimits(c *C) {
Expand Down Expand Up @@ -92,6 +93,10 @@ func BenchmarkDeepFlow(b *testing.B) {
benchmark(b, "1000kb slice nested in maps at max-depth")
}

func Benchmark1000KBMaxDepthNested(b *testing.B) {
benchmark(b, "1000kb of 10000-nested lines")
}

func benchmark(b *testing.B, name string) {
for _, t := range limitTests {
if t.name != name {
Expand Down
47 changes: 20 additions & 27 deletions scannerc.go
Original file line number Diff line number Diff line change
Expand Up @@ -626,32 +626,18 @@ func trace(args ...interface{}) func() {
func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool {
// While we need more tokens to fetch, do it.
for {
// Check if we really need to fetch more tokens.
need_more_tokens := false

if parser.tokens_head == len(parser.tokens) {
// Queue is empty.
need_more_tokens = true
} else {
// Check if any potential simple key may occupy the head position.
for i := len(parser.simple_keys) - 1; i >= 0; i-- {
simple_key := &parser.simple_keys[i]
if simple_key.token_number < parser.tokens_parsed {
break
}
if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok {
return false
} else if valid && simple_key.token_number == parser.tokens_parsed {
need_more_tokens = true
break
}
if parser.tokens_head != len(parser.tokens) {
// If queue is non-empty, check if any potential simple key may
// occupy the head position.
head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed]
if !ok {
break
} else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok {
return false
} else if !valid {
break
}
}

// We are finished.
if !need_more_tokens {
break
}
// Fetch the next token.
if !yaml_parser_fetch_next_token(parser) {
return false
Expand Down Expand Up @@ -883,6 +869,7 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool {
return false
}
parser.simple_keys[len(parser.simple_keys)-1] = simple_key
parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1
}
return true
}
Expand All @@ -897,9 +884,10 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
"while scanning a simple key", parser.simple_keys[i].mark,
"could not find expected ':'")
}
// Remove the key from the stack.
parser.simple_keys[i].possible = false
delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number)
}
// Remove the key from the stack.
parser.simple_keys[i].possible = false
return true
}

Expand Down Expand Up @@ -930,7 +918,9 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
if parser.flow_level > 0 {
parser.flow_level--
parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1]
last := len(parser.simple_keys) - 1
delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number)
parser.simple_keys = parser.simple_keys[:last]
}
return true
}
Expand Down Expand Up @@ -1007,6 +997,8 @@ func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool {
// Initialize the simple key stack.
parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{})

parser.simple_keys_by_tok = make(map[int]int)

// A simple key is allowed at the beginning of the stream.
parser.simple_key_allowed = true

Expand Down Expand Up @@ -1310,6 +1302,7 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool {

// Remove the simple key.
simple_key.possible = false
delete(parser.simple_keys_by_tok, simple_key.token_number)

// A simple key cannot follow another simple key.
parser.simple_key_allowed = false
Expand Down
1 change: 1 addition & 0 deletions yamlh.go
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,7 @@ type yaml_parser_t struct {

simple_key_allowed bool // May a simple key occur at the current position?
simple_keys []yaml_simple_key_t // The stack of simple keys.
simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number

// Parser stuff

Expand Down