From 98e75d6e6841da675b069e393afc90eefdfe49d7 Mon Sep 17 00:00:00 2001 From: CJ Cullen Date: Thu, 21 Nov 2019 14:18:32 -0800 Subject: [PATCH 1/3] Add test for max-depth indents. --- limit_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/limit_test.go b/limit_test.go index f160c873..8d8ec2d2 100644 --- a/limit_test.go +++ b/limit_test.go @@ -39,6 +39,7 @@ var limitTests = []struct { {name: "1000kb of maps", data: []byte(`a: &a [{a}` + strings.Repeat(`,{a}`, 1000*1024/4-1) + `]`)}, {name: "1000kb slice nested at max-depth", data: []byte(strings.Repeat(`[`, 10000) + `1` + strings.Repeat(`,1`, 1000*1024/2-20000-1) + strings.Repeat(`]`, 10000))}, {name: "1000kb slice nested in maps at max-depth", data: []byte("{a,b:\n" + strings.Repeat(" {a,b:", 10000-2) + ` [1` + strings.Repeat(",1", 1000*1024/2-6*10000-1) + `]` + strings.Repeat(`}`, 10000-1))}, + {name: "1000kb of 10000-nested lines", data: []byte(strings.Repeat(`- `+strings.Repeat(`[`, 10000)+strings.Repeat(`]`, 10000)+"\n", 1000*1024/20000))}, } func (s *S) TestLimits(c *C) { @@ -92,6 +93,10 @@ func BenchmarkDeepFlow(b *testing.B) { benchmark(b, "1000kb slice nested in maps at max-depth") } +func Benchmark1000KBMaxDepthNested(b *testing.B) { + benchmark(b, "1000kb of 10000-nested lines") +} + func benchmark(b *testing.B, name string) { for _, t := range limitTests { if t.name != name { From f7bfbcfea552cf1097b6c9904ac082a561a2a545 Mon Sep 17 00:00:00 2001 From: CJ Cullen Date: Mon, 25 Nov 2019 20:34:44 -0800 Subject: [PATCH 2/3] Track high-water mark for checking staleness up the simple_keys stack. --- scannerc.go | 38 ++++++++++++++++++++++++++++++++------ yamlh.go | 5 +++-- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/scannerc.go b/scannerc.go index b33bdbae..930e8998 100644 --- a/scannerc.go +++ b/scannerc.go @@ -633,15 +633,20 @@ func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { // Queue is empty. need_more_tokens = true } else { + // Check for any now stale keys in a required position. + if !yaml_parser_stale_simple_keys(parser) { + return false + } // Check if any potential simple key may occupy the head position. for i := len(parser.simple_keys) - 1; i >= 0; i-- { simple_key := &parser.simple_keys[i] + if !simple_key.possible { + break + } if simple_key.token_number < parser.tokens_parsed { break } - if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { - return false - } else if valid && simple_key.token_number == parser.tokens_parsed { + if simple_key.token_number == parser.tokens_parsed { need_more_tokens = true break } @@ -859,6 +864,20 @@ func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key return true, true } +func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { + // fmt.Println(parser.simple_keys_min_possible_index, ":", len(parser.simple_keys)) + for i := parser.simple_keys_min_possible_index; i < len(parser.simple_keys); i++ { + simple_key := &parser.simple_keys[i] + if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { + return false + } else if !valid { + parser.simple_keys_min_possible_index = i + 1 + } + + } + return true +} + // Check if a simple key may start at the current position and add it if // needed. func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { @@ -897,9 +916,12 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { "while scanning a simple key", parser.simple_keys[i].mark, "could not find expected ':'") } + // Remove the key from the stack. + parser.simple_keys[i].possible = false + if parser.simple_keys_min_possible_index > i { + parser.simple_keys_min_possible_index = i + } } - // Remove the key from the stack. - parser.simple_keys[i].possible = false return true } @@ -930,7 +952,11 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool { func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { if parser.flow_level > 0 { parser.flow_level-- - parser.simple_keys = parser.simple_keys[:len(parser.simple_keys)-1] + last := len(parser.simple_keys) - 1 + parser.simple_keys = parser.simple_keys[:last] + if parser.simple_keys_min_possible_index > last { + parser.simple_keys_min_possible_index = last + } } return true } diff --git a/yamlh.go b/yamlh.go index e25cee56..31611b3a 100644 --- a/yamlh.go +++ b/yamlh.go @@ -577,8 +577,9 @@ type yaml_parser_t struct { indent int // The current indentation level. indents []int // The indentation levels stack. - simple_key_allowed bool // May a simple key occur at the current position? - simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_key_allowed bool // May a simple key occur at the current position? + simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_keys_min_possible_index int // Parser stuff From 84d2f8603e81f4a8a9aafa9706ef676203548460 Mon Sep 17 00:00:00 2001 From: CJ Cullen Date: Fri, 6 Dec 2019 15:34:14 -0800 Subject: [PATCH 3/3] Track possible simple_keys in a map indexed by token_number for efficient lookup in yaml_parser_fetch_value(). --- scannerc.go | 63 +++++++++++++---------------------------------------- yamlh.go | 6 ++--- 2 files changed, 18 insertions(+), 51 deletions(-) diff --git a/scannerc.go b/scannerc.go index 930e8998..0b9bb603 100644 --- a/scannerc.go +++ b/scannerc.go @@ -626,36 +626,17 @@ func trace(args ...interface{}) func() { func yaml_parser_fetch_more_tokens(parser *yaml_parser_t) bool { // While we need more tokens to fetch, do it. for { - // Check if we really need to fetch more tokens. - need_more_tokens := false - - if parser.tokens_head == len(parser.tokens) { - // Queue is empty. - need_more_tokens = true - } else { - // Check for any now stale keys in a required position. - if !yaml_parser_stale_simple_keys(parser) { + if parser.tokens_head != len(parser.tokens) { + // If queue is non-empty, check if any potential simple key may + // occupy the head position. + head_tok_idx, ok := parser.simple_keys_by_tok[parser.tokens_parsed] + if !ok { + break + } else if valid, ok := yaml_simple_key_is_valid(parser, &parser.simple_keys[head_tok_idx]); !ok { return false + } else if !valid { + break } - // Check if any potential simple key may occupy the head position. - for i := len(parser.simple_keys) - 1; i >= 0; i-- { - simple_key := &parser.simple_keys[i] - if !simple_key.possible { - break - } - if simple_key.token_number < parser.tokens_parsed { - break - } - if simple_key.token_number == parser.tokens_parsed { - need_more_tokens = true - break - } - } - } - - // We are finished. - if !need_more_tokens { - break } // Fetch the next token. if !yaml_parser_fetch_next_token(parser) { @@ -864,20 +845,6 @@ func yaml_simple_key_is_valid(parser *yaml_parser_t, simple_key *yaml_simple_key return true, true } -func yaml_parser_stale_simple_keys(parser *yaml_parser_t) bool { - // fmt.Println(parser.simple_keys_min_possible_index, ":", len(parser.simple_keys)) - for i := parser.simple_keys_min_possible_index; i < len(parser.simple_keys); i++ { - simple_key := &parser.simple_keys[i] - if valid, ok := yaml_simple_key_is_valid(parser, simple_key); !ok { - return false - } else if !valid { - parser.simple_keys_min_possible_index = i + 1 - } - - } - return true -} - // Check if a simple key may start at the current position and add it if // needed. func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { @@ -902,6 +869,7 @@ func yaml_parser_save_simple_key(parser *yaml_parser_t) bool { return false } parser.simple_keys[len(parser.simple_keys)-1] = simple_key + parser.simple_keys_by_tok[simple_key.token_number] = len(parser.simple_keys) - 1 } return true } @@ -918,9 +886,7 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool { } // Remove the key from the stack. parser.simple_keys[i].possible = false - if parser.simple_keys_min_possible_index > i { - parser.simple_keys_min_possible_index = i - } + delete(parser.simple_keys_by_tok, parser.simple_keys[i].token_number) } return true } @@ -953,10 +919,8 @@ func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool { if parser.flow_level > 0 { parser.flow_level-- last := len(parser.simple_keys) - 1 + delete(parser.simple_keys_by_tok, parser.simple_keys[last].token_number) parser.simple_keys = parser.simple_keys[:last] - if parser.simple_keys_min_possible_index > last { - parser.simple_keys_min_possible_index = last - } } return true } @@ -1033,6 +997,8 @@ func yaml_parser_fetch_stream_start(parser *yaml_parser_t) bool { // Initialize the simple key stack. parser.simple_keys = append(parser.simple_keys, yaml_simple_key_t{}) + parser.simple_keys_by_tok = make(map[int]int) + // A simple key is allowed at the beginning of the stream. parser.simple_key_allowed = true @@ -1336,6 +1302,7 @@ func yaml_parser_fetch_value(parser *yaml_parser_t) bool { // Remove the simple key. simple_key.possible = false + delete(parser.simple_keys_by_tok, simple_key.token_number) // A simple key cannot follow another simple key. parser.simple_key_allowed = false diff --git a/yamlh.go b/yamlh.go index 31611b3a..f6a9c8e3 100644 --- a/yamlh.go +++ b/yamlh.go @@ -577,9 +577,9 @@ type yaml_parser_t struct { indent int // The current indentation level. indents []int // The indentation levels stack. - simple_key_allowed bool // May a simple key occur at the current position? - simple_keys []yaml_simple_key_t // The stack of simple keys. - simple_keys_min_possible_index int + simple_key_allowed bool // May a simple key occur at the current position? + simple_keys []yaml_simple_key_t // The stack of simple keys. + simple_keys_by_tok map[int]int // possible simple_key indexes indexed by token_number // Parser stuff