From 0b39ccac5b9a76f92cf0f387e45aa80e50be7ea3 Mon Sep 17 00:00:00 2001 From: aycabta Date: Wed, 18 Jul 2018 20:15:10 +0900 Subject: [PATCH 1/6] Remove ancestors from a method chain of checking include module I profiled CRuby documentation generation with StackProf, using the patch below, to find this optimization. --- a/exe/rdoc +++ b/exe/rdoc @@ -16,8 +16,11 @@ end require 'rdoc/rdoc' begin - r = RDoc::RDoc.new - r.document ARGV + require 'stackprof' + StackProf.run(mode: :cpu, out: 'stackprof.dump') do + r = RDoc::RDoc.new + r.document ARGV + end rescue Errno::ENOSPC $stderr.puts 'Ran out of space creating documentation' $stderr.puts Before this commit, RDoc::TopLevel#text? accounts for about 19.3% of the processing time. ~/rdoc$ stackprof stackprof.dump --text --limit 5 ================================== Mode: cpu(1000) Samples: 54814 (0.47% miss rate) GC: 9022 (16.46%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 10579 (19.3%) 10579 (19.3%) RDoc::TopLevel#text? 9022 (16.5%) 9022 (16.5%) (garbage collection) 21611 (39.4%) 2503 (4.6%) RDoc::Generator::Darkfish#generate_class 3151 (5.7%) 1482 (2.7%) Ripper::Lexer#lex 1271 (2.3%) 1271 (2.3%) Ripper::Lexer::State#initialize After this commit, the share of RDoc::TopLevel#text? is reduced to 6.8%. ~/rdoc$ stackprof stackprof.dump --text --limit 5 ================================== Mode: cpu(1000) Samples: 42712 (0.47% miss rate) GC: 6066 (14.20%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 6066 (14.2%) 6066 (14.2%) (garbage collection) 2892 (6.8%) 2892 (6.8%) RDoc::TopLevel#text? 14059 (32.9%) 2379 (5.6%) RDoc::Generator::Darkfish#generate_class 3015 (7.1%) 1389 (3.3%) Ripper::Lexer#lex 1265 (3.0%) 1265 (3.0%) Ripper::Lexer::State#initialize Therefore, this commit speeds up RDoc by 9.9%. 
--- lib/rdoc/top_level.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rdoc/top_level.rb b/lib/rdoc/top_level.rb index 6186722772..b8d4506ec8 100644 --- a/lib/rdoc/top_level.rb +++ b/lib/rdoc/top_level.rb @@ -272,7 +272,7 @@ def search_record # Is this TopLevel from a text file instead of a source code file? def text? - @parser and @parser.ancestors.include? RDoc::Parser::Text + @parser and @parser.include? RDoc::Parser::Text end def to_s # :nodoc: From 818ee979e02151885b4ae769b9f87b8f6b5f1802 Mon Sep 17 00:00:00 2001 From: aycabta Date: Wed, 18 Jul 2018 21:03:03 +0900 Subject: [PATCH 2/6] Generate text file only table I'm using Stackprof for a performance check of CRuby documents generation. Before this commit, RDoc::TopLevel#text? uses processing time about 6.8%. ~/rdoc$ stackprof stackprof.dump --text --limit 5 ================================== Mode: cpu(1000) Samples: 42712 (0.47% miss rate) GC: 6066 (14.20%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 6066 (14.2%) 6066 (14.2%) (garbage collection) 2892 (6.8%) 2892 (6.8%) RDoc::TopLevel#text? 14059 (32.9%) 2379 (5.6%) RDoc::Generator::Darkfish#generate_class 3015 (7.1%) 1389 (3.3%) Ripper::Lexer#lex 1265 (3.0%) 1265 (3.0%) Ripper::Lexer::State#initialize After this commit, processing time of RDoc::TopLevel#text? is reduced to 0.1%. ~/rdoc$ stackprof stackprof.dump --method 'RDoc::TopLevel#text?' RDoc::TopLevel#text? 
(/home/aycabta/rdoc/lib/rdoc/top_level.rb:274) samples: 20 self (0.1%) / 20 total (0.1%) callers: 17 ( 85.0%) RDoc::Generator::Darkfish#generate_page 2 ( 10.0%) RDoc::Store#add_file 1 ( 5.0%) RDoc::Store#complete --- lib/rdoc/rdoc.rb | 2 +- lib/rdoc/store.rb | 20 +++++++++++++++----- lib/rdoc/top_level.rb | 8 +++++++- test/test_rdoc_cross_reference.rb | 3 +-- test/test_rdoc_servlet.rb | 3 +-- test/test_rdoc_store.rb | 11 ++++------- 6 files changed, 29 insertions(+), 18 deletions(-) diff --git a/lib/rdoc/rdoc.rb b/lib/rdoc/rdoc.rb index 31a92b3bec..ca2c1abefd 100644 --- a/lib/rdoc/rdoc.rb +++ b/lib/rdoc/rdoc.rb @@ -355,7 +355,7 @@ def parse_file filename relative_path.relative_path_from @options.page_dir end - top_level = @store.add_file filename, relative_path.to_s + top_level = @store.add_file filename, relative_name: relative_path.to_s parser = RDoc::Parser.for top_level, filename, content, @options, @stats diff --git a/lib/rdoc/store.rb b/lib/rdoc/store.rb index b7e677abf9..f420fc2bd2 100644 --- a/lib/rdoc/store.rb +++ b/lib/rdoc/store.rb @@ -148,6 +148,7 @@ def initialize path = nil, type = nil @classes_hash = {} @modules_hash = {} @files_hash = {} + @text_files_hash = {} @c_enclosure_classes = {} @c_enclosure_names = {} @@ -184,16 +185,24 @@ def add_c_variables c_parser # Adds the file with +name+ as an RDoc::TopLevel to the store. Returns the # created RDoc::TopLevel. - def add_file absolute_name, relative_name = absolute_name + def add_file absolute_name, relative_name: absolute_name, parser: nil unless top_level = @files_hash[relative_name] then top_level = RDoc::TopLevel.new absolute_name, relative_name + top_level.parser = parser if parser top_level.store = self @files_hash[relative_name] = top_level + @text_files_hash[relative_name] = top_level if top_level.text? end top_level end + def update_parser_of_file(absolute_name, parser) + if top_level = @files_hash[absolute_name] then + @text_files_hash[absolute_name] = top_level if top_level.text? 
+ end + end + ## # Returns all classes discovered by RDoc @@ -428,8 +437,8 @@ def find_module_named name # +file_name+ def find_text_page file_name - @files_hash.each_value.find do |file| - file.text? and file.full_name == file_name + @text_files_hash.each_value.find do |file| + file.full_name == file_name end end @@ -537,6 +546,7 @@ def load_all @cache[:pages].each do |page_name| page = load_page page_name @files_hash[page_name] = page + @text_files_hash[page_name] = page if page.text? end end @@ -712,8 +722,8 @@ def modules_hash # Returns the RDoc::TopLevel that is a text file and has the given +name+ def page name - @files_hash.each_value.find do |file| - file.text? and file.page_name == name + @text_files_hash.each_value.find do |file| + file.page_name == name end end diff --git a/lib/rdoc/top_level.rb b/lib/rdoc/top_level.rb index b8d4506ec8..b8b6110bb2 100644 --- a/lib/rdoc/top_level.rb +++ b/lib/rdoc/top_level.rb @@ -33,7 +33,7 @@ class RDoc::TopLevel < RDoc::Context ## # The parser class that processed this file - attr_accessor :parser + attr_reader :parser ## # Creates a new TopLevel for the file at +absolute_name+. 
If documentation @@ -52,6 +52,12 @@ def initialize absolute_name, relative_name = absolute_name @classes_or_modules = [] end + def parser=(val) + @parser = val + @store.update_parser_of_file(absolute_name, val) if @store + @parser + end + ## # An RDoc::TopLevel is equal to another with the same relative_name diff --git a/test/test_rdoc_cross_reference.rb b/test/test_rdoc_cross_reference.rb index 8233fe3f61..183de0930d 100644 --- a/test/test_rdoc_cross_reference.rb +++ b/test/test_rdoc_cross_reference.rb @@ -162,8 +162,7 @@ def test_resolve_method_equals3 end def test_resolve_page - page = @store.add_file 'README.txt' - page.parser = RDoc::Parser::Simple + page = @store.add_file 'README.txt', parser: RDoc::Parser::Simple assert_ref page, 'README' end diff --git a/test/test_rdoc_servlet.rb b/test/test_rdoc_servlet.rb index c0c0869293..0a197a31b6 100644 --- a/test/test_rdoc_servlet.rb +++ b/test/test_rdoc_servlet.rb @@ -224,8 +224,7 @@ def test_documentation_page_page generator = @s.generator_for store - readme = store.add_file 'README.rdoc' - readme.parser = RDoc::Parser::Simple + readme = store.add_file 'README.rdoc', parser: RDoc::Parser::Simple @s.documentation_page store, generator, 'README_rdoc.html', @req, @res diff --git a/test/test_rdoc_store.rb b/test/test_rdoc_store.rb index e5cf75203f..5a884c0b5f 100644 --- a/test/test_rdoc_store.rb +++ b/test/test_rdoc_store.rb @@ -14,8 +14,7 @@ def setup @top_level = @s.add_file 'file.rb' - @page = @s.add_file 'README.txt' - @page.parser = RDoc::Parser::Simple + @page = @s.add_file 'README.txt', parser: RDoc::Parser::Simple @page.comment = RDoc::Comment.new 'This is a page', @page @klass = @top_level.add_class RDoc::NormalClass, 'Object' @@ -146,7 +145,7 @@ def test_add_file end def test_add_file_relative - top_level = @store.add_file 'path/file.rb', 'file.rb' + top_level = @store.add_file 'path/file.rb', relative_name: 'file.rb' assert_kind_of RDoc::TopLevel, top_level assert_equal @store, top_level.store @@ -310,8 
+309,7 @@ def test_find_module_named end def test_find_text_page - page = @store.add_file 'PAGE.txt' - page.parser = RDoc::Parser::Simple + page = @store.add_file 'PAGE.txt', parser: RDoc::Parser::Simple assert_nil @store.find_text_page 'no such page' @@ -601,8 +599,7 @@ def test_module_names end def test_page - page = @store.add_file 'PAGE.txt' - page.parser = RDoc::Parser::Simple + page = @store.add_file 'PAGE.txt', parser: RDoc::Parser::Simple assert_nil @store.page 'no such page' From 0c146114c9935d0fe14b7dbc7bbf1542d2b90174 Mon Sep 17 00:00:00 2001 From: aycabta Date: Fri, 20 Jul 2018 23:06:48 +0900 Subject: [PATCH 3/6] Disable wrap processing in paragraph I'm using Stackprof for a performance check of Rails documents generation. Before this commit, RDoc::Text#wrap uses processing time about 12.7%. ~/rdoc$ stackprof stackprof-rails.dump --text --limit 3: ================================== Mode: cpu(1000) Samples: 94637 (1.54% miss rate) GC: 18742 (19.80%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 18742 (19.8%) 18742 (19.8%) (garbage collection) 12022 (12.7%) 12022 (12.7%) RDoc::Text#wrap 17468 (18.5%) 6836 (7.2%) RDoc::Generator::Darkfish#generate_class After this commit, processing time of RDoc::Text#wrap is reduced to 0.0%. $ stackprof stackprof-rails.dump --method 'RDoc::Text#wrap' RDoc::Text#wrap (/home/aycabta/rdoc/lib/rdoc/text.rb:269) samples: 9 self (0.0%) / 9 total (0.0%) callers: The wrap processing is just for readability of HTML, but it's too heavy and unnecessary in the present day. 
--- lib/rdoc/markup/to_html.rb | 2 +- lib/rdoc/markup/to_html_snippet.rb | 2 +- test/test_rdoc_markup_to_html.rb | 2 +- test/test_rdoc_markup_to_html_snippet.rb | 6 ++---- test/test_rdoc_text.rb | 3 +-- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/lib/rdoc/markup/to_html.rb b/lib/rdoc/markup/to_html.rb index 79b13e1819..63cca98de7 100644 --- a/lib/rdoc/markup/to_html.rb +++ b/lib/rdoc/markup/to_html.rb @@ -186,7 +186,7 @@ def accept_paragraph paragraph @res << "\n

" text = paragraph.text @hard_break text = text.gsub(/\r?\n/, ' ') - @res << wrap(to_html(text)) + @res << to_html(text) @res << "

\n" end diff --git a/lib/rdoc/markup/to_html_snippet.rb b/lib/rdoc/markup/to_html_snippet.rb index 24aa1d32d9..0cb4ffe4a4 100644 --- a/lib/rdoc/markup/to_html_snippet.rb +++ b/lib/rdoc/markup/to_html_snippet.rb @@ -71,7 +71,7 @@ def accept_paragraph paragraph text = paragraph.text @hard_break - @res << "#{para}#{wrap to_html text}\n" + @res << "#{para}#{to_html text}\n" add_paragraph end diff --git a/test/test_rdoc_markup_to_html.rb b/test/test_rdoc_markup_to_html.rb index 59889dcc5a..421c5edbc3 100644 --- a/test/test_rdoc_markup_to_html.rb +++ b/test/test_rdoc_markup_to_html.rb @@ -395,7 +395,7 @@ def test_accept_paragraph_newline @to.accept_paragraph para("hello\n", "world\n") - assert_equal "\n

hello world

\n", @to.res.join + assert_equal "\n

hello world

\n", @to.res.join end def test_accept_heading_output_decoration diff --git a/test/test_rdoc_markup_to_html_snippet.rb b/test/test_rdoc_markup_to_html_snippet.rb index 98102ba553..ad03747c89 100644 --- a/test/test_rdoc_markup_to_html_snippet.rb +++ b/test/test_rdoc_markup_to_html_snippet.rb @@ -458,8 +458,7 @@ def test_convert_limit expected = <<-EXPECTED

Hello -

This is some text, it will be cut off after 100 characters -and an ellipsis must follow +

This is some text, it will be cut off after 100 characters and an ellipsis must follow

So there you #{@ellipsis} EXPECTED @@ -563,8 +562,7 @@ def test_convert_limit_verbatim_2 RDOC expected = <<-EXPECTED -

Extracts the class, selector and method name parts from name -like Foo::Bar#baz. +

Extracts the class, selector and method name parts from name like Foo::Bar#baz.

NOTE: Given Foo::Bar, #{@ellipsis} EXPECTED diff --git a/test/test_rdoc_text.rb b/test/test_rdoc_text.rb index 61f528a5a4..2669766e71 100644 --- a/test/test_rdoc_text.rb +++ b/test/test_rdoc_text.rb @@ -259,8 +259,7 @@ def test_snippet TEXT expected = <<-EXPECTED -

This is one-hundred characters or more of text in a single paragraph. This -paragraph will be cut off … +

This is one-hundred characters or more of text in a single paragraph. This paragraph will be cut off … EXPECTED assert_equal expected, snippet(text) From 769cfce9fc0fcaae6bbddc6a9f37034c51492583 Mon Sep 17 00:00:00 2001 From: aycabta Date: Fri, 20 Jul 2018 23:15:56 +0900 Subject: [PATCH 4/6] Remove debugging variable derived from IRB --- lib/rdoc/parser/ruby.rb | 2 -- lib/rdoc/parser/ruby_tools.rb | 2 -- 2 files changed, 4 deletions(-) diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index 8b9ecc1141..ac40136f33 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -8,8 +8,6 @@ # by Keiju ISHITSUKA (Nippon Rational Inc.) # -$TOKEN_DEBUG ||= nil - ## # Extracts code elements from a source file returning a TopLevel object # containing the constituent file elements. diff --git a/lib/rdoc/parser/ruby_tools.rb b/lib/rdoc/parser/ruby_tools.rb index 1f621cd32e..6797d0ef3c 100644 --- a/lib/rdoc/parser/ruby_tools.rb +++ b/lib/rdoc/parser/ruby_tools.rb @@ -25,12 +25,10 @@ def get_tk tk = @scanner[@scanner_point] @scanner_point += 1 @read.push tk[:text] - puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG end else @read.push @unget_read.shift tk = @tokens.shift - puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG end if tk == nil || :on___end__ == tk[:kind] From c64c71e288e1fc8efafe893220a6a81e615e5173 Mon Sep 17 00:00:00 2001 From: aycabta Date: Sat, 21 Jul 2018 03:58:58 +0900 Subject: [PATCH 5/6] Split skip_tkspace I'm using Stackprof for a performance check of Rails documents generation. Before this commit, RDoc::Parser::RubyTools#skip_tkspace uses about 0.8% of the processing time. 
~/rdoc$ stackprof stackprof-rails.dump ================================== Mode: cpu(1000) Samples: 68079 (0.92% miss rate) GC: 15749 (23.13%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 15749 (23.1%) 15749 (23.1%) (garbage collection) 14045 (20.6%) 6002 (8.8%) RDoc::Generator::Darkfish#generate_class 6107 (9.0%) 5962 (8.8%) RDoc::Markup::AttributeManager#convert_specials (snip) 2955 (4.3%) 541 (0.8%) RDoc::Parser::RubyTools#skip_tkspace After this commit, processing time of its replacement, RDoc::Parser::RubyTools#skip_tkspace_without_nl, is reduced to 0.5%. ~/rdoc$ stackprof stackprof-rails.dump ================================== Mode: cpu(1000) Samples: 66262 (0.79% miss rate) GC: 15673 (23.65%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 15673 (23.7%) 15673 (23.7%) (garbage collection) 13878 (20.9%) 5836 (8.8%) RDoc::Generator::Darkfish#generate_class 5624 (8.5%) 5489 (8.3%) RDoc::Markup::AttributeManager#convert_specials (snip) 2178 (3.3%) 316 (0.5%) RDoc::Parser::RubyTools#skip_tkspace_without_nl --- lib/rdoc/parser/ruby.rb | 52 +++++++++++++++++------------------ lib/rdoc/parser/ruby_tools.rb | 20 ++++++++++---- test/test_rdoc_parser_ruby.rb | 37 +++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 31 deletions(-) diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index ac40136f33..97399f87ff 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -265,7 +265,7 @@ def collect_first_comment tk = get_tk if :on_nl === tk then - skip_tkspace false + skip_tkspace_without_nl tk = get_tk end end @@ -280,7 +280,7 @@ def collect_first_comment # Consumes trailing whitespace from the token stream def consume_trailing_spaces # :nodoc: - skip_tkspace false + skip_tkspace_without_nl end ## @@ -352,7 +352,7 @@ def get_class_or_module container, ignore_constants = false given_name << '::' end - skip_tkspace false + skip_tkspace_without_nl given_name << name_t[:text] is_self = name_t[:kind] == :on_op && 
name_t[:text] == '<<' @@ -376,7 +376,7 @@ 
def get_class_or_module container, ignore_constants = false record_location container get_tk - skip_tkspace false + skip_tkspace_without_nl name_t = get_tk unless :on_const == name_t[:kind] || :on_ident == name_t[:kind] raise RDoc::Error, "Invalid class or module definition: #{given_name}" @@ -388,7 +388,7 @@ def get_class_or_module container, ignore_constants = false end end - skip_tkspace false + skip_tkspace_without_nl return [container, name_t, given_name, new_modules] end @@ -408,7 +408,7 @@ def get_class_specification res = get_constant - skip_tkspace false + skip_tkspace_without_nl get_tkread # empty out read buffer @@ -431,7 +431,7 @@ def get_class_specification def get_constant res = "" - skip_tkspace false + skip_tkspace_without_nl tk = get_tk while tk && ((:on_op == tk[:kind] && '::' == tk[:text]) || :on_const == tk[:kind]) do @@ -447,7 +447,7 @@ def get_constant # Get an included module that may be surrounded by parens def get_included_module_with_optional_parens - skip_tkspace false + skip_tkspace_without_nl get_tkread tk = get_tk end_token = get_end_token tk @@ -683,7 +683,7 @@ def parse_attr(context, single, tk, comment) if args.size > 0 then name = args[0] rw = "R" - skip_tkspace false + skip_tkspace_without_nl tk = get_tk if :on_comma == tk[:kind] then @@ -933,7 +933,7 @@ def parse_constant container, tk, comment, ignore_constants = false line_no = tk[:line_no] name = tk[:text] - skip_tkspace false + skip_tkspace_without_nl return unless name =~ /^\w+$/ @@ -959,7 +959,7 @@ def parse_constant container, tk, comment, ignore_constants = false break if nest == 0 end end - skip_tkspace false + skip_tkspace_without_nl is_array_or_hash = true end @@ -1296,7 +1296,7 @@ def parse_meta_method(container, single, tk, comment) add_token tk add_token_listener self - skip_tkspace false + skip_tkspace_without_nl comment.text = comment.text.sub(/(^# +:?)(singleton-)(method:)/, '\1\3') singleton = !!$~ @@ -1485,7 +1485,7 @@ def parse_method_dummy container def 
parse_method_name container # :nodoc: skip_tkspace name_t = get_tk - back_tk = skip_tkspace(false) + back_tk = skip_tkspace_without_nl singleton = false dot = get_tk @@ -1573,7 +1573,7 @@ def parse_method_name_singleton container, name_t # :nodoc: def parse_method_or_yield_parameters(method = nil, modifiers = RDoc::METHOD_MODIFIERS) - skip_tkspace false + skip_tkspace_without_nl tk = get_tk end_token = get_end_token tk return '' unless end_token @@ -1646,7 +1646,7 @@ def parse_method_parameters method return if method.block_params - skip_tkspace false + skip_tkspace_without_nl read_documentation_modifiers method, RDoc::METHOD_MODIFIERS end @@ -1697,19 +1697,19 @@ def parse_require(context, comment) # Parses a rescue def parse_rescue - skip_tkspace false + skip_tkspace_without_nl while tk = get_tk case tk[:kind] when :on_nl, :on_semicolon, :on_comment then break when :on_comma then - skip_tkspace false + skip_tkspace_without_nl get_tk if :on_nl == peek_tk[:kind] end - skip_tkspace false + skip_tkspace_without_nl end end @@ -1782,7 +1782,7 @@ def parse_statements(container, single = NORMAL, current_method = nil, comment += "\n" unless "\n" == comment_body.chars.to_a.last if comment_body.size > 1 && "\n" == comment_body.chars.to_a.last then - skip_tkspace false # leading spaces + skip_tkspace_without_nl # leading spaces end tk = get_tk end @@ -1966,7 +1966,7 @@ def parse_symbol_arg_space no, tk # :nodoc: end loop do - skip_tkspace false + skip_tkspace_without_nl tk1 = get_tk if tk1.nil? 
|| :on_comma != tk1[:kind] then @@ -2115,7 +2115,7 @@ def read_directive allowed # See also RDoc::Markup::PreProcess#handle_directive def read_documentation_modifiers context, allowed - skip_tkspace(false) + skip_tkspace_without_nl directive, value = read_directive allowed return unless directive @@ -2193,7 +2193,7 @@ def scan # while, until, and for have an optional do def skip_optional_do_after_expression - skip_tkspace false + skip_tkspace_without_nl tk = get_tk b_nest = 0 @@ -2225,7 +2225,7 @@ def skip_optional_do_after_expression tk = get_tk end - skip_tkspace false + skip_tkspace_without_nl get_tk if peek_tk && :on_kw == peek_tk[:kind] && 'do' == peek_tk[:text] end @@ -2234,9 +2234,9 @@ def skip_optional_do_after_expression # skip the var [in] part of a 'for' statement def skip_for_variable - skip_tkspace false + skip_tkspace_without_nl get_tk - skip_tkspace false + skip_tkspace_without_nl tk = get_tk unget_tk(tk) unless :on_kw == tk[:kind] and 'in' == tk[:text] end @@ -2255,7 +2255,7 @@ def skip_method container def skip_tkspace_comment(skip_nl = true) loop do - skip_tkspace skip_nl + skip_nl ? skip_tkspace : skip_tkspace_without_nl next_tk = peek_tk return if next_tk.nil? 
|| (:on_comment != next_tk[:kind] and :on_embdoc != next_tk[:kind]) get_tk diff --git a/lib/rdoc/parser/ruby_tools.rb b/lib/rdoc/parser/ruby_tools.rb index 6797d0ef3c..681d7166ce 100644 --- a/lib/rdoc/parser/ruby_tools.rb +++ b/lib/rdoc/parser/ruby_tools.rb @@ -109,17 +109,27 @@ def reset @scanner_point = 0 end - def tk_nl?(tk) - :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + ## + # Skips whitespace tokens including newlines + + def skip_tkspace + tokens = [] + + while (tk = get_tk) and (:on_sp == tk[:kind] or :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind]) do + tokens.push(tk) + end + + unget_tk(tk) + tokens end ## - # Skips whitespace tokens including newlines if +skip_nl+ is true + # Skips whitespace tokens excluding newlines - def skip_tkspace(skip_nl = true) + def skip_tkspace_without_nl tokens = [] - while (tk = get_tk) and (:on_sp == tk[:kind] or (skip_nl and tk_nl?(tk))) do + while (tk = get_tk) and :on_sp == tk[:kind] do tokens.push(tk) end diff --git a/test/test_rdoc_parser_ruby.rb b/test/test_rdoc_parser_ruby.rb index 8e31141f59..69dfcf1196 100644 --- a/test/test_rdoc_parser_ruby.rb +++ b/test/test_rdoc_parser_ruby.rb @@ -495,6 +495,43 @@ def test_parse_attr_accessor assert_equal 'my attr', bar.comment.text end + def test_parse_attr_accessor_with_newline + klass = RDoc::NormalClass.new 'Foo' + klass.parent = @top_level + + comment = RDoc::Comment.new "##\n# my attr\n", @top_level + + util_parser "attr_accessor :foo, :bar,\n :baz,\n :qux" + + tk = @parser.get_tk + + @parser.parse_attr_accessor klass, RDoc::Parser::Ruby::NORMAL, tk, comment + + assert_equal 4, klass.attributes.length + + foo = klass.attributes[0] + assert_equal 'foo', foo.name + assert_equal 'RW', foo.rw + assert_equal 'my attr', foo.comment.text + assert_equal @top_level, foo.file + assert_equal 1, foo.line + + bar = klass.attributes[1] + assert_equal 'bar', bar.name + assert_equal 'RW', bar.rw + assert_equal 'my attr', bar.comment.text + + bar = klass.attributes[2] + 
assert_equal 'baz', bar.name + assert_equal 'RW', bar.rw + assert_equal 'my attr', bar.comment.text + + bar = klass.attributes[3] + assert_equal 'qux', bar.name + assert_equal 'RW', bar.rw + assert_equal 'my attr', bar.comment.text + end + def test_parse_attr_accessor_nodoc klass = RDoc::NormalClass.new 'Foo' klass.parent = @top_level From 2b96a8b617c8d3438ccf9bdb302353c66142afca Mon Sep 17 00:00:00 2001 From: aycabta Date: Sat, 21 Jul 2018 09:30:31 +0900 Subject: [PATCH 6/6] Refactor RipperStateLex I'm using Stackprof for a performance check of Rails documents generation. I checked methods below: - RDoc::Parser::RipperStateLex#get_squashed_tk - RDoc::Parser::RipperStateLex#initialize - RDoc::Parser::RipperStateLex::InnerStateLex#on_default Before this commit, these use processing time about 19.6%. aycabta@x270:~/rdoc$ stackprof stackprof-rails-before.dump --limit 1 ================================== Mode: cpu(1000) Samples: 66894 (0.83% miss rate) GC: 15700 (23.47%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 15700 (23.5%) 15700 (23.5%) (garbage collection) aycabta@x270:~/rdoc$ stackprof stackprof-rails-before.dump | \ grep 'RipperStateLex' 1727 (2.6%) 1472 (2.2%) (snip)RipperStateLex#get_squashed_tk 9352 (14.0%) 1281 (1.9%) (snip)RipperStateLex#initialize 2018 (3.0%) 475 (0.7%) (snip)RipperStateLex::InnerStateLex#on_default After this commit, processing time is reduced to 15.0%. 
aycabta@x270:~/rdoc$ stackprof stackprof-rails-after.dump --limit 1 ================================== Mode: cpu(1000) Samples: 61020 (1.01% miss rate) GC: 13824 (22.65%) ================================== TOTAL (pct) SAMPLES (pct) FRAME 13824 (22.7%) 13824 (22.7%) (garbage collection) aycabta@x270:~/rdoc$ stackprof stackprof-rails-after.dump --limit 0 | \ grep '#get_squashed_tk\|RipperStateLex#initialize\|#on_default' 735 (1.2%) 533 (0.9%) (snip)RipperStateLex::InnerStateLex#on_default 760 (1.2%) 517 (0.8%) (snip)RipperStateLex#get_squashed_tk 7663 (12.6%) 1 (0.0%) (snip)RipperStateLex#initialize --- lib/rdoc/parser/ripper_state_lex.rb | 84 ++++++++++++++--------------- 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb index 773a830450..9704adb785 100644 --- a/lib/rdoc/parser/ripper_state_lex.rb +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -50,7 +50,7 @@ def on_nl(tok, data) @continue = false @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 end - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_ignored_nl(tok, data) @@ -61,7 +61,7 @@ def on_ignored_nl(tok, data) @continue = false @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 end - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_op(tok, data) @@ -103,7 +103,7 @@ def on_op(tok, data) @lex_state = EXPR_BEG end end - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_kw(tok, data) @@ -132,54 +132,54 @@ def on_kw(tok, data) @lex_state = EXPR_END end end - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_tstring_beg(tok, data) 
@lex_state = EXPR_BEG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_tstring_end(tok, data) @lex_state = EXPR_END | EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_CHAR(tok, data) @lex_state = EXPR_END - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_period(tok, data) @lex_state = EXPR_DOT - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_int(tok, data) @lex_state = EXPR_END | EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_float(tok, data) @lex_state = EXPR_END | EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_rational(tok, data) @lex_state = EXPR_END | EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_imaginary(tok, data) @lex_state = EXPR_END | EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_symbeg(tok, data) @lex_state = EXPR_FNAME @continue = true @in_fname = true - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end private def on_variables(event, tok, data) @@ -198,7 +198,7 @@ def on_symbeg(tok, data) else @lex_state = EXPR_CMDARG end - @callback.call(Token.new(lineno, column, event, tok, @lex_state)) + data << 
Token.new(lineno, column, event, tok, @lex_state) end def on_ident(tok, data) @@ -227,32 +227,32 @@ def on_backref(tok, data) def on_lparen(tok, data) @lex_state = EXPR_LABEL | EXPR_BEG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_rparen(tok, data) @lex_state = EXPR_ENDFN - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_lbrace(tok, data) @lex_state = EXPR_LABEL | EXPR_BEG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_rbrace(tok, data) @lex_state = EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_lbracket(tok, data) @lex_state = EXPR_LABEL | EXPR_BEG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_rbracket(tok, data) @lex_state = EXPR_ENDARG - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_const(tok, data) @@ -264,41 +264,43 @@ def on_const(tok, data) else @lex_state = EXPR_CMDARG end - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_sp(tok, data) - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_comma(tok, data) @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0 - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_comment(tok, data) @lex_state = 
EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) end def on_ignored_sp(tok, data) @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) + end + + def on_heredoc_beg(tok, data) + data << Token.new(lineno, column, __method__, tok, @lex_state) + @lex_state = EXPR_END + data end def on_heredoc_end(tok, data) - @callback.call(Token.new(lineno, column, __method__, tok, @lex_state)) + data << Token.new(lineno, column, __method__, tok, @lex_state) @lex_state = EXPR_BEG + data end def on_default(event, tok, data) reset - @callback.call(Token.new(lineno, column, event, tok, @lex_state)) - end - - def each(&block) - @callback = block - parse + data << Token.new(lineno, column, event, tok, @lex_state) end end unless RIPPER_HAS_LEX_STATE @@ -308,21 +310,17 @@ def initialize(code) end def on_default(event, tok, data) - @callback.call(Token.new(lineno, column, event, tok, state)) - end - - def each(&block) - @callback = block - parse + data << Token.new(lineno, column, event, tok, state) end end if RIPPER_HAS_LEX_STATE def get_squashed_tk if @buf.empty? - tk = @inner_lex_enumerator.next + tk = @tokens.shift else tk = @buf.shift end + return nil if tk.nil? case tk[:kind] when :on_symbeg then tk = get_symbol_tk(tk) @@ -472,7 +470,7 @@ def get_squashed_tk string = '' start_tk = nil prev_tk = nil - until heredoc_end?(heredoc_name, indent, tk = @inner_lex_enumerator.next) do + until heredoc_end?(heredoc_name, indent, tk = @tokens.shift) do start_tk = tk unless start_tk if (prev_tk.nil? 
or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no] string = string + (' ' * tk[:char_no]) @@ -566,11 +564,7 @@ def initialize(code) @buf = [] @heredoc_queue = [] @inner_lex = InnerStateLex.new(code) - @inner_lex_enumerator = Enumerator.new do |y| - @inner_lex.each do |tk| - y << tk - end - end + @tokens = @inner_lex.parse([]) end def self.parse(code)