From a5fc3d63fe27d7144629077fb0fdbaaf31559a30 Mon Sep 17 00:00:00 2001
From: Eric Hodel <drbrain@segment7.net>
Date: Mon, 17 Dec 2012 23:27:47 -0800
Subject: [PATCH] Added limited HEREDOC tokenization

A TkHEREDOC is only generated when the heredoc identifier is followed by
a line end.  When followed by a method call, comma, etc., a TkSTRING is
returned instead so the source output in the HTML view is correct.

Fixes #162

Conflicts:
	History.rdoc
	lib/rdoc/store.rb
---
 History.rdoc               |  4 ++-
 lib/rdoc/ruby_lex.rb       | 24 +++++++++++++---
 lib/rdoc/ruby_token.rb     |  1 +
 test/test_rdoc_ruby_lex.rb | 56 ++++++++++++++++++++++++++++++++++----
 4 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/History.rdoc b/History.rdoc
index e21b1d2b7e..2f328afec0 100644
--- a/History.rdoc
+++ b/History.rdoc
@@ -8,7 +8,9 @@
   * Fixed tokenization of % when it is not followed by a $-string type
   * Fixed display of __END__ in documentation examples in HTML output
   * Fixed tokenization of reserved words used as new-style hash keys
-
+  * Fixed HEREDOC output for the limited case of a heredoc followed by a line
+    end.  When a HEREDOC is not followed by a line end RDoc is not currently
+    smart enough to restore the source correctly.  Bug #162 by Zachary Scott.
 
 === 3.12.1 / 2013-02-05
 
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb
index 313e69ea47..e6e0b41aab 100644
--- a/lib/rdoc/ruby_lex.rb
+++ b/lib/rdoc/ruby_lex.rb
@@ -982,12 +982,13 @@ def identify_here_document
       indent = true
     end
     if /['"`]/ =~ ch
-      lt = ch
+      user_quote = lt = ch
       quoted = ""
       while (c = getc) && c != lt
         quoted.concat c
       end
     else
+      user_quote = nil
       lt = '"'
       quoted = ch.dup
       while (c = getc) && c =~ /\w/
@@ -1007,8 +1008,17 @@ def identify_here_document
       end
     end
 
+    output_heredoc = reserve.join =~ /\A\r?\n\z/
+
+    if output_heredoc then
+      doc = '<<'
+      doc << '-' if indent
+      doc << "#{user_quote}#{quoted}#{user_quote}\n"
+    else
+      doc = '"'
+    end
+
     @here_header = false
-    doc = '"'
     while l = gets
       l = l.sub(/(:?\r)?\n\z/, "\n")
       if (indent ? l.strip : l.chomp) == quoted
@@ -1016,7 +1026,12 @@ def identify_here_document
       end
       doc << l
     end
-    doc << '"'
+
+    if output_heredoc then
+      doc << l.chomp
+    else
+      doc << '"'
+    end
 
     @here_header = true
     @here_readed.concat reserve
@@ -1024,9 +1039,10 @@ def identify_here_document
       ungetc ch
     end
 
+    token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt]
     @ltype = ltback
     @lex_state = EXPR_END
-    Token(Ltype2Token[lt], doc)
+    Token(token_class, doc)
   end
 
   def identify_quotation
diff --git a/lib/rdoc/ruby_token.rb b/lib/rdoc/ruby_token.rb
index 687ef2fe80..d3333bc6a1 100644
--- a/lib/rdoc/ruby_token.rb
+++ b/lib/rdoc/ruby_token.rb
@@ -331,6 +331,7 @@ def Token(token, value = nil)
     [:TkINTEGER,    TkVal],
     [:TkFLOAT,      TkVal],
     [:TkSTRING,     TkVal],
+    [:TkHEREDOC,    TkVal],
     [:TkXSTRING,    TkVal],
     [:TkREGEXP,     TkVal],
     [:TkSYMBOL,     TkVal],
diff --git a/test/test_rdoc_ruby_lex.rb b/test/test_rdoc_ruby_lex.rb
index dfa350e018..0dcb42565d 100644
--- a/test/test_rdoc_ruby_lex.rb
+++ b/test/test_rdoc_ruby_lex.rb
@@ -70,7 +70,8 @@ def x
       @TK::TkIDENTIFIER.new( 4, 1,  4, 'x'),
       @TK::TkNL        .new( 5, 1,  5, "\n"),
       @TK::TkSPACE     .new( 6, 2,  0, '  '),
-      @TK::TkSTRING    .new( 8, 2,  2, %Q{"Line 1\nLine 2\n"}),
+      @TK::TkHEREDOC   .new( 8, 2,  2,
+                            %Q{<<E\nLine 1\nLine 2\nE}),
       @TK::TkNL        .new(27, 5, 28, "\n"),
       @TK::TkEND       .new(28, 6,  0, 'end'),
       @TK::TkNL        .new(31, 6, 28, "\n"),
@@ -96,12 +97,56 @@ def test_class_tokenize_hash_symbol
     assert_equal expected, tokens
   end
 
+  def test_class_tokenize_heredoc_CR_NL
+    tokens = RDoc::RubyLex.tokenize <<-RUBY, nil
+string = <<-STRING\r
+Line 1\r
+Line 2\r
+  STRING\r
+    RUBY
+
+    expected = [
+      @TK::TkIDENTIFIER.new( 0, 1,  0, 'string'),
+      @TK::TkSPACE     .new( 6, 1,  6, ' '),
+      @TK::TkASSIGN    .new( 7, 1,  7, '='),
+      @TK::TkSPACE     .new( 8, 1,  8, ' '),
+      @TK::TkHEREDOC   .new( 9, 1,  9,
+                            %Q{<<-STRING\nLine 1\nLine 2\n  STRING}),
+      @TK::TkSPACE     .new(44, 4, 45, "\r"),
+      @TK::TkNL        .new(45, 4, 46, "\n"),
+    ]
+
+    assert_equal expected, tokens
+  end
+
+  def test_class_tokenize_heredoc_call
+    tokens = RDoc::RubyLex.tokenize <<-'RUBY', nil
+string = <<-STRING.chomp
+Line 1
+Line 2
+  STRING
+    RUBY
+
+    expected = [
+      @TK::TkIDENTIFIER.new( 0, 1,  0, 'string'),
+      @TK::TkSPACE     .new( 6, 1,  6, ' '),
+      @TK::TkASSIGN    .new( 7, 1,  7, '='),
+      @TK::TkSPACE     .new( 8, 1,  8, ' '),
+      @TK::TkSTRING    .new( 9, 1,  9, %Q{"Line 1\nLine 2\n"}),
+      @TK::TkDOT       .new(41, 4, 42, '.'),
+      @TK::TkIDENTIFIER.new(42, 4, 43, 'chomp'),
+      @TK::TkNL        .new(47, 4, 48, "\n"),
+    ]
+
+    assert_equal expected, tokens
+  end
+
   def test_class_tokenize_heredoc_indent
     tokens = RDoc::RubyLex.tokenize <<-'RUBY', nil
 string = <<-STRING
 Line 1
 Line 2
-STRING
+  STRING
     RUBY
 
     expected = [
@@ -109,8 +154,9 @@ def test_class_tokenize_heredoc_indent
       @TK::TkSPACE     .new( 6, 1,  6, ' '),
       @TK::TkASSIGN    .new( 7, 1,  7, '='),
       @TK::TkSPACE     .new( 8, 1,  8, ' '),
-      @TK::TkSTRING    .new( 9, 1,  9, %Q{"Line 1\nLine 2\n"}),
-      @TK::TkNL        .new(39, 4, 40, "\n"),
+      @TK::TkHEREDOC   .new( 9, 1,  9,
+                            %Q{<<-STRING\nLine 1\nLine 2\n  STRING}),
+      @TK::TkNL        .new(41, 4, 42, "\n"),
     ]
 
     assert_equal expected, tokens
@@ -128,7 +174,7 @@ def test_class_tokenize_heredoc_percent_N
       @TK::TkSPACE     .new( 1, 1,  1, ' '),
       @TK::TkIDENTIFIER.new( 2, 1,  2, 'b'),
       @TK::TkSPACE     .new( 3, 1,  3, ' '),
-      @TK::TkSTRING    .new( 4, 1,  4, %Q{"%N\n"}),
+      @TK::TkHEREDOC   .new( 4, 1,  4, %Q{<<-U\n%N\nU}),
       @TK::TkNL        .new(13, 3, 14, "\n"),
     ]