From cc2d33c1afea0e840def79bf25367151b7fb98e5 Mon Sep 17 00:00:00 2001 From: Carl Mercier Date: Tue, 10 Jan 2023 15:32:34 -0500 Subject: [PATCH] Properly handle incomplete HTML5 fragments (#622) # Type of PR (feature, enhancement, bug fix, etc.) Bug Fix ## Description Properly handle incomplete HTML fragments that would normally be stripped out by Nokogiri. For example, `` is invalid by itself because it is not wrapped in a `table` element. This PR detects these instances and handle them gracefully. Fixes # (issue) ## Why should this be added Ability to morph parts of a table is important. ## Checklist - [x] My code follows the style guidelines of this project - [x] Checks (StandardRB & Prettier-Standard) are passing - [x] This is not a documentation update Co-authored-by: Marco Roth --- lib/stimulus_reflex.rb | 1 + .../broadcasters/selector_broadcaster.rb | 2 +- lib/stimulus_reflex/html/document.rb | 15 +- lib/stimulus_reflex/html/document_fragment.rb | 11 ++ test/broadcasters/broadcaster_test_case.rb | 2 +- .../broadcasters/selector_broadcaster_test.rb | 4 +- test/html/document_fragment_test.rb | 171 ++++++++++++++++++ 7 files changed, 198 insertions(+), 8 deletions(-) create mode 100644 lib/stimulus_reflex/html/document_fragment.rb create mode 100644 test/html/document_fragment_test.rb diff --git a/lib/stimulus_reflex.rb b/lib/stimulus_reflex.rb index f2b02b89..35e9d001 100644 --- a/lib/stimulus_reflex.rb +++ b/lib/stimulus_reflex.rb @@ -16,6 +16,7 @@ require "stimulus_reflex/configuration" require "stimulus_reflex/callbacks" require "stimulus_reflex/html/document" +require "stimulus_reflex/html/document_fragment" require "stimulus_reflex/request_parameters" require "stimulus_reflex/reflex" require "stimulus_reflex/reflex_data" diff --git a/lib/stimulus_reflex/broadcasters/selector_broadcaster.rb b/lib/stimulus_reflex/broadcasters/selector_broadcaster.rb index e209b53c..ca049bba 100644 --- a/lib/stimulus_reflex/broadcasters/selector_broadcaster.rb +++ b/lib/stimulus_reflex/broadcasters/selector_broadcaster.rb @@ -7,7 +7,7 @@ def broadcast(_, data = {}) selectors, html = morph updates = create_update_collection(selectors, html) updates.each do |update| - document = StimulusReflex::HTML::Document.new(update.html) + document = StimulusReflex::HTML::DocumentFragment.new(update.html) match = document.match(update.selector) if match.present? operations << [update.selector, StimulusReflex.config.morph_operation] diff --git a/lib/stimulus_reflex/html/document.rb b/lib/stimulus_reflex/html/document.rb index 3c6dce35..80f6eea8 100644 --- a/lib/stimulus_reflex/html/document.rb +++ b/lib/stimulus_reflex/html/document.rb @@ -7,20 +7,21 @@ class Document delegate :element, to: :@document - def to_html - @document.root.to_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) + def document_element + @document&.root end def outer_html - @document.root.to_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) + document_element ? document_element.to_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) : "" end + alias_method :to_html, :outer_html def inner_html - @document.root.inner_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) + document_element ? document_element.inner_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) : "" end def initialize(html) - @document = Nokogiri::HTML5::Document.parse(html.to_s) + @document = parsing_class.parse(html.to_s) @matches = { "body" => Match.new(@document.at_css("body")) } @@ -30,6 +31,10 @@ def empty? @document.content.empty? end + def parsing_class + Nokogiri::HTML5::Document + end + def match(selector) @matches[selector] ||= Match.new(@document.at_css(selector)) end diff --git a/lib/stimulus_reflex/html/document_fragment.rb b/lib/stimulus_reflex/html/document_fragment.rb new file mode 100644 index 00000000..fbcb67d0 --- /dev/null +++ b/lib/stimulus_reflex/html/document_fragment.rb @@ -0,0 +1,11 @@ +# forzen_string_literal: true + +module StimulusReflex + module HTML + class DocumentFragment < Document + def parsing_class + Nokogiri + end + end + end +end diff --git a/test/broadcasters/broadcaster_test_case.rb b/test/broadcasters/broadcaster_test_case.rb index 52f1df5f..72716a31 100644 --- a/test/broadcasters/broadcaster_test_case.rb +++ b/test/broadcasters/broadcaster_test_case.rb @@ -23,7 +23,7 @@ def assert_broadcast_on(stream, data, &block) message = new_messages.find { |msg| ActiveSupport::JSON.decode(msg) == serialized_msg } unless message - puts "\n\n#{ActiveSupport::JSON.decode(new_messages.first)}\n#{data}\n\n" + puts "\n\nActual: #{ActiveSupport::JSON.decode(new_messages.first)}\n\nExpected: #{data}\n\n" end assert message, "No messages sent with #{data} to #{stream}" diff --git a/test/broadcasters/selector_broadcaster_test.rb b/test/broadcasters/selector_broadcaster_test.rb index bcb54a79..ded2b4e8 100644 --- a/test/broadcasters/selector_broadcaster_test.rb +++ b/test/broadcasters/selector_broadcaster_test.rb @@ -71,7 +71,9 @@ class SelectorBroadcasterTest < StimulusReflex::BroadcasterTestCase "operations" => [ { "selector" => "html", - "html" => "Test
bar
baz
", + # Nokogiri automatically adds a `` tag for the encoding + # See. https://github.com/sparklemotion/nokogiri/blob/6ea1449926ce97648bb2f7401c9e4fdcb0e261ba/lib/nokogiri/html4/document.rb#L34-L35 + "html" => "Test
bar
baz
", "payload" => {}, "childrenOnly" => true, "permanentAttributeName" => nil, diff --git a/test/html/document_fragment_test.rb b/test/html/document_fragment_test.rb new file mode 100644 index 00000000..455759c4 --- /dev/null +++ b/test/html/document_fragment_test.rb @@ -0,0 +1,171 @@ +# frozen_string_literal: true + +require_relative "../test_helper" + +class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase + test "should handle nil" do + fragment = StimulusReflex::HTML::DocumentFragment.new(nil) + + assert_equal "", fragment.to_html + assert_equal "", fragment.outer_html + assert_equal "", fragment.inner_html + + assert_nil fragment.match("html").to_html + assert_nil fragment.match("html").outer_html + assert_nil fragment.match("html").inner_html + + assert_nil fragment.match("body").to_html + assert_nil fragment.match("body").outer_html + assert_nil fragment.match("body").inner_html + end + + test "should handle empty string" do + fragment = StimulusReflex::HTML::DocumentFragment.new("") + + assert_equal "", fragment.to_html + assert_equal "", fragment.outer_html + assert_equal "", fragment.inner_html + + assert_nil fragment.match("html").to_html + assert_nil fragment.match("html").outer_html + assert_nil fragment.match("html").inner_html + + assert_nil fragment.match("body").to_html + assert_nil fragment.match("body").outer_html + assert_nil fragment.match("body").inner_html + end + + test "should extract a fragment of the HTML" do + raw_html = <<-HTML +
+

Home#index

+
+ HTML + + fragment = StimulusReflex::HTML::DocumentFragment.new(raw_html) + + inner_title = "Home#index" + outer_title = "

#{inner_title}

" + inner_container = outer_title + outer_container = "
#{inner_container}
" + + assert_equal raw_html.squish, fragment.to_html.squish + assert_equal outer_title.squish, fragment.inner_html.squish + assert_equal raw_html.squish, fragment.outer_html.squish + + refute fragment.match("body").present? + assert_nil fragment.match("body").to_html + + assert_equal outer_container, fragment.match("#container").to_html.squish + assert_equal outer_container, fragment.match("#container").outer_html.squish + assert_equal inner_container, fragment.match("#container").inner_html.squish + + assert_equal outer_title, fragment.match("#title").to_html.squish + assert_equal outer_title, fragment.match("#title").outer_html.squish + assert_equal inner_title, fragment.match("#title").inner_html.squish + end + + test "should extract body of a fragment" do + raw_body = <<-HTML + +

Home#index

+

Find me in app/views/home/index.html.erb

+ + HTML + + fragment = StimulusReflex::HTML::DocumentFragment.new(raw_body) + + inner_body = "

Home#index

Find me in app/views/home/index.html.erb

" + outer_body = " #{inner_body} " + + assert_equal outer_body, fragment.to_html.squish + assert_equal inner_body, fragment.inner_html.squish + assert_equal outer_body, fragment.outer_html.squish + + assert_equal outer_body, fragment.match("body").to_html.squish + assert_equal outer_body, fragment.match("body").outer_html.squish + assert_equal inner_body, fragment.match("body").inner_html.squish + + assert_equal outer_body, fragment.match("#body").to_html.squish + assert_equal outer_body, fragment.match("#body").outer_html.squish + assert_equal inner_body, fragment.match("#body").inner_html.squish + end + + test "should extract whole HTML fragment" do + raw_body = <<-BODY + +

Home#index

+

Find me in app/views/home/index.html.erb

+ + BODY + + raw_html = <<-HTML + + + + StimulusReflex Test + + + + + + + + #{raw_body} + + HTML + + fragment = StimulusReflex::HTML::DocumentFragment.new(raw_html) + + inner_p = "Find me in app/views/home/index.html.erb" + outer_p = "

#{inner_p}

" + inner_body = "

Home#index

#{outer_p}" + outer_body = " #{inner_body} " + inner_html = " StimulusReflex Test #{outer_body}" + outer_html = " #{inner_html} " + + assert_equal outer_html, fragment.to_html.squish + assert_equal outer_html, fragment.outer_html.squish + assert_equal inner_html, fragment.inner_html.squish + + assert_equal outer_html, fragment.match("html").to_html.squish + assert_equal outer_html, fragment.match("html").outer_html.squish + assert_equal inner_html, fragment.match("html").inner_html.squish + + assert_equal outer_body, fragment.match("body").to_html.squish + assert_equal outer_body, fragment.match("body").outer_html.squish + assert_equal inner_body, fragment.match("body").inner_html.squish + + assert_equal outer_body, fragment.match("#body").to_html.squish + assert_equal outer_body, fragment.match("#body").outer_html.squish + assert_equal inner_body, fragment.match("#body").inner_html.squish + + assert_equal outer_p, fragment.match("p").to_html.squish + assert_equal outer_p, fragment.match("p").outer_html.squish + assert_equal inner_p, fragment.match("p").inner_html.squish + end + + test "should properly handle a tr without the parent table" do + html = "12" + fragment = StimulusReflex::HTML::DocumentFragment.new(html) + assert_equal html, fragment.to_html.squish + end + + test "should properly handle a td without the parent table or td" do + html = "1" + fragment = StimulusReflex::HTML::DocumentFragment.new(html) + assert_equal html, fragment.to_html.squish + end + + test "should properly return inner html of a complex tr when parsed as a fragment fragment" do + html = '12' + fragment = StimulusReflex::HTML::DocumentFragment.new(html) + assert_equal "12", fragment.inner_html.squish + end + + test "should properly return inner html of a td when parsed as a fragment fragment" do + html = "1" + fragment = StimulusReflex::HTML::DocumentFragment.new(html) + assert_equal "1", fragment.inner_html.squish + end +end