Skip to content

Commit

Permalink
Properly handle incomplete HTML5 fragments (#622)
Browse files Browse the repository at this point in the history
# Type of PR (feature, enhancement, bug fix, etc.)

Bug Fix

## Description

Properly handle incomplete HTML fragments that would normally be
stripped out by Nokogiri. For example, `<tr><td></td></td>` is invalid
by itself because it is not wrapped in a `table` element. This PR
detects these instances and handle them gracefully.

Fixes # (issue)

## Why should this be added

Ability to morph parts of a table is important.

## Checklist

- [x] My code follows the style guidelines of this project
- [x] Checks (StandardRB & Prettier-Standard) are passing
- [x] This is not a documentation update

Co-authored-by: Marco Roth <marco.roth@intergga.ch>
  • Loading branch information
cmer and marcoroth authored Jan 10, 2023
1 parent f86607d commit cc2d33c
Show file tree
Hide file tree
Showing 7 changed files with 198 additions and 8 deletions.
1 change: 1 addition & 0 deletions lib/stimulus_reflex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
require "stimulus_reflex/configuration"
require "stimulus_reflex/callbacks"
require "stimulus_reflex/html/document"
require "stimulus_reflex/html/document_fragment"
require "stimulus_reflex/request_parameters"
require "stimulus_reflex/reflex"
require "stimulus_reflex/reflex_data"
Expand Down
2 changes: 1 addition & 1 deletion lib/stimulus_reflex/broadcasters/selector_broadcaster.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def broadcast(_, data = {})
selectors, html = morph
updates = create_update_collection(selectors, html)
updates.each do |update|
document = StimulusReflex::HTML::Document.new(update.html)
document = StimulusReflex::HTML::DocumentFragment.new(update.html)
match = document.match(update.selector)
if match.present?
operations << [update.selector, StimulusReflex.config.morph_operation]
Expand Down
15 changes: 10 additions & 5 deletions lib/stimulus_reflex/html/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,21 @@ class Document

delegate :element, to: :@document

def to_html
@document.root.to_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT)
def document_element
@document&.root
end

def outer_html
@document.root.to_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT)
document_element ? document_element.to_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) : ""
end
alias_method :to_html, :outer_html

def inner_html
@document.root.inner_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT)
document_element ? document_element.inner_html(save_with: DEFAULT_HTML_WITHOUT_FORMAT) : ""
end

def initialize(html)
@document = Nokogiri::HTML5::Document.parse(html.to_s)
@document = parsing_class.parse(html.to_s)
@matches = {
"body" => Match.new(@document.at_css("body"))
}
Expand All @@ -30,6 +31,10 @@ def empty?
@document.content.empty?
end

def parsing_class
Nokogiri::HTML5::Document
end

def match(selector)
@matches[selector] ||= Match.new(@document.at_css(selector))
end
Expand Down
11 changes: 11 additions & 0 deletions lib/stimulus_reflex/html/document_fragment.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# forzen_string_literal: true

module StimulusReflex
module HTML
class DocumentFragment < Document
def parsing_class
Nokogiri
end
end
end
end
2 changes: 1 addition & 1 deletion test/broadcasters/broadcaster_test_case.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def assert_broadcast_on(stream, data, &block)
message = new_messages.find { |msg| ActiveSupport::JSON.decode(msg) == serialized_msg }

unless message
puts "\n\n#{ActiveSupport::JSON.decode(new_messages.first)}\n#{data}\n\n"
puts "\n\nActual: #{ActiveSupport::JSON.decode(new_messages.first)}\n\nExpected: #{data}\n\n"
end

assert message, "No messages sent with #{data} to #{stream}"
Expand Down
4 changes: 3 additions & 1 deletion test/broadcasters/selector_broadcaster_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ class SelectorBroadcasterTest < StimulusReflex::BroadcasterTestCase
"operations" => [
{
"selector" => "html",
"html" => "<head><title>Test</title></head><body><div><div>bar</div><div>baz</div></div></body>",
# Nokogiri automatically adds a `<meta>` tag for the encoding
# See. https://github.com/sparklemotion/nokogiri/blob/6ea1449926ce97648bb2f7401c9e4fdcb0e261ba/lib/nokogiri/html4/document.rb#L34-L35
"html" => "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"><title>Test</title></head><body><div><div>bar</div><div>baz</div></div></body>",
"payload" => {},
"childrenOnly" => true,
"permanentAttributeName" => nil,
Expand Down
171 changes: 171 additions & 0 deletions test/html/document_fragment_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
# frozen_string_literal: true

require_relative "../test_helper"

class StimulusReflex::HTML::DocumentFragmentTest < ActiveSupport::TestCase
test "should handle nil" do
fragment = StimulusReflex::HTML::DocumentFragment.new(nil)

assert_equal "", fragment.to_html
assert_equal "", fragment.outer_html
assert_equal "", fragment.inner_html

assert_nil fragment.match("html").to_html
assert_nil fragment.match("html").outer_html
assert_nil fragment.match("html").inner_html

assert_nil fragment.match("body").to_html
assert_nil fragment.match("body").outer_html
assert_nil fragment.match("body").inner_html
end

test "should handle empty string" do
fragment = StimulusReflex::HTML::DocumentFragment.new("")

assert_equal "", fragment.to_html
assert_equal "", fragment.outer_html
assert_equal "", fragment.inner_html

assert_nil fragment.match("html").to_html
assert_nil fragment.match("html").outer_html
assert_nil fragment.match("html").inner_html

assert_nil fragment.match("body").to_html
assert_nil fragment.match("body").outer_html
assert_nil fragment.match("body").inner_html
end

test "should extract a fragment of the HTML" do
raw_html = <<-HTML
<div id="container">
<h1 id="title">Home#index</h1>
</div>
HTML

fragment = StimulusReflex::HTML::DocumentFragment.new(raw_html)

inner_title = "Home#index"
outer_title = "<h1 id=\"title\">#{inner_title}</h1>"
inner_container = outer_title
outer_container = "<div id=\"container\"> #{inner_container} </div>"

assert_equal raw_html.squish, fragment.to_html.squish
assert_equal outer_title.squish, fragment.inner_html.squish
assert_equal raw_html.squish, fragment.outer_html.squish

refute fragment.match("body").present?
assert_nil fragment.match("body").to_html

assert_equal outer_container, fragment.match("#container").to_html.squish
assert_equal outer_container, fragment.match("#container").outer_html.squish
assert_equal inner_container, fragment.match("#container").inner_html.squish

assert_equal outer_title, fragment.match("#title").to_html.squish
assert_equal outer_title, fragment.match("#title").outer_html.squish
assert_equal inner_title, fragment.match("#title").inner_html.squish
end

test "should extract body of a fragment" do
raw_body = <<-HTML
<body id="body">
<h1>Home#index</h1>
<p>Find me in app/views/home/index.html.erb</p>
</body>
HTML

fragment = StimulusReflex::HTML::DocumentFragment.new(raw_body)

inner_body = "<h1>Home#index</h1> <p>Find me in app/views/home/index.html.erb</p>"
outer_body = "<body id=\"body\"> #{inner_body} </body>"

assert_equal outer_body, fragment.to_html.squish
assert_equal inner_body, fragment.inner_html.squish
assert_equal outer_body, fragment.outer_html.squish

assert_equal outer_body, fragment.match("body").to_html.squish
assert_equal outer_body, fragment.match("body").outer_html.squish
assert_equal inner_body, fragment.match("body").inner_html.squish

assert_equal outer_body, fragment.match("#body").to_html.squish
assert_equal outer_body, fragment.match("#body").outer_html.squish
assert_equal inner_body, fragment.match("#body").inner_html.squish
end

test "should extract whole HTML fragment" do
raw_body = <<-BODY
<body id="body">
<h1>Home#index</h1>
<p>Find me in app/views/home/index.html.erb</p>
</body>
BODY

raw_html = <<-HTML
<!DOCTYPE html>
<html>
<head>
<title>StimulusReflex Test</title>
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta name="csrf-param" content="authenticity_token" />
<meta name="csrf-token" content="token" />
<link rel="stylesheet" href="/assets/application.css" data-turbo-track="reload" />
<script src="/assets/application.js" data-turbo-track="reload" defer="defer"></script>
</head>
#{raw_body}
</html>
HTML

fragment = StimulusReflex::HTML::DocumentFragment.new(raw_html)

inner_p = "Find me in app/views/home/index.html.erb"
outer_p = "<p>#{inner_p}</p>"
inner_body = "<h1>Home#index</h1> #{outer_p}"
outer_body = "<body id=\"body\"> #{inner_body} </body>"
inner_html = "<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"> <title>StimulusReflex Test</title> <meta name=\"viewport\" content=\"width=device-width,initial-scale=1\"> <meta name=\"csrf-param\" content=\"authenticity_token\"> <meta name=\"csrf-token\" content=\"token\"> <link rel=\"stylesheet\" href=\"/assets/application.css\" data-turbo-track=\"reload\"> <script src=\"/assets/application.js\" data-turbo-track=\"reload\" defer></script> </head> #{outer_body}"
outer_html = "<html> #{inner_html} </html>"

assert_equal outer_html, fragment.to_html.squish
assert_equal outer_html, fragment.outer_html.squish
assert_equal inner_html, fragment.inner_html.squish

assert_equal outer_html, fragment.match("html").to_html.squish
assert_equal outer_html, fragment.match("html").outer_html.squish
assert_equal inner_html, fragment.match("html").inner_html.squish

assert_equal outer_body, fragment.match("body").to_html.squish
assert_equal outer_body, fragment.match("body").outer_html.squish
assert_equal inner_body, fragment.match("body").inner_html.squish

assert_equal outer_body, fragment.match("#body").to_html.squish
assert_equal outer_body, fragment.match("#body").outer_html.squish
assert_equal inner_body, fragment.match("#body").inner_html.squish

assert_equal outer_p, fragment.match("p").to_html.squish
assert_equal outer_p, fragment.match("p").outer_html.squish
assert_equal inner_p, fragment.match("p").inner_html.squish
end

test "should properly handle a tr without the parent table" do
html = "<tr><td>1</td><td>2</td></tr>"
fragment = StimulusReflex::HTML::DocumentFragment.new(html)
assert_equal html, fragment.to_html.squish
end

test "should properly handle a td without the parent table or td" do
html = "<td>1</td>"
fragment = StimulusReflex::HTML::DocumentFragment.new(html)
assert_equal html, fragment.to_html.squish
end

test "should properly return inner html of a complex tr when parsed as a fragment fragment" do
html = '<tr data-foo="1" id="123" class="abc"><td>1</td><td>2</td></tr>'
fragment = StimulusReflex::HTML::DocumentFragment.new(html)
assert_equal "<td>1</td><td>2</td>", fragment.inner_html.squish
end

test "should properly return inner html of a td when parsed as a fragment fragment" do
html = "<td>1</td>"
fragment = StimulusReflex::HTML::DocumentFragment.new(html)
assert_equal "1", fragment.inner_html.squish
end
end

0 comments on commit cc2d33c

Please sign in to comment.