Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Selma signatures #370

Merged
merged 3 commits on Dec 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lib/html_pipeline/node_filter/emoji_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ def selector
Selma::Selector.new(match_text_within: "*", ignore_text_within: ignored_ancestor_tags)
end

def handle_text(text)
return text unless text.include?(":")
def handle_text_chunk(text)
return unless text.to_s.include?(":")

emoji_image_filter(text)
text.replace(emoji_image_filter(text.to_s), as: :html)
end

# Implementation of validate hook.
Expand Down
3 changes: 2 additions & 1 deletion lib/html_pipeline/node_filter/image_max_width_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ def handle_element(element)

def link_image(element)
link_start = %(<a target="_blank" href="#{element["src"]}">)
element.before(link_start, as: :html)
link_end = "</a>"
element.wrap(link_start, link_end, :as_html)
element.after(link_end, as: :html)
end
end
end
Expand Down
11 changes: 6 additions & 5 deletions lib/html_pipeline/node_filter/mention_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,14 @@ def selector
SELECTOR
end

def handle_text(text)
return text unless text.include?("@")
def handle_text_chunk(text)
content = text.to_s
return unless content.include?("@")

html = mention_link_filter(text, base_url: base_url, username_pattern: username_pattern)
return text if html == text
html = mention_link_filter(content, base_url: base_url, username_pattern: username_pattern)
return if html == content

html
text.replace(html, as: :html)
end

# The URL to provide when someone @mentions a "mention" name, such
Expand Down
10 changes: 6 additions & 4 deletions lib/html_pipeline/node_filter/syntax_highlight_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ def handle_element(element)
element["class"] = "#{scope} #{scope}-#{@lang}" if include_lang?
end

def handle_text(text)
return text if @lang.nil?
return text if (lexer = lexer_for(@lang)).nil?
def handle_text_chunk(text)
return if @lang.nil?
return if (lexer = lexer_for(@lang)).nil?

highlight_with_timeout_handling(text, lexer)
content = text.to_s

text.replace(highlight_with_timeout_handling(content, lexer), as: :html)
end

def highlight_with_timeout_handling(text, lexer)
Expand Down
8 changes: 3 additions & 5 deletions lib/html_pipeline/node_filter/table_of_contents_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,13 @@ def handle_element(element)
element["id"] = header_id
element["class"] = classes

element.set_inner_content(anchor_html, :as_html)
element.set_inner_content(anchor_html, as: :html)

result[:toc] << { href: header_href }
end

def handle_text(text)
result[:toc].last[:text] = text

text
def handle_text_chunk(text)
result[:toc].last[:text] = text.to_s
end
end
end
Expand Down
7 changes: 4 additions & 3 deletions lib/html_pipeline/node_filter/team_mention_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,11 @@ def selector
SELECTOR
end

def handle_text(text)
return text unless text.include?("@")
def handle_text_chunk(text)
content = text.to_s
return unless content.include?("@")

mention_link_filter(text, base_url: base_url, team_pattern: team_pattern)
text.replace(mention_link_filter(content, base_url: base_url, team_pattern: team_pattern), as: :html)
end

def team_pattern
Expand Down
66 changes: 24 additions & 42 deletions lib/html_pipeline/sanitization_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,37 +13,35 @@ class HTMLPipeline
#
# This filter does not write additional information to the context.
class SanitizationFilter
LISTS = Set.new(["ul", "ol"].freeze)
LIST_ITEM = "li"

# List of table child elements. These must be contained by a <table> element
# or they are not allowed through. Otherwise they can be used to break out
# of places we're using tables to contain formatted user content (like pull
# request review comments).
TABLE_ITEMS = Set.new(["tr", "td", "th"].freeze)
TABLE = "table"
TABLE_SECTIONS = Set.new(["thead", "tbody", "tfoot"].freeze)

# These schemes are the only ones allowed in <a href> attributes by default.
PROTOCOLS = ["http", "https", "mailto", "xmpp", :relative, "irc", "ircs"].freeze
VALID_PROTOCOLS = Selma::Sanitizer::Config::VALID_PROTOCOLS.dup

# The main sanitization allowlist. Only these elements and attributes are
# allowed through by default.
DEFAULT_CONFIG = {
elements: ["h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th", "s", "strike", "summary", "details", "caption", "figure", "figcaption", "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr"].freeze,
remove_contents: ["script"].freeze,
elements: ["h1", "h2", "h3", "h4", "h5", "h6", "br", "b", "i", "strong", "em", "a", "pre", "code",
"img", "tt", "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot",
"blockquote", "dl", "dt", "dd", "kbd", "q", "samp", "var", "hr", "ruby", "rt", "rp", "li", "tr", "td", "th",
"s", "strike", "summary", "details", "caption", "figure", "figcaption", "abbr", "bdo", "cite",
"dfn", "mark", "small", "span", "time", "wbr",],

attributes: {
"a" => ["href"].freeze,
"img" => ["src", "longdesc"].freeze,
"div" => ["itemscope", "itemtype"].freeze,
"blockquote" => ["cite"].freeze,
"del" => ["cite"].freeze,
"ins" => ["cite"].freeze,
"q" => ["cite"].freeze,
all: ["abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", "aria-describedby", "aria-hidden", "aria-label", "aria-labelledby", "axis", "border", "cellpadding", "cellspacing", "char", "charoff", "charset", "checked", "clear", "cols", "colspan", "color", "compact", "coords", "datetime", "dir", "disabled", "enctype", "for", "frame", "headers", "height", "hreflang", "hspace", "ismap", "label", "lang", "maxlength", "media", "method", "multiple", "name", "nohref", "noshade", "nowrap", "open", "progress", "prompt", "readonly", "rel", "rev", "role", "rows", "rowspan", "rules", "scope", "selected", "shape", "size", "span", "start", "summary", "tabindex", "target", "title", "type", "usemap", "valign", "value", "vspace", "width", "itemprop"].freeze,
}.freeze,
"a" => ["href"],
"img" => ["src", "longdesc"],
"div" => ["itemscope", "itemtype"],
"blockquote" => ["cite"],
"del" => ["cite"],
"ins" => ["cite"],
"q" => ["cite"],
all: ["abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", "aria-describedby",
"aria-hidden", "aria-label", "aria-labelledby", "axis", "border", "cellpadding", "cellspacing", "char",
"charoff", "charset", "checked", "clear", "cols", "colspan", "color", "compact", "coords", "datetime", "dir",
"disabled", "enctype", "for", "frame", "headers", "height", "hreflang", "hspace", "id", "ismap", "label", "lang",
"maxlength", "media", "method", "multiple", "name", "nohref", "noshade", "nowrap", "open", "progress",
"prompt", "readonly", "rel", "rev", "role", "rows", "rowspan", "rules", "scope", "selected", "shape",
"size", "span", "start", "summary", "tabindex", "title", "type", "usemap", "valign", "value", "width", "itemprop",],
},
protocols: {
"a" => { "href" => PROTOCOLS }.freeze,
"a" => { "href" => Selma::Sanitizer::Config::VALID_PROTOCOLS }.freeze,
"blockquote" => { "cite" => ["http", "https", :relative].freeze },
"del" => { "cite" => ["http", "https", :relative].freeze },
"ins" => { "cite" => ["http", "https", :relative].freeze },
Expand All @@ -53,23 +51,7 @@ class SanitizationFilter
"longdesc" => ["http", "https", :relative].freeze,
}.freeze,
},
transformers: [
# Top-level <li> elements are removed because they can break out of
# containing markup.
lambda { |env|
name = env[:node_name]
node = env[:node]
node.replace(node.children) if name == LIST_ITEM && node.ancestors.none? { |n| LISTS.include?(n.name) }
},

# Table child elements that are not contained by a <table> are removed.
lambda { |env|
name = env[:node_name]
node = env[:node]
node.replace(node.children) if (TABLE_SECTIONS.include?(name) || TABLE_ITEMS.include?(name)) && node.ancestors.none? { |n| n.name == TABLE }
},
].freeze,
}.freeze
}

class << self
def call(html, config)
Expand Down
4 changes: 2 additions & 2 deletions test/sanitization_filter_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,11 @@ def test_uses_anchor_schemes_from_allowlist_when_not_separately_specified
end

def test_allowlist_contains_default_anchor_schemes
assert_equal(["http", "https", "mailto", "xmpp", :relative, "irc", "ircs"], SanitizationFilter::DEFAULT_CONFIG[:protocols]["a"]["href"])
assert_equal(["http", "https", "mailto", :relative], SanitizationFilter::DEFAULT_CONFIG[:protocols]["a"]["href"])
end

def test_exports_default_anchor_schemes
assert_equal(["http", "https", "mailto", "xmpp", :relative, "irc", "ircs"], SanitizationFilter::PROTOCOLS)
assert_equal(["http", "https", "mailto", :relative], SanitizationFilter::VALID_PROTOCOLS)
end

def test_script_contents_are_removed
Expand Down