-
Notifications
You must be signed in to change notification settings - Fork 2.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: HTML API: Add set_inner_html()
to HTML Processor
#7326
base: trunk
Are you sure you want to change the base?
Changes from all commits
d456844
7e97aed
945c817
93b48e7
0bab23f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5020,6 +5020,158 @@ public function get_comment_type(): ?string { | |
return $this->is_virtual() ? null : parent::get_comment_type(); | ||
} | ||
|
||
/**
 * Normalizes an HTML string by creating a fragment processor for it
 * and returning that processor's serialization.
 *
 * This removes any partial syntax at the end of the string.
 *
 * @since 6.7.0
 *
 * @param string $html Input HTML to normalize.
 *
 * @return string|null Normalized output, or `null` if unable to normalize.
 */
public static function normalize( string $html ): ?string {
	$processor = static::create_fragment( $html );

	/*
	 * `create_fragment()` is documented as able to return `null`; in that case
	 * there is nothing to serialize, so report failure through the nullable
	 * return instead of calling a method on `null`.
	 */
	if ( null === $processor ) {
		return null;
	}

	return $processor->serialize();
}
|
||
/**
 * Generates normalized markup for the HTML in the provided processor.
 *
 * Walks every remaining token and re-emits it in a canonical form:
 * text and attribute values are escaped, comments and CDATA sections are
 * re-wrapped in their delimiters, and tags are rebuilt from their
 * qualified names and attributes.
 *
 * This removes any partial syntax at the end of the string.
 *
 * @since 6.7.0
 *
 * @return string|null Normalized HTML markup represented by processor,
 *                     or `null` if unable to generate serialization.
 */
public function serialize(): ?string {
	// Serialization is only attempted when the parser is in its ready state.
	if ( WP_HTML_Tag_Processor::STATE_READY !== $this->parser_state ) {
		return null;
	}

	$html = '';
	while ( $this->next_token() ) {
		$token_type = $this->get_token_type();

		switch ( $token_type ) {
			case '#text':
				$html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' );
				break;

			// Both comment flavors are emitted as ordinary HTML comments.
			case '#funky-comment':
			case '#comment':
				$html .= "<!--{$this->get_modifiable_text()}-->";
				break;

			case '#cdata-section':
				$html .= "<![CDATA[{$this->get_modifiable_text()}]]>";
				break;

			// NOTE(review): presumably meant to match a DOCTYPE token; confirm that get_token_type() reports it as 'html'.
			case 'html':
				$html .= '<!DOCTYPE html>';
				break;
		}

		// Everything below only applies to tag tokens.
		if ( '#tag' !== $token_type ) {
			continue;
		}

		if ( $this->is_tag_closer() ) {
			$html .= "</{$this->get_qualified_tag_name()}>";
			continue;
		}

		// When no attribute list is available, emit a bare opening tag.
		$attribute_names = $this->get_attribute_names_with_prefix( '' );
		if ( ! isset( $attribute_names ) ) {
			$html .= "<{$this->get_qualified_tag_name()}>";
			continue;
		}

		$html .= "<{$this->get_qualified_tag_name()}";
		foreach ( $attribute_names as $attribute_name ) {
			$html .= " {$this->get_qualified_attribute_name( $attribute_name )}";
			$value = $this->get_attribute( $attribute_name );

			// Non-string values get the attribute name only, with no `="…"` part.
			if ( is_string( $value ) ) {
				/*
				 * Pass the charset explicitly, matching the text-node branch above,
				 * so that escaping does not depend on the `default_charset` ini setting.
				 */
				$html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ) . '"';
			}
		}

		// The self-closing flag is only preserved outside of the HTML namespace.
		if ( 'html' !== $this->get_namespace() && $this->has_self_closing_flag() ) {
			$html .= '/';
		}

		$html .= '>';

		// NOTE(review): nothing here emits text held inside a tag token itself (e.g. SCRIPT or STYLE contents) — confirm whether such elements need handling.
	}

	// A parse that ended with a recorded error must not be presented as a valid serialization.
	if ( null !== $this->get_last_error() ) {
		return null;
	}

	return $html;
}
|
||
/** | ||
* Replaces the inner markup of the currently-matched tag with provided HTML. | ||
* | ||
* This function will normalize the given input and enforce the boundaries | ||
* within the existing HTML where it's called. | ||
* | ||
* @since 6.8.0 | ||
* | ||
* @param string $new_inner_html New HTML to inject as inner HTML for the currently-matched tag. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any plans to support in the future the HTML fragment as the param in addition to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It wasn't in my plans, mostly because I'm On the other hand, when working earlier with templating I had a desire to wrap HTML content in some class indicating that it was safe. Something like that would allow deferred parsing and might be a worthwhile investigation here. On the other other hand I've also been a bit doubtful about the likelihood of people using that. Maybe it would eventually be good to have both as you suggest, where a string would be re-parsed, but a wrapped fragment could in theory be deferred. In a wrapped model, to achieve performance wins, I think we have to be careful to defer until the last moment when rendering the outermost processor, which may not be easy and may require stacking complicated updates. For example, this code: $processor = WP_HTML_Processor::create_fragment( '<li>' );
$processor->next_tag( 'li' );
$processor->set_inner_html( WP_HTML_Processor::create_fragment( 'Apples and Oranges' ) );
$outer_processor = WP_HTML_Processor::create_fragment( '<ul>' );
$outer_processor->next_tag( 'ul' );
$outer_processor->set_inner_html( $processor ); This is a challenge because the original processor isn't going to know that it shouldn't apply its update and advance. This also demonstrates some of the questions arising from non-local access of a processor: when passing it into another function it might scan to the end and "finish." What if calling code expects to continue using it after passing it in? For the time being I see the eager string parsing as the more approachable option with computational weight as the tradeoff. This would leave room in the future for an optimization. Other potential optimizations might also arise, such as creating subtrees of the full tree for edits, but these do get complicated quickly. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like the $fragment = $this->spawn_fragment_parser( $new_inner_html );
$new_markup = $fragment->serialize(); In the scenario when using only string as param, the same example would look like this: $processor = WP_HTML_Processor::create_fragment( '<li>' );
$processor->next_tag( 'li' );
$processor->set_inner_html( (string) WP_HTML_Processor::create_fragment( 'Apples and Oranges' ) );
$outer_processor = WP_HTML_Processor::create_fragment( '<ul>' );
$outer_processor->next_tag( 'ul' );
$outer_processor->set_inner_html( (string) $processor ); So there are two steps:
Aside. It made me realize one important aspect for the example provided with the outlined implementation. By eagerly processing HTML when inserting inner HTML, it will happen in a bottom up fashion. In effect, the processor will have to serialize the 'Apples and Oranges' fragment multiple times as the fragments build on top of each other. At every stage, it will know more about the entire document so in certain scenarios it might even impact the produced HTML. Just to illustrate it better:
So this isn't something I initially thought about, but after thinking a bit more about it, it's another part where things are getting interesting.
I didn't think about that earlier. Yes, that is indeed interesting. When passing as a string, you have to call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've sketched an implementation for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See #7144 some prior work on different fragment contexts. |
||
* @return bool|null Whether the inner markup was modified for the currently-matched tag, or `null` | ||
*                   if called on a node which doesn't allow changing the inner HTML. | ||
*/ | ||
public function set_inner_html( string $new_inner_html ): ?bool {
	$tag_name = $this->get_tag();

	/*
	 * Inner HTML can only be replaced while matched on an opening tag, and
	 * not for void elements or the listed elements in the HTML namespace.
	 */
	if (
		WP_HTML_Tag_Processor::STATE_MATCHED_TAG !== $this->parser_state ||
		$this->is_tag_closer() ||
		( 'html' === $this->get_namespace() &&
			(
				self::is_void( $tag_name ) ||
				in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true )
			)
		)
	) {
		// @todo Support setting inner HTML for SCRIPT, STYLE, TEXTAREA, and TITLE.
		return null;
	}

	/*
	 * Normalize the replacement first, before touching any processor state,
	 * so that a failure here leaves the document and cursor untouched.
	 */
	$fragment = $this->spawn_fragment_parser( $new_inner_html );
	if ( null === $fragment ) {
		return false;
	}

	// `serialize()` returns `null` on failure; never queue `null` as replacement text.
	$new_markup = $fragment->serialize();
	if ( null === $new_markup ) {
		return false;
	}

	// NOTE(review): these bookmark names share the caller's namespace and are never released — confirm this cannot clobber a caller's own 'start'/'end' bookmarks.
	if ( ! $this->set_bookmark( 'start' ) ) {
		return false;
	}

	// Advance until the depth drops below that of the matched tag or the tokens run out.
	$depth = $this->get_current_depth();
	while ( $this->get_current_depth() >= $depth && $this->next_token() ) {
		continue;
	}

	if (
		$this->paused_at_incomplete_token() ||
		null !== $this->get_last_error()
	) {
		return false;
	}

	// Without an end bookmark the span to replace is unknown; bail out instead of reading a missing entry.
	if ( ! $this->set_bookmark( 'end' ) ) {
		return false;
	}

	$start = $this->bookmarks['_start'];
	$end   = $this->bookmarks['_end'];

	// Replace everything between the end of the 'start' bookmark span and the beginning of the 'end' bookmark span.
	$inner_start  = $start->start + $start->length;
	$inner_length = $end->start - $inner_start;

	$this->lexical_updates[] = new WP_HTML_Text_Replacement(
		$inner_start,
		$inner_length,
		$new_markup
	);

	// Apply the queued update, then return the cursor to the tag whose contents were replaced.
	$this->get_updated_html();
	$this->seek( 'start' );
	return true;
}
|
||
/** | ||
* Removes a bookmark that is no longer needed. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented in #7331.