Skip to content

Commit

Permalink
HTML API: Add missing tags in IN BODY insertion mode to HTML Processor.
Browse files Browse the repository at this point in the history
As part of work to add more spec support to the HTML API, this patch adds
support for the remaining missing tags in the IN BODY insertion mode. Not
all of the added tags are supported, because in some cases they reset the
insertion mode and are reprocessed where they will be rejected.

This patch also improves the support of `get_modifiable_text()`, removing
a leading newline inside a LISTING, PRE, or TEXTAREA element.

Developed in #6972
Discussed in https://core.trac.wordpress.org/ticket/61576

Props dmsnell, jonsurrell, westonruter.
See #61576.


git-svn-id: https://develop.svn.wordpress.org/trunk@58779 602fd350-edb4-49c9-b593-d223f7449a82
  • Loading branch information
dmsnell committed Jul 22, 2024
1 parent 485fdca commit db30ce9
Show file tree
Hide file tree
Showing 14 changed files with 1,214 additions and 530 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ public function current_node() {
return $current_node ? $current_node : null;
}

/**
* Inserts a "marker" at the end of the list of active formatting elements.
*
* > The markers are inserted when entering applet, object, marquee,
* > template, td, th, and caption elements, and are used to prevent
* > formatting from "leaking" into applet, object, marquee, template,
* > td, th, and caption elements.
*
* @see https://html.spec.whatwg.org/#concept-parser-marker
*
* @since 6.7.0
*/
public function insert_marker(): void {
	// Markers are ordinary tokens whose node name is the literal 'marker';
	// that name is what clear_up_to_last_marker() looks for when unwinding.
	$marker = new WP_HTML_Token( null, 'marker', false );

	$this->push( $marker );
}

/**
* Pushes a node onto the stack of active formatting elements.
*
Expand Down Expand Up @@ -184,4 +200,30 @@ public function walk_up() {
yield $this->stack[ $i ];
}
}

/**
* Clears the list of active formatting elements up to the last marker.
*
* > When the steps below require the UA to clear the list of active formatting elements up to
* > the last marker, the UA must perform the following steps:
* >
* > 1. Let entry be the last (most recently added) entry in the list of active
* > formatting elements.
* > 2. Remove entry from the list of active formatting elements.
* > 3. If entry was a marker, then stop the algorithm at this point.
* > The list has been cleared up to the last marker.
* > 4. Go to step 1.
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
*
* @since 6.7.0
*/
public function clear_up_to_last_marker(): void {
	// Drop entries from the end of the list until it is empty
	// or until a marker has been removed.
	while ( count( $this->stack ) > 0 ) {
		$entry = array_pop( $this->stack );

		// The marker itself is removed too; it is the last entry to go.
		if ( 'marker' === $entry->node_name ) {
			return;
		}
	}
}
}
181 changes: 157 additions & 24 deletions src/wp-includes/html-api/class-wp-html-open-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,49 @@ public function set_push_handler( Closure $handler ): void {
$this->push_handler = $handler;
}

/**
* Returns the name of the node at the nth position on the stack
* of open elements, or `null` if no such position exists.
*
* Note that this uses a 1-based index, which represents the
* "nth item" on the stack, counting from the top, where the
* top-most element is the 1st, the second is the 2nd, etc...
*
* @since 6.7.0
*
* @param int $nth Retrieve the nth item on the stack, with 1 being
* the top element, 2 being the second, etc...
* @return string|null Name of the node on the stack at the given location,
* or `null` if the location isn't on the stack.
*/
public function at( int $nth ): ?string {
	// Count nodes in the order walk_down() produces them; positions are 1-based.
	$position = 0;

	foreach ( $this->walk_down() as $node ) {
		++$position;

		if ( $position === $nth ) {
			return $node->node_name;
		}
	}

	// Either $nth was below one or fewer than $nth nodes were produced.
	return null;
}

/**
* Reports if a node of a given name is in the stack of open elements.
*
* @since 6.7.0
*
* @param string $node_name Name of node for which to check.
* @return bool Whether a node of the given name is in the stack of open elements.
*/
public function contains( string $node_name ): bool {
	$found = false;

	// Stop at the first node whose name matches exactly (strict comparison).
	foreach ( $this->walk_up() as $candidate ) {
		if ( $candidate->node_name === $node_name ) {
			$found = true;
			break;
		}
	}

	return $found;
}

/**
* Reports if a specific node is in the stack of open elements.
*
Expand All @@ -111,7 +154,7 @@ public function set_push_handler( Closure $handler ): void {
*/
public function contains_node( WP_HTML_Token $token ): bool {
foreach ( $this->walk_up() as $item ) {
if ( $token->bookmark_name === $item->bookmark_name ) {
if ( $token === $item ) {
return true;
}
}
Expand Down Expand Up @@ -210,11 +253,6 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li
return true;
}

switch ( $node->node_name ) {
case 'HTML':
return false;
}

if ( in_array( $node->node_name, $termination_list, true ) ) {
return false;
}
Expand All @@ -226,7 +264,31 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li
/**
* Returns whether a particular element is in scope.
*
* > The stack of open elements is said to have a particular element in
* > scope when it has that element in the specific scope consisting of
* > the following element types:
* >
* > - applet
* > - caption
* > - html
* > - table
* > - td
* > - th
* > - marquee
* > - object
* > - template
* > - MathML mi
* > - MathML mo
* > - MathML mn
* > - MathML ms
* > - MathML mtext
* > - MathML annotation-xml
* > - SVG foreignObject
* > - SVG desc
* > - SVG title
*
* @since 6.4.0
* @since 6.7.0 Supports all required HTML elements.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-scope
*
public function has_element_in_scope( string $tag_name ): bool {
	return $this->has_element_in_specific_scope(
		$tag_name,
		array(
			/*
			 * These are the HTML elements that bound the default scope:
			 * reaching any of them before finding `$tag_name` means the
			 * element is not in scope.
			 */
			'APPLET',
			'CAPTION',
			'HTML',
			'TABLE',
			'TD',
			'TH',
			'MARQUEE',
			'OBJECT',
			'TEMPLATE',
			// @todo: Support SVG and MathML nodes when support for foreign content is added.
		)
	);
}

/**
* Returns whether a particular element is in list item scope.
*
* > The stack of open elements is said to have a particular element
* > in list item scope when it has that element in the specific scope
* > consisting of the following element types:
* >
* > - All the element types listed above for the has an element in scope algorithm.
* > - ol in the HTML namespace
* > - ul in the HTML namespace
*
* @since 6.4.0
* @since 6.5.0 Implemented: no longer throws on every invocation.
* @since 6.7.0 Supports all required HTML elements.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
*
public function has_element_in_list_item_scope( string $tag_name ): bool {
	return $this->has_element_in_specific_scope(
		$tag_name,
		array(
			/*
			 * List item scope is the default scope list plus OL and UL.
			 * BUTTON is deliberately absent: it only bounds button scope,
			 * and listing it here would hide an open LI behind a BUTTON.
			 */
			'APPLET',
			'CAPTION',
			'HTML',
			'TABLE',
			'TD',
			'TH',
			'MARQUEE',
			'OBJECT',
			'OL',
			'TEMPLATE',
			'UL',
			// @todo: Support SVG and MathML nodes when support for foreign content is added.
		)
	);
}

/**
* Returns whether a particular element is in button scope.
*
* > The stack of open elements is said to have a particular element
* > in button scope when it has that element in the specific scope
* > consisting of the following element types:
* >
* > - All the element types listed above for the has an element in scope algorithm.
* > - button in the HTML namespace
*
* @since 6.4.0
* @since 6.7.0 Supports all required HTML elements.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-button-scope
*
* @param string $tag_name Name of tag to check.
* @return bool Whether given element is in scope.
*/
public function has_element_in_button_scope( string $tag_name ): bool {
	return $this->has_element_in_specific_scope(
		$tag_name,
		array(
			/*
			 * Button scope is the default scope list plus BUTTON, so every
			 * default terminator must be listed alongside it.
			 */
			'APPLET',
			'BUTTON',
			'CAPTION',
			'HTML',
			'TABLE',
			'TD',
			'TH',
			'MARQUEE',
			'OBJECT',
			'TEMPLATE',
			// @todo: Support SVG and MathML nodes when support for foreign content is added.
		)
	);
}

/**
 * Returns whether a particular element is in table scope.
 *
 * > The stack of open elements is said to have a particular element
 * > in table scope when it has that element in the specific scope
 * > consisting of the following element types:
 * >
 * > - html in the HTML namespace
 * > - table in the HTML namespace
 * > - template in the HTML namespace
 *
 * @since 6.4.0
 * @since 6.7.0 Full implementation.
 *
 * @see https://html.spec.whatwg.org/#has-an-element-in-table-scope
 *
 * @param string $tag_name Name of tag to check.
 * @return bool Whether given element is in scope.
 */
public function has_element_in_table_scope( string $tag_name ): bool {
	// Table scope is bounded only by these three elements.
	return $this->has_element_in_specific_scope(
		$tag_name,
		array(
			'HTML',
			'TABLE',
			'TEMPLATE',
		)
	);
}

/**
Expand Down Expand Up @@ -540,7 +658,16 @@ public function after_element_push( WP_HTML_Token $item ): void {
* cases where the precalculated value needs to change.
*/
switch ( $item->node_name ) {
case 'APPLET':
case 'BUTTON':
case 'CAPTION':
case 'HTML':
case 'TABLE':
case 'TD':
case 'TH':
case 'MARQUEE':
case 'OBJECT':
case 'TEMPLATE':
$this->has_p_in_button_scope = false;
break;

Expand Down Expand Up @@ -573,11 +700,17 @@ public function after_element_pop( WP_HTML_Token $item ): void {
* cases where the precalculated value needs to change.
*/
switch ( $item->node_name ) {
case 'APPLET':
case 'BUTTON':
$this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );
break;

case 'CAPTION':
case 'HTML':
case 'P':
case 'TABLE':
case 'TD':
case 'TH':
case 'MARQUEE':
case 'OBJECT':
case 'TEMPLATE':
$this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );
break;
}
Expand Down
Loading

0 comments on commit db30ce9

Please sign in to comment.