diff --git a/wp-includes/html-api/class-wp-html-active-formatting-elements.php b/wp-includes/html-api/class-wp-html-active-formatting-elements.php
index 69e34dca498..2f51482eee0 100644
--- a/wp-includes/html-api/class-wp-html-active-formatting-elements.php
+++ b/wp-includes/html-api/class-wp-html-active-formatting-elements.php
@@ -86,6 +86,22 @@ public function current_node() {
return $current_node ? $current_node : null;
}
+ /**
+ * Appends a "marker" entry to the end of the list of active formatting elements.
+ *
+ * > The markers are inserted when entering applet, object, marquee,
+ * > template, td, th, and caption elements, and are used to prevent
+ * > formatting from "leaking" into applet, object, marquee, template,
+ * > td, th, and caption elements.
+ *
+ * The marker is represented by a token created with the name 'marker',
+ * which is then pushed like any other entry.
+ *
+ * @see https://html.spec.whatwg.org/#concept-parser-marker
+ *
+ * @since 6.7.0
+ */
+ public function insert_marker(): void {
+ $marker = new WP_HTML_Token( null, 'marker', false );
+ $this->push( $marker );
+ }
+
/**
* Pushes a node onto the stack of active formatting elements.
*
@@ -184,4 +200,30 @@ public function walk_up() {
yield $this->stack[ $i ];
}
}
+
+ /**
+ * Clears the list of active formatting elements up to the last marker.
+ *
+ * > When the steps below require the UA to clear the list of active formatting elements up to
+ * > the last marker, the UA must perform the following steps:
+ * >
+ * > 1. Let entry be the last (most recently added) entry in the list of active
+ * > formatting elements.
+ * > 2. Remove entry from the list of active formatting elements.
+ * > 3. If entry was a marker, then stop the algorithm at this point.
+ * > The list has been cleared up to the last marker.
+ * > 4. Go to step 1.
+ *
+ * If the list contains no marker, every visited entry is removed.
+ *
+ * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
+ *
+ * @since 6.7.0
+ */
+ public function clear_up_to_last_marker(): void {
+ // NOTE(review): the stack is modified while `walk_up()` is still iterating it; this
+ // relies on `walk_up()` visiting entries from the most recently added one - confirm.
+ foreach ( $this->walk_up() as $item ) {
+ // Drop the last entry of the stack for every entry visited (steps 1 and 2).
+ array_pop( $this->stack );
+ // A marker is itself removed before stopping (step 3).
+ if ( 'marker' === $item->node_name ) {
+ break;
+ }
+ }
+ }
}
diff --git a/wp-includes/html-api/class-wp-html-open-elements.php b/wp-includes/html-api/class-wp-html-open-elements.php
index 065bbd25c98..d59bd321405 100644
--- a/wp-includes/html-api/class-wp-html-open-elements.php
+++ b/wp-includes/html-api/class-wp-html-open-elements.php
@@ -101,6 +101,49 @@ public function set_push_handler( Closure $handler ): void {
$this->push_handler = $handler;
}
+ /**
+ * Looks up the name of the node occupying the nth position on the
+ * stack of open elements.
+ *
+ * Positions are 1-based and are counted in the order produced by
+ * walking down the stack: the top-most element is the 1st, the one
+ * after it is the 2nd, and so on. Requests for a position which
+ * isn't on the stack (including zero or a negative value) yield `null`.
+ *
+ * @since 6.7.0
+ *
+ * @param int $nth Retrieve the nth item on the stack, with 1 being
+ * the top element, 2 being the second, etc...
+ * @return string|null Name of the node on the stack at the given location,
+ * or `null` if the location isn't on the stack.
+ */
+ public function at( int $nth ): ?string {
+ $position = 0;
+
+ foreach ( $this->walk_down() as $node ) {
+ ++$position;
+
+ if ( $position === $nth ) {
+ return $node->node_name;
+ }
+ }
+
+ return null;
+ }
+
+ /**
+ * Indicates whether any node on the stack of open elements has the given name.
+ *
+ * The comparison against each node's name is strict, so the name must
+ * be supplied exactly as it is stored on the stack's nodes.
+ *
+ * @since 6.7.0
+ *
+ * @param string $node_name Name of node for which to check.
+ * @return bool Whether a node of the given name is in the stack of open elements.
+ */
+ public function contains( string $node_name ): bool {
+ foreach ( $this->walk_up() as $open_element ) {
+ if ( $open_element->node_name !== $node_name ) {
+ continue;
+ }
+
+ return true;
+ }
+
+ return false;
+ }
+
/**
* Reports if a specific node is in the stack of open elements.
*
@@ -111,7 +154,7 @@ public function set_push_handler( Closure $handler ): void {
*/
public function contains_node( WP_HTML_Token $token ): bool {
foreach ( $this->walk_up() as $item ) {
- if ( $token->bookmark_name === $item->bookmark_name ) {
+ if ( $token === $item ) {
return true;
}
}
@@ -210,11 +253,6 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li
return true;
}
- switch ( $node->node_name ) {
- case 'HTML':
- return false;
- }
-
if ( in_array( $node->node_name, $termination_list, true ) ) {
return false;
}
@@ -226,7 +264,31 @@ public function has_element_in_specific_scope( string $tag_name, $termination_li
/**
* Returns whether a particular element is in scope.
*
+ * > The stack of open elements is said to have a particular element in
+ * > scope when it has that element in the specific scope consisting of
+ * > the following element types:
+ * >
+ * > - applet
+ * > - caption
+ * > - html
+ * > - table
+ * > - td
+ * > - th
+ * > - marquee
+ * > - object
+ * > - template
+ * > - MathML mi
+ * > - MathML mo
+ * > - MathML mn
+ * > - MathML ms
+ * > - MathML mtext
+ * > - MathML annotation-xml
+ * > - SVG foreignObject
+ * > - SVG desc
+ * > - SVG title
+ *
* @since 6.4.0
+ * @since 6.7.0 Supports all required HTML elements.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-scope
*
@@ -237,14 +299,16 @@ public function has_element_in_scope( string $tag_name ): bool {
return $this->has_element_in_specific_scope(
$tag_name,
array(
-
- /*
- * Because it's not currently possible to encounter
- * one of the termination elements, they don't need
- * to be listed here. If they were, they would be
- * unreachable and only waste CPU cycles while
- * scanning through HTML.
- */
+ 'APPLET',
+ 'CAPTION',
+ 'HTML',
+ 'TABLE',
+ 'TD',
+ 'TH',
+ 'MARQUEE',
+ 'OBJECT',
+ 'TEMPLATE',
+ // @todo: Support SVG and MathML nodes when support for foreign content is added.
)
);
}
@@ -252,8 +316,17 @@ public function has_element_in_scope( string $tag_name ): bool {
/**
* Returns whether a particular element is in list item scope.
*
+ * > The stack of open elements is said to have a particular element
+ * > in list item scope when it has that element in the specific scope
+ * > consisting of the following element types:
+ * >
+ * > - All the element types listed above for the has an element in scope algorithm.
+ * > - ol in the HTML namespace
+ * > - ul in the HTML namespace
+ *
* @since 6.4.0
* @since 6.5.0 Implemented: no longer throws on every invocation.
+ * @since 6.7.0 Supports all required HTML elements.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-list-item-scope
*
@@ -264,9 +337,19 @@ public function has_element_in_list_item_scope( string $tag_name ): bool {
return $this->has_element_in_specific_scope(
$tag_name,
array(
- // There are more elements that belong here which aren't currently supported.
+ // List item scope is the "in scope" list plus OL and UL; BUTTON only bounds button scope.
+ 'APPLET',
+ 'CAPTION',
+ 'HTML',
+ 'TABLE',
+ 'TD',
+ 'TH',
+ 'MARQUEE',
+ 'OBJECT',
'OL',
+ 'TEMPLATE',
'UL',
+ // @todo: Support SVG and MathML nodes when support for foreign content is added.
)
);
}
@@ -274,7 +357,15 @@ public function has_element_in_list_item_scope( string $tag_name ): bool {
/**
* Returns whether a particular element is in button scope.
*
+ * > The stack of open elements is said to have a particular element
+ * > in button scope when it has that element in the specific scope
+ * > consisting of the following element types:
+ * >
+ * > - All the element types listed above for the has an element in scope algorithm.
+ * > - button in the HTML namespace
+ *
* @since 6.4.0
+ * @since 6.7.0 Supports all required HTML elements.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-button-scope
*
@@ -282,25 +373,52 @@ public function has_element_in_list_item_scope( string $tag_name ): bool {
* @return bool Whether given element is in scope.
*/
public function has_element_in_button_scope( string $tag_name ): bool {
- return $this->has_element_in_specific_scope( $tag_name, array( 'BUTTON' ) );
+ return $this->has_element_in_specific_scope(
+ $tag_name,
+ array(
+ 'APPLET',
+ 'BUTTON',
+ 'CAPTION',
+ 'HTML',
+ 'TABLE',
+ 'TD',
+ 'TH',
+ 'MARQUEE',
+ 'OBJECT',
+ 'TEMPLATE',
+ // @todo: Support SVG and MathML nodes when support for foreign content is added.
+ )
+ );
}
/**
* Returns whether a particular element is in table scope.
*
+ * > The stack of open elements is said to have a particular element
+ * > in table scope when it has that element in the specific scope
+ * > consisting of the following element types:
+ * >
+ * > - html in the HTML namespace
+ * > - table in the HTML namespace
+ * > - template in the HTML namespace
+ *
* @since 6.4.0
+ * @since 6.7.0 Full implementation.
*
* @see https://html.spec.whatwg.org/#has-an-element-in-table-scope
*
- * @throws WP_HTML_Unsupported_Exception Always until this function is implemented.
- *
* @param string $tag_name Name of tag to check.
* @return bool Whether given element is in scope.
*/
public function has_element_in_table_scope( string $tag_name ): bool {
- throw new WP_HTML_Unsupported_Exception( 'Cannot process elements depending on table scope.' );
-
- return false; // The linter requires this unreachable code until the function is implemented and can return.
+ return $this->has_element_in_specific_scope(
+ $tag_name,
+ array(
+ 'HTML',
+ 'TABLE',
+ 'TEMPLATE',
+ )
+ );
}
/**
@@ -540,7 +658,16 @@ public function after_element_push( WP_HTML_Token $item ): void {
* cases where the precalculated value needs to change.
*/
switch ( $item->node_name ) {
+ case 'APPLET':
case 'BUTTON':
+ case 'CAPTION':
+ case 'HTML':
+ case 'TABLE':
+ case 'TD':
+ case 'TH':
+ case 'MARQUEE':
+ case 'OBJECT':
+ case 'TEMPLATE':
$this->has_p_in_button_scope = false;
break;
@@ -573,11 +700,17 @@ public function after_element_pop( WP_HTML_Token $item ): void {
* cases where the precalculated value needs to change.
*/
switch ( $item->node_name ) {
+ case 'APPLET':
case 'BUTTON':
- $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );
- break;
-
+ case 'CAPTION':
+ case 'HTML':
case 'P':
+ case 'TABLE':
+ case 'TD':
+ case 'TH':
+ case 'MARQUEE':
+ case 'OBJECT':
+ case 'TEMPLATE':
$this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );
break;
}
diff --git a/wp-includes/html-api/class-wp-html-processor-state.php b/wp-includes/html-api/class-wp-html-processor-state.php
index eadfe30d26c..e0469bea020 100644
--- a/wp-includes/html-api/class-wp-html-processor-state.php
+++ b/wp-includes/html-api/class-wp-html-processor-state.php
@@ -311,6 +311,31 @@ class WP_HTML_Processor_State {
*/
const INSERTION_MODE_IN_FOREIGN_CONTENT = 'insertion-mode-in-foreign-content';
+ /**
+ * No-quirks mode document compatibility mode.
+ *
+ * > In no-quirks mode, the behavior is (hopefully) the desired behavior
+ * > described by the modern HTML and CSS specifications.
+ *
+ * @since 6.7.0
+ *
+ * @var string
+ */
+ const NO_QUIRKS_MODE = 'no-quirks-mode';
+
+ /**
+ * Quirks mode document compatibility mode.
+ *
+ * > In quirks mode, layout emulates behavior in Navigator 4 and Internet
+ * > Explorer 5. This is essential in order to support websites that were
+ * > built before the widespread adoption of web standards.
+ *
+ * @since 6.7.0
+ *
+ * @var string
+ */
+ const QUIRKS_MODE = 'quirks-mode';
+
/**
* The stack of template insertion modes.
*
@@ -368,6 +393,30 @@ class WP_HTML_Processor_State {
*/
public $insertion_mode = self::INSERTION_MODE_INITIAL;
+ /**
+ * Indicates if the document is in quirks mode or no-quirks mode.
+ *
+ * Impact on HTML parsing:
+ *
+ * - In `NO_QUIRKS_MODE` CSS class and ID selectors match in a byte-for-byte
+ * manner, otherwise for backwards compatibility, class selectors are to
+ * match in an ASCII case-insensitive manner.
+ *
+ * - When not in `QUIRKS_MODE`, a TABLE start tag implicitly closes an open P tag
+ * if one is in scope and open, otherwise the TABLE becomes a child of the P.
+ *
+ * `QUIRKS_MODE` impacts many styling-related aspects of an HTML document, but
+ * none of the other changes modifies how the HTML is parsed or selected.
+ *
+ * @see self::QUIRKS_MODE
+ * @see self::NO_QUIRKS_MODE
+ *
+ * @since 6.7.0
+ *
+ * @var string
+ */
+ public $document_mode = self::NO_QUIRKS_MODE;
+
/**
* Context node initializing fragment parser, if created as a fragment parser.
*
@@ -390,6 +439,24 @@ class WP_HTML_Processor_State {
*/
public $head_element = null;
+ /**
+ * FORM element pointer.
+ *
+ * > points to the last form element that was opened and whose end tag has
+ * > not yet been seen. It is used to make form controls associate with
+ * > forms in the face of dramatically bad markup, for historical reasons.
+ * > It is ignored inside template elements.
+ *
+ * @todo This may be invalidated by a seek operation.
+ *
+ * @see https://html.spec.whatwg.org/#form-element-pointer
+ *
+ * @since 6.7.0
+ *
+ * @var WP_HTML_Token|null
+ */
+ public $form_element = null;
+
/**
* The frameset-ok flag indicates if a `FRAMESET` element is allowed in the current state.
*
diff --git a/wp-includes/html-api/class-wp-html-processor.php b/wp-includes/html-api/class-wp-html-processor.php
index 72f39d3ad7a..d614112a767 100644
--- a/wp-includes/html-api/class-wp-html-processor.php
+++ b/wp-includes/html-api/class-wp-html-processor.php
@@ -97,22 +97,11 @@
* will abort early and stop all processing. This draconian measure ensures
* that the HTML Processor won't break any HTML it doesn't fully understand.
*
- * The following list specifies the HTML tags that _are_ supported:
+ * The HTML Processor supports all elements other than a specific set:
*
- * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
- * - Custom elements: All custom elements are supported. :)
- * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, OPTGROUP, OPTION, PROGRESS, SEARCH, SELECT.
- * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR.
- * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
- * - Links: A.
- * - Lists: DD, DL, DT, LI, OL, UL.
- * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO.
- * - Paragraph: BR, P.
- * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
- * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
- * - Templating elements: SLOT.
- * - Text decoration: RUBY.
- * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER.
+ * - Any element inside a TABLE.
+ * - Any element inside foreign content, including SVG and MATH.
+ * - Any element outside the IN BODY insertion mode, e.g. doctype declarations, meta, links.
*
* ### Supported markup
*
@@ -121,15 +110,30 @@
* may in fact belong _before_ the table in the DOM. If the HTML Processor encounters
* such a case it will stop processing.
*
- * The following list specifies HTML markup that _is_ supported:
+ * The following list illustrates some common examples of unexpected HTML inputs that
+ * the HTML Processor properly parses and represents:
*
- * - Markup involving only those tags listed above.
- * - Fully-balanced and non-overlapping tags.
- * - HTML with unexpected tag closers.
- * - Some unbalanced or overlapping tags.
- * - P tags after unclosed P tags.
- * - BUTTON tags after unclosed BUTTON tags.
- * - A tags after unclosed A tags that don't involve any active formatting elements.
+ * - HTML with optional tags omitted, e.g. `<p>one<p>two`.
+ * - HTML with unexpected tag closers, e.g. `<p>one </span> more</p>`.
+ * - Non-void tags with self-closing flag, e.g. `<div/>the DIV is still open.</div>`.
+ * - Heading elements which close open heading elements of another level, e.g. `<h1>Closed by </h2>`.
+ * - Elements containing text that looks like other tags but isn't, e.g. `<title>The <img> is plaintext</title>`.
+ * - SCRIPT and STYLE tags containing text that looks like HTML but isn't, e.g. `<script>document.write('<p>Hi</p>');</script>`.
+ * - SCRIPT content which has been escaped, e.g. `<script><!-- document.write('<script>console.log("hi")</script>') --></script>`.
+ *
+ * ### Unsupported Features
+ *
+ * This parser does not report parse errors.
+ *
+ * Normally, when additional HTML or BODY tags are encountered in a document, if there
+ * are any additional attributes on them that aren't found on the previous elements,
+ * the existing HTML and BODY elements adopt those missing attribute values. This
+ * parser does not add those additional attributes.
+ *
+ * In certain situations, elements are moved to a different part of the document in
+ * a process called "adoption" and "fostering." Because the nodes move to a location
+ * in the document that the parser had already processed, this parser does not support
+ * these situations and will bail.
*
* @since 6.4.0
*
@@ -1104,15 +1108,7 @@ private function step_in_body(): bool {
$op = "{$op_sigil}{$token_name}";
switch ( $op ) {
- case '#comment':
- case '#funky-comment':
- case '#presumptuous-tag':
- $this->insert_html_element( $this->state->current_token );
- return true;
-
case '#text':
- $this->reconstruct_active_formatting_elements();
-
$current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ];
/*
@@ -1133,6 +1129,8 @@ private function step_in_body(): bool {
return $this->step();
}
+ $this->reconstruct_active_formatting_elements();
+
/*
* Whitespace-only text does not affect the frameset-ok flag.
* It is probably inter-element whitespace, but it may also
@@ -1146,29 +1144,146 @@ private function step_in_body(): bool {
$this->insert_html_element( $this->state->current_token );
return true;
+ case '#comment':
+ case '#funky-comment':
+ case '#presumptuous-tag':
+ $this->insert_html_element( $this->state->current_token );
+ return true;
+
+ /*
+ * > A DOCTYPE token
+ * > Parse error. Ignore the token.
+ */
case 'html':
+ return $this->step();
+
+ /*
+ * > A start tag whose tag name is "html"
+ */
+ case '+HTML':
+ if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
+ /*
+ * > Otherwise, for each attribute on the token, check to see if the attribute
+ * > is already present on the top element of the stack of open elements. If
+ * > it is not, add the attribute and its corresponding value to that element.
+ *
+ * This parser does not currently support this behavior: ignore the token.
+ */
+ }
+
+ // Ignore the token.
+ return $this->step();
+
+ /*
+ * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
+ * > "meta", "noframes", "script", "style", "template", "title"
+ * >
+ * > An end tag whose tag name is "template"
+ */
+ case '+BASE':
+ case '+BASEFONT':
+ case '+BGSOUND':
+ case '+LINK':
+ case '+META':
+ case '+NOFRAMES':
+ case '+SCRIPT':
+ case '+STYLE':
+ case '+TEMPLATE':
+ case '+TITLE':
+ case '-TEMPLATE':
+ return $this->step_in_head();
+
+ /*
+ * > A start tag whose tag name is "body"
+ *
+ * This tag in the IN BODY insertion mode is a parse error.
+ */
+ case '+BODY':
+ if (
+ 1 === $this->state->stack_of_open_elements->count() ||
+ 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) ||
+ $this->state->stack_of_open_elements->contains( 'TEMPLATE' )
+ ) {
+ // Ignore the token.
+ return $this->step();
+ }
+
/*
- * > A DOCTYPE token
- * > Parse error. Ignore the token.
+ * > Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute
+ * > on the token, check to see if the attribute is already present on the body
+ * > element (the second element) on the stack of open elements, and if it is
+ * > not, add the attribute and its corresponding value to that element.
+ *
+ * This parser does not currently support this behavior: ignore the token.
*/
+ $this->state->frameset_ok = false;
return $this->step();
/*
- * > A start tag whose tag name is "button"
+ * > A start tag whose tag name is "frameset"
+ *
+ * This tag in the IN BODY insertion mode is a parse error.
*/
- case '+BUTTON':
- if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) {
- // @todo Indicate a parse error once it's possible. This error does not impact the logic here.
- $this->generate_implied_end_tags();
- $this->state->stack_of_open_elements->pop_until( 'BUTTON' );
+ case '+FRAMESET':
+ if (
+ 1 === $this->state->stack_of_open_elements->count() ||
+ 'BODY' !== $this->state->stack_of_open_elements->at( 2 ) ||
+ false === $this->state->frameset_ok
+ ) {
+ // Ignore the token.
+ return $this->step();
}
- $this->reconstruct_active_formatting_elements();
- $this->insert_html_element( $this->state->current_token );
- $this->state->frameset_ok = false;
+ /*
+ * > Otherwise, run the following steps:
+ */
+ $this->bail( 'Cannot process non-ignored FRAMESET tags.' );
+ break;
+ /*
+ * > An end tag whose tag name is "body"
+ */
+ case '-BODY':
+ if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) {
+ // Parse error: ignore the token.
+ return $this->step();
+ }
+
+ /*
+ * > Otherwise, if there is a node in the stack of open elements that is not either a
+ * > dd element, a dt element, an li element, an optgroup element, an option element,
+ * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody
+ * > element, a td element, a tfoot element, a th element, a thead element, a tr
+ * > element, the body element, or the html element, then this is a parse error.
+ *
+ * There is nothing to do for this parse error, so don't check for it.
+ */
+
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY;
return true;
+ /*
+ * > An end tag whose tag name is "html"
+ */
+ case '-HTML':
+ if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) {
+ // Parse error: ignore the token.
+ return $this->step();
+ }
+
+ /*
+ * > Otherwise, if there is a node in the stack of open elements that is not either a
+ * > dd element, a dt element, an li element, an optgroup element, an option element,
+ * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody
+ * > element, a td element, a tfoot element, a th element, a thead element, a tr
+ * > element, the body element, or the html element, then this is a parse error.
+ *
+ * There is nothing to do for this parse error, so don't check for it.
+ */
+
+ $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY;
+ return $this->step( self::REPROCESS_CURRENT_NODE );
+
/*
* > A start tag whose tag name is one of: "address", "article", "aside",
* > "blockquote", "center", "details", "dialog", "dir", "div", "dl",
@@ -1207,52 +1322,6 @@ private function step_in_body(): bool {
$this->insert_html_element( $this->state->current_token );
return true;
- /*
- * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote",
- * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
- * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
- * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
- */
- case '-ADDRESS':
- case '-ARTICLE':
- case '-ASIDE':
- case '-BLOCKQUOTE':
- case '-BUTTON':
- case '-CENTER':
- case '-DETAILS':
- case '-DIALOG':
- case '-DIR':
- case '-DIV':
- case '-DL':
- case '-FIELDSET':
- case '-FIGCAPTION':
- case '-FIGURE':
- case '-FOOTER':
- case '-HEADER':
- case '-HGROUP':
- case '-LISTING':
- case '-MAIN':
- case '-MENU':
- case '-NAV':
- case '-OL':
- case '-PRE':
- case '-SEARCH':
- case '-SECTION':
- case '-SUMMARY':
- case '-UL':
- if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) {
- // @todo Report parse error.
- // Ignore the token.
- return $this->step();
- }
-
- $this->generate_implied_end_tags();
- if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) {
- // @todo Record parse error: this error doesn't impact parsing.
- }
- $this->state->stack_of_open_elements->pop_until( $token_name );
- return true;
-
/*
* > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
*/
@@ -1288,35 +1357,39 @@ private function step_in_body(): bool {
if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
$this->close_a_p_element();
}
+
+ /*
+ * > If the next token is a U+000A LINE FEED (LF) character token,
+ * > then ignore that token and move on to the next one. (Newlines
+ * > at the start of pre blocks are ignored as an authoring convenience.)
+ *
+ * This is handled in `get_modifiable_text()`.
+ */
+
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
return true;
/*
- * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
+ * > A start tag whose tag name is "form"
*/
- case '-H1':
- case '-H2':
- case '-H3':
- case '-H4':
- case '-H5':
- case '-H6':
- if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
- /*
- * This is a parse error; ignore the token.
- *
- * @todo Indicate a parse error once it's possible.
- */
+ case '+FORM':
+ $stack_contains_template = $this->state->stack_of_open_elements->contains( 'TEMPLATE' );
+
+ if ( isset( $this->state->form_element ) && ! $stack_contains_template ) {
+ // Parse error: ignore the token.
return $this->step();
}
- $this->generate_implied_end_tags();
+ if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+ $this->close_a_p_element();
+ }
- if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) {
- // @todo Record parse error: this error doesn't impact parsing.
+ $this->insert_html_element( $this->state->current_token );
+ if ( ! $stack_contains_template ) {
+ $this->state->form_element = $this->state->current_token;
}
- $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
return true;
/*
@@ -1377,6 +1450,150 @@ private function step_in_body(): bool {
$this->insert_html_element( $this->state->current_token );
return true;
+ case '+PLAINTEXT':
+ if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+ $this->close_a_p_element();
+ }
+
+ /*
+ * @todo This may need to be handled in the Tag Processor and turn into
+ * a single self-contained tag like TEXTAREA, whose modifiable text
+ * is the rest of the input document as plaintext.
+ */
+ $this->bail( 'Cannot process PLAINTEXT elements.' );
+ break;
+
+ /*
+ * > A start tag whose tag name is "button"
+ */
+ case '+BUTTON':
+ if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) {
+ // @todo Indicate a parse error once it's possible. This error does not impact the logic here.
+ $this->generate_implied_end_tags();
+ $this->state->stack_of_open_elements->pop_until( 'BUTTON' );
+ }
+
+ $this->reconstruct_active_formatting_elements();
+ $this->insert_html_element( $this->state->current_token );
+ $this->state->frameset_ok = false;
+
+ return true;
+
+ /*
+ * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote",
+ * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
+ * > "figcaption", "figure", "footer", "header", "hgroup", "listing", "main",
+ * > "menu", "nav", "ol", "pre", "search", "section", "summary", "ul"
+ *
+ * @todo This needs to check if the element in scope is an HTML element, meaning that
+ * when SVG and MathML support is added, this needs to differentiate between an
+ * HTML element of the given name, such as `<center>`, and a foreign element of
+ * the same given name.
+ */
+ case '-ADDRESS':
+ case '-ARTICLE':
+ case '-ASIDE':
+ case '-BLOCKQUOTE':
+ case '-BUTTON':
+ case '-CENTER':
+ case '-DETAILS':
+ case '-DIALOG':
+ case '-DIR':
+ case '-DIV':
+ case '-DL':
+ case '-FIELDSET':
+ case '-FIGCAPTION':
+ case '-FIGURE':
+ case '-FOOTER':
+ case '-HEADER':
+ case '-HGROUP':
+ case '-LISTING':
+ case '-MAIN':
+ case '-MENU':
+ case '-NAV':
+ case '-OL':
+ case '-PRE':
+ case '-SEARCH':
+ case '-SECTION':
+ case '-SUMMARY':
+ case '-UL':
+ if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) {
+ // @todo Report parse error.
+ // Ignore the token.
+ return $this->step();
+ }
+
+ $this->generate_implied_end_tags();
+ if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) {
+ // @todo Record parse error: this error doesn't impact parsing.
+ }
+ $this->state->stack_of_open_elements->pop_until( $token_name );
+ return true;
+
+ /*
+ * > An end tag whose tag name is "form"
+ *
+ * Outside of a TEMPLATE the form element pointer decides which FORM gets
+ * closed; inside of a TEMPLATE the pointer is ignored and the nearest FORM
+ * in scope is closed instead.
+ */
+ case '-FORM':
+ if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
+ // The pointer is cleared before the checks, so it is reset even when the token ends up ignored.
+ $node = $this->state->form_element;
+ $this->state->form_element = null;
+
+ /*
+ * > If node is null or if the stack of open elements does not have node
+ * > in scope, then this is a parse error; return and ignore the token.
+ *
+ * @todo It's necessary to check if the form token itself is in scope, not
+ * simply whether any FORM is in scope.
+ */
+ if (
+ null === $node ||
+ ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' )
+ ) {
+ // Parse error: ignore the token.
+ return $this->step();
+ }
+
+ $this->generate_implied_end_tags();
+ if ( $node !== $this->state->stack_of_open_elements->current_node() ) {
+ // @todo Indicate a parse error once it's possible. This error does not impact the logic here.
+ $this->bail( 'Cannot close a FORM when other elements remain open as this would throw off the breadcrumbs for the following tokens.' );
+ }
+
+ $this->state->stack_of_open_elements->remove_node( $node );
+
+ /*
+ * The FORM closer is fully handled at this point. Return here, as the
+ * TEMPLATE branch below does, instead of leaving the `switch` where the
+ * same token would be handled again by whatever code follows it.
+ */
+ return true;
+ } else {
+ /*
+ * > If the stack of open elements does not have a form element in scope,
+ * > then this is a parse error; return and ignore the token.
+ *
+ * Note that unlike in the clause above, this is checking for any FORM in scope.
+ */
+ if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) ) {
+ // Parse error: ignore the token.
+ return $this->step();
+ }
+
+ $this->generate_implied_end_tags();
+
+ if ( ! $this->state->stack_of_open_elements->current_node_is( 'FORM' ) ) {
+ // @todo Indicate a parse error once it's possible. This error does not impact the logic here.
+ }
+
+ $this->state->stack_of_open_elements->pop_until( 'FORM' );
+ return true;
+ }
+ break;
+
+ /*
+ * > An end tag whose tag name is "p"
+ */
+ case '-P':
+ if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
+ $this->insert_html_element( $this->state->current_token );
+ }
+
+ $this->close_a_p_element();
+ return true;
+
/*
* > An end tag whose tag name is "li"
* > An end tag whose tag name is one of: "dd", "dt"
@@ -1423,17 +1640,35 @@ private function step_in_body(): bool {
return true;
/*
- * > An end tag whose tag name is "p"
+ * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
*/
- case '-P':
- if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) {
- $this->insert_html_element( $this->state->current_token );
+ case '-H1':
+ case '-H2':
+ case '-H3':
+ case '-H4':
+ case '-H5':
+ case '-H6':
+ if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) {
+ /*
+ * This is a parse error; ignore the token.
+ *
+ * @todo Indicate a parse error once it's possible.
+ */
+ return $this->step();
}
- $this->close_a_p_element();
+ $this->generate_implied_end_tags();
+
+ if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) {
+ // @todo Record parse error: this error doesn't impact parsing.
+ }
+
+ $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' );
return true;
- // > A start tag whose tag name is "a"
+ /*
+ * > A start tag whose tag name is "a"
+ */
case '+A':
foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
switch ( $item->node_name ) {
@@ -1474,6 +1709,22 @@ private function step_in_body(): bool {
$this->state->active_formatting_elements->push( $this->state->current_token );
return true;
+ /*
+ * > A start tag whose tag name is "nobr"
+ */
+ case '+NOBR':
+ $this->reconstruct_active_formatting_elements();
+
+ if ( $this->state->stack_of_open_elements->has_element_in_scope( 'NOBR' ) ) {
+ // Parse error.
+ $this->run_adoption_agency_algorithm();
+ $this->reconstruct_active_formatting_elements();
+ }
+
+ $this->insert_html_element( $this->state->current_token );
+ $this->state->active_formatting_elements->push( $this->state->current_token );
+ return true;
+
/*
* > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i",
* > "nobr", "s", "small", "strike", "strong", "tt", "u"
@@ -1494,16 +1745,65 @@ private function step_in_body(): bool {
$this->run_adoption_agency_algorithm();
return true;
+ /*
+ * > A start tag whose tag name is one of: "applet", "marquee", "object"
+ */
+ case '+APPLET':
+ case '+MARQUEE':
+ case '+OBJECT':
+ $this->reconstruct_active_formatting_elements();
+ $this->insert_html_element( $this->state->current_token );
+ $this->state->active_formatting_elements->insert_marker();
+ $this->state->frameset_ok = false;
+ return true;
+
+ /*
+ * > An end tag token whose tag name is one of: "applet", "marquee", "object"
+ *
+ * @todo This needs to check if the element in scope is an HTML element, meaning that
+ * when SVG and MathML support is added, this needs to differentiate between an
+ * HTML element of the given name, such as `