Skip to content

Commit

Permalink
Squash merge #7649
Browse files Browse the repository at this point in the history
Squashed commit of the following:

commit 1cde425
Merge: 05ca2a4 8ad5281
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 20:40:05 2024 +0100

    Merge branch 'trunk' into html-api/fix-63390-seek

commit 05ca2a4
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 20:29:36 2024 +0100

    Remove context-node cases from open elements

    The context node is not pushed to the stack of open elements, and
    therefore does not need special handling.

commit 75ab9c2
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 20:27:57 2024 +0100

    Break at root-node when popping elements on seek

    The context node is not pushed to the stack of open elements,
    this code was either doing nothing or incorrect.

commit 90eb6e2
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 13:53:15 2024 +0100

    Use a temporary bookmark to avoid modifying tag processor internal state

commit b95e402
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 13:45:13 2024 +0100

    Restore private stack properties to private

commit d5a7d5c
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 13:40:18 2024 +0100

    Rework with tests for fragment and full processors

    Use a dataprovider to get a factory function for processors

commit f7af6e3
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 13:19:58 2024 +0100

    Add test seeking from SVG namespace

commit 388bf19
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 13:05:34 2024 +0100

    Add and improve HTML processor bookmark tests

commit d5bf14c
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 12:17:47 2024 +0100

    Add and improve comments

commit d181938
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 12:13:48 2024 +0100

    Fix fragment state reset for different context nodes

commit 9a0c017
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 12:05:43 2024 +0100

    Set parsing namespace correctly when seeking

commit 660dc85
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 11:49:21 2024 +0100

    Remove condition that may not be necessary

    This seems related to virutal tokens and it likely covered by the virtual token condition

commit 9af204c
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 11:47:56 2024 +0100

    Assert setting bookmark two

    This is consisten with the assertion on bookmark one

commit 851df38
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 11:47:17 2024 +0100

    Fix fragment parser bug that should reset breadcrumbs before seeking

commit 6106a56
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 11:46:49 2024 +0100

    Use do-while loop to iterate after first check

commit 744cf01
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Wed Nov 6 11:42:42 2024 +0100

    Add commentary around state resetting in seek

commit cb63bbc
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Tue Nov 5 19:51:29 2024 +0100

    Lint: remove empty line

commit 737bf92
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Tue Nov 5 19:49:01 2024 +0100

    Restore original bookmark matching

commit 89aa01b
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Tue Nov 5 19:47:16 2024 +0100

    Fix the issue

commit dc3f1e6
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Tue Nov 5 19:06:23 2024 +0100

    try fixes

commit 541b4f6
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Fri Oct 25 23:19:19 2024 +0200

    wip

commit 5d75bb7
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Fri Oct 25 23:15:51 2024 +0200

    Work on fix

commit f9777aa
Author: Jon Surrell <sirreal@users.noreply.github.com>
Date:   Fri Oct 25 22:51:44 2024 +0200

    Add failing test case
  • Loading branch information
sirreal committed Nov 8, 2024
1 parent 0789538 commit 42f3df4
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 35 deletions.
9 changes: 0 additions & 9 deletions src/wp-includes/html-api/class-wp-html-open-elements.php
Original file line number Diff line number Diff line change
Expand Up @@ -520,11 +520,6 @@ public function pop(): bool {
return false;
}

if ( 'context-node' === $item->bookmark_name ) {
$this->stack[] = $item;
return false;
}

$this->after_element_pop( $item );
return true;
}
Expand Down Expand Up @@ -585,10 +580,6 @@ public function push( WP_HTML_Token $stack_item ): void {
* @return bool Whether the node was found and removed from the stack of open elements.
*/
public function remove_node( WP_HTML_Token $token ): bool {
if ( 'context-node' === $token->bookmark_name ) {
return false;
}

foreach ( $this->walk_up() as $position_from_end => $item ) {
if ( $token->bookmark_name !== $item->bookmark_name ) {
continue;
Expand Down
82 changes: 57 additions & 25 deletions src/wp-includes/html-api/class-wp-html-processor.php
Original file line number Diff line number Diff line change
Expand Up @@ -5409,51 +5409,83 @@ public function seek( $bookmark_name ): bool {
*/
if ( 'backward' === $direction ) {
/*
* Instead of clearing the parser state and starting fresh, calling the stack methods
* maintains the proper flags in the parser.
* When moving backward, stateful stacks should be cleared.
*/
foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
if ( 'context-node' === $item->bookmark_name ) {
/*
* Fragment parsers always start with an HTML root node at the top of the stack.
* Do not remove it.
*/
if ( 'root-node' === $item->bookmark_name ) {
break;
}

$this->state->stack_of_open_elements->remove_node( $item );
}

foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
if ( 'context-node' === $item->bookmark_name ) {
break;
}

$this->state->active_formatting_elements->remove_node( $item );
}

parent::seek( 'context-node' );
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
$this->state->frameset_ok = true;
$this->element_queue = array();
$this->current_element = null;
/*
* **After** clearing stacks, more processor state can be reset.
* This must be done after clearing the stack because those stacks generate events that
* would appear on a subsequent call to `next_token()`.
*/
$this->state->frameset_ok = true;
$this->state->stack_of_template_insertion_modes = array();
$this->state->head_element = null;
$this->state->form_element = null;
$this->state->current_token = null;
$this->current_element = null;
$this->element_queue = array();

/*
* The absence of a context node indicates a full parse.
* The presence of a context node indicates a fragment parser.
*/
if ( null === $this->context_node ) {
$this->change_parsing_namespace( 'html' );
$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL;
$this->breadcrumbs = array();

if ( isset( $this->context_node ) ) {
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
$this->bookmarks['initial'] = new WP_HTML_Span( 0, 0 );
parent::seek( 'initial' );
unset( $this->bookmarks['initial'] );
} else {
$this->breadcrumbs = array();
}
}
$this->change_parsing_namespace(
$this->context_node->integration_node_type
? 'html'
: $this->context_node->namespace
);

if ( 'TEMPLATE' === $this->context_node->node_name ) {
$this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
}

// When moving forwards, reparse the document until reaching the same location as the original bookmark.
if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
return true;
$this->reset_insertion_mode_appropriately();
$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
parent::seek( $this->context_node->bookmark_name );
}
}

while ( $this->next_token() ) {
/*
* Here, the processor moves forward through the document until it matches the bookmark.
* do-while is used here because the processor is expected to already be stopped on
* a token than may match the bookmarked location.
*/
do {
/*
* The processor will stop on virtual tokens, but bookmarks may not be set on them.
* They should not be matched when seeking a bookmark, skip them.
*/
if ( $this->is_virtual() ) {
continue;
}
if ( $bookmark_starts_at === $this->bookmarks[ $this->state->current_token->bookmark_name ]->start ) {
while ( isset( $this->current_element ) && WP_HTML_Stack_Event::POP === $this->current_element->operation ) {
$this->current_element = array_shift( $this->element_queue );
}
return true;
}
}
} while ( $this->next_token() );

return false;
}
Expand Down
143 changes: 143 additions & 0 deletions tests/phpunit/tests/html-api/wpHtmlProcessor-bookmark.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
<?php
/**
* Unit tests covering WP_HTML_Processor bookmark functionality.
*
* @package WordPress
* @subpackage HTML-API
*/

/**
* @group html-api
*
* @coversDefaultClass WP_HTML_Processor
*/
class Tests_HtmlApi_WpHtmlProcessor_Bookmark extends WP_UnitTestCase {
/**
* @dataProvider data_processor_constructors
*
* @ticket 62290
*/
public function test_processor_seek_same_location( callable $factory ) {
$processor = $factory( '<div><span>' );
$this->assertTrue( $processor->next_tag( 'DIV' ) );
$this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark.' );
$this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed has_bookmark check.' );

// Confirm the bookmark works and processing continues normally.
$this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark.' );
$this->assertSame( 'DIV', $processor->get_tag() );
$this->assertSame( array( 'HTML', 'BODY', 'DIV' ), $processor->get_breadcrumbs() );
$this->assertTrue( $processor->next_tag() );
$this->assertSame( 'SPAN', $processor->get_tag() );
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'SPAN' ), $processor->get_breadcrumbs() );
}

/**
* @dataProvider data_processor_constructors
*
* @ticket 62290
*/
public function test_processor_seek_backward( callable $factory ) {
$processor = $factory( '<div><span>' );
$this->assertTrue( $processor->next_tag( 'DIV' ) );
$this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark.' );
$this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed has_bookmark check.' );

// Move past the bookmark so it must scan backwards.
$this->assertTrue( $processor->next_tag( 'SPAN' ) );

// Confirm the bookmark works.
$this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark.' );
$this->assertSame( 'DIV', $processor->get_tag() );
}

/**
* @dataProvider data_processor_constructors
*
* @ticket 62290
*/
public function test_processor_seek_forward( callable $factory ) {
$processor = $factory( '<div one></div><span two></span><a three>' );
$this->assertTrue( $processor->next_tag( 'DIV' ) );
$this->assertTrue( $processor->set_bookmark( 'one' ), 'Failed to set bookmark "one".' );
$this->assertTrue( $processor->has_bookmark( 'one' ), 'Failed "one" has_bookmark check.' );

// Move past the bookmark so it must scan backwards.
$this->assertTrue( $processor->next_tag( 'SPAN' ) );
$this->assertTrue( $processor->get_attribute( 'two' ) );
$this->assertTrue( $processor->set_bookmark( 'two' ), 'Failed to set bookmark "two".' );
$this->assertTrue( $processor->has_bookmark( 'two' ), 'Failed "two" has_bookmark check.' );

// Seek back.
$this->assertTrue( $processor->seek( 'one' ), 'Failed to seek to bookmark "one".' );
$this->assertSame( 'DIV', $processor->get_tag() );

// Seek forward and continue processing.
$this->assertTrue( $processor->seek( 'two' ), 'Failed to seek to bookmark "two".' );
$this->assertSame( 'SPAN', $processor->get_tag() );
$this->assertTrue( $processor->get_attribute( 'two' ) );

$this->assertTrue( $processor->next_tag() );
$this->assertSame( 'A', $processor->get_tag() );
$this->assertTrue( $processor->get_attribute( 'three' ) );
}

/**
* Ensure the parsing namespace is handled when seeking from foreign content.
*
* @dataProvider data_processor_constructors
*
* @ticket 62290
*/
public function test_seek_back_from_foreign_content( callable $factory ) {
$processor = $factory( '<custom-element /><svg><rect />' );
$this->assertTrue( $processor->next_tag( 'CUSTOM-ELEMENT' ) );
$this->assertTrue( $processor->set_bookmark( 'mark' ), 'Failed to set bookmark "mark".' );
$this->assertTrue( $processor->has_bookmark( 'mark' ), 'Failed "mark" has_bookmark check.' );

/*
* <custom-element /> has self-closing flag, but HTML elements (that are not void elements) cannot self-close,
* they must be closed by some means, usually a closing tag.
*
* If the div were interpreted as foreign content, it would self-close.
*/
$this->assertTrue( $processor->has_self_closing_flag() );
$this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted HTML custom-element with self-closing flag as self-closing element.' );

// Proceed into foreign content.
$this->assertTrue( $processor->next_tag( 'RECT' ) );
$this->assertSame( 'svg', $processor->get_namespace() );
$this->assertTrue( $processor->has_self_closing_flag() );
$this->assertFalse( $processor->expects_closer() );
$this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() );

// Seek back.
$this->assertTrue( $processor->seek( 'mark' ), 'Failed to seek to bookmark "mark".' );
$this->assertSame( 'CUSTOM-ELEMENT', $processor->get_tag() );
// If the parsing namespace were not correct here (html),
// then the self-closing flag would be misinterpreted.
$this->assertTrue( $processor->has_self_closing_flag() );
$this->assertTrue( $processor->expects_closer(), 'Incorrectly interpreted HTML custom-element with self-closing flag as self-closing element.' );

// Proceed into foreign content again.
$this->assertTrue( $processor->next_tag( 'RECT' ) );
$this->assertSame( 'svg', $processor->get_namespace() );
$this->assertTrue( $processor->has_self_closing_flag() );
$this->assertFalse( $processor->expects_closer() );

// The RECT should still descend from the CUSTOM-ELEMENT despite its self-closing flag.
$this->assertSame( array( 'HTML', 'BODY', 'CUSTOM-ELEMENT', 'SVG', 'RECT' ), $processor->get_breadcrumbs() );
}

/**
* Data provider.
*
* @return array
*/
public static function data_processor_constructors(): array {
return array(
'Full parser' => array( array( WP_HTML_Processor::class, 'create_full_parser' ) ),
'Fragment parser' => array( array( WP_HTML_Processor::class, 'create_fragment' ) ),
);
}
}
2 changes: 1 addition & 1 deletion tests/phpunit/tests/html-api/wpHtmlProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public function test_clear_to_navigate_after_seeking() {

// Create a bookmark inside of that stack.
if ( null !== $processor->get_attribute( 'two' ) ) {
$processor->set_bookmark( 'two' );
$this->assertTrue( $processor->set_bookmark( 'two' ) );
break;
}
}
Expand Down

0 comments on commit 42f3df4

Please sign in to comment.