diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php
index 9ab7a0dd86..cc1ce63fd7 100644
--- a/packages/playground/data-liberation/bootstrap.php
+++ b/packages/playground/data-liberation/bootstrap.php
@@ -30,7 +30,6 @@
require_once __DIR__ . '/src/WP_URL.php';
require_once __DIR__ . '/src/xml-api/WP_XML_Decoder.php';
-require_once __DIR__ . '/src/xml-api/WP_XML_Tag_Processor.php';
require_once __DIR__ . '/src/xml-api/WP_XML_Processor.php';
require_once __DIR__ . '/src/WP_WXR_URL_Rewrite_Processor.php';
@@ -38,8 +37,7 @@
// Polyfill WordPress core functions
-function _doing_it_wrong() {
-
+function _doing_it_wrong($method, $message, $version) {
}
function __($input) {
diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml
index a3e66da030..50553590e5 100644
--- a/packages/playground/data-liberation/phpunit.xml
+++ b/packages/playground/data-liberation/phpunit.xml
@@ -9,7 +9,6 @@
tests/WPBlockMarkupUrlProcessorTests.phptests/URLParserWHATWGComplianceTests.phptests/WPXMLProcessorTests.php
- tests/WPXMLTagProcessorTests.phptests/UrldecodeNTests.php
diff --git a/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php b/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php
index 34caa67513..2b6bb5b343 100644
--- a/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php
+++ b/packages/playground/data-liberation/src/WP_WXR_URL_Rewrite_Processor.php
@@ -3,8 +3,8 @@
class WP_WXR_URL_Rewrite_Processor {
- public static function stream( $current_site_url, $new_site_url ) {
- return WP_XML_Processor::stream(
+ public static function create_stream_processor( $current_site_url, $new_site_url ) {
+ return WP_XML_Processor::create_stream_processor(
function ( $processor ) use ( $current_site_url, $new_site_url ) {
if ( static::is_wxr_content_node( $processor ) ) {
$text = $processor->get_modifiable_text();
diff --git a/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php b/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php
index 284e7c37a5..b2710f84ec 100644
--- a/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php
+++ b/packages/playground/data-liberation/src/stream-api/WP_File_Byte_Stream.php
@@ -11,6 +11,7 @@ public function __construct( $file_path, $chunk_size = 8096 ) {
$this->file_path = $file_path;
$this->chunk_size = $chunk_size;
parent::__construct();
+ $this->append_eof();
}
public function pause() {
diff --git a/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php b/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php
index e128471a75..b6a71cfab9 100644
--- a/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php
+++ b/packages/playground/data-liberation/src/stream-api/WP_Stream_Chain.php
@@ -16,6 +16,11 @@
* Consult it for reasoning and usage examples:
*
* https://github.com/adamziel/wxr-normalize/pull/1
+ *
+ * @TODO: Allow each stream to indicate its output reached EOF
+ * and propagate that information downstream. Otherwise,
+ * WP_XML_Processor will always end in an "incomplete input"
+ * state.
*/
class WP_Stream_Chain extends WP_Byte_Stream implements ArrayAccess, Iterator {
private $first_stream;
diff --git a/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php b/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php
index a954a31cf0..d07b092120 100644
--- a/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php
+++ b/packages/playground/data-liberation/src/stream-api/WP_Stream_Processor.php
@@ -2,6 +2,7 @@
interface WP_Stream_Processor {
public function append_bytes( string $bytes );
+ public function input_finished(): void;
public function is_finished(): bool;
public function is_paused_at_incomplete_input(): bool;
public function get_last_error(): ?string;
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
index bef650e33c..237419866e 100644
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
+++ b/packages/playground/data-liberation/src/xml-api/WP_XML_Processor.php
@@ -2,15 +2,581 @@
/**
* XML API: WP_XML_Processor class
*
+ * Scans through an XML document to find specific tags, then
+ * transforms those tags by adding, removing, or updating the
+ * values of the XML attributes within that tag (opener).
+ *
+ * It implements a subset of the XML 1.0 specification (https://www.w3.org/TR/xml/)
+ * and supports XML documents with the following characteristics:
+ *
+ * * XML 1.0
+ * * Well-formed
+ * * UTF-8 encoded
+ * * Not standalone (so can use external entities)
+ * * No DTD, DOCTYPE, ATTLIST, ENTITY, or conditional sections
+ *
+ * ### Possible future direction for this module
+ *
+ * The final goal is to support both 1.0 and 1.1 depending on the
+ * initial processing instruction (`<?xml version="1.0" ?>`). We're
+ * starting with 1.0, however, because that's what most WXR
+ * files declare.
+ *
+ * @TODO: Track specific error states, expose informative messages, line
+ * numbers, indexes, and other debugging info.
+ *
+ * @TODO: Skip over the following syntax elements:
+ * *
+ *
+ * or
+ *
+ *
+ *
+ * ' >
+ * %xx;
+ * ]>
+ *
+ * @TODO: Support XML 1.1.
* @package WordPress
* @subpackage HTML-API
* @since WP_VERSION
*/
/**
+ * Core class used to modify attributes in an XML document for tags matching a query.
+ *
+ * ## Usage
+ *
+ * Use of this class requires three steps:
+ *
+ * 1. Create a new class instance with your input XML document.
+ * 2. Find the tag(s) you are looking for.
+ * 3. Request changes to the attributes in those tag(s).
+ *
+ * Example:
+ *
+ * $tags = WP_XML_Processor::from_string( $xml );
+ * if ( $tags->next_tag( 'wp:option' ) ) {
+ * $tags->set_attribute( 'selected', 'yes' );
+ * }
+ *
+ * ### Finding tags
+ *
+ * The `next_tag()` function moves the internal cursor through
+ * your input XML document until it finds a tag meeting any of
+ * the supplied restrictions in the optional query argument. If
+ * no argument is provided then it will find the next XML tag,
+ * regardless of what kind it is.
+ *
+ * If you want to _find whatever the next tag is_:
+ *
+ * $tags->next_tag();
+ *
+ * | Goal | Query |
+ * |-----------------------------------------------------------|---------------------------------------------------------------------------------|
+ * | Find any tag. | `$tags->next_tag();` |
+ * | Find next image tag. | `$tags->next_tag( array( 'tag_name' => 'wp:image' ) );` |
+ * | Find next image tag (without passing the array). | `$tags->next_tag( 'wp:image' );` |
+ *
+ * If a tag was found meeting your criteria then `next_tag()`
+ * will return `true` and you can proceed to modify it. If it
+ * returns `false`, however, it failed to find the tag and
+ * moved the cursor to the end of the file.
+ *
+ * Once the cursor reaches the end of the file the processor
+ * is done and if you want to reach an earlier tag you will
+ * need to recreate the processor and start over, as it's
+ * unable to back up or move in reverse.
+ *
+ * See the section on bookmarks for an exception to this
+ * no-backing-up rule.
+ *
+ * #### Custom queries
+ *
+ * Sometimes it's necessary to further inspect an XML tag than
+ * the query syntax here permits. In these cases one may further
+ * inspect the search results using the read-only functions
+ * provided by the processor or external state or variables.
+ *
+ * Example:
+ *
+ * // Paint up to the first five `wp:musician` or `wp:actor` tags marked with the "jazzy" style.
+ * $remaining_count = 5;
+ * while ( $remaining_count > 0 && $tags->next_tag() ) {
+ * if (
+ * ( 'wp:musician' === $tags->get_tag() || 'wp:actor' === $tags->get_tag() ) &&
+ * 'jazzy' === $tags->get_attribute( 'data-style' )
+ * ) {
+ * $tags->set_attribute( 'wp:theme-style', 'theme-style-everest-jazz' );
+ * $remaining_count--;
+ * }
+ * }
+ *
+ * `get_attribute()` will return `null` if the attribute wasn't present
+ * on the tag when it was called. It may return `""` (the empty string)
+ * in cases where the attribute was present but its value was empty.
+ * For boolean attributes, those whose name is present but no value is
+ * given, it will return `true` (the only way to set `false` for an
+ * attribute is to remove it).
+ *
+ * #### When matching fails
+ *
+ * When `next_tag()` returns `false` it could mean different things:
+ *
+ * - The requested tag wasn't found in the input document.
+ * - The input document ended in the middle of an XML syntax element.
+ *
+ * When a document ends in the middle of a syntax element it will pause
+ * the processor. This is to make it possible in the future to extend the
+ * input document and proceed - an important requirement for chunked
+ * streaming parsing of a document.
+ *
+ * Example:
+ *
+ * $processor = new WP_XML_Processor( 'This next_tag( array( 'tag_name' => 'wp:todo-list' ) ) ) {
+ * $p->set_bookmark( 'list-start' );
+ * while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
+ * if ( 'wp:todo' === $p->get_tag() && $p->is_tag_closer() ) {
+ * $p->set_bookmark( 'list-end' );
+ * $p->seek( 'list-start' );
+ * $p->set_attribute( 'data-contained-todos', (string) $total_todos );
+ * $total_todos = 0;
+ * $p->seek( 'list-end' );
+ * break;
+ * }
+ *
+ * if ( 'wp:todo-item' === $p->get_tag() && ! $p->is_tag_closer() ) {
+ * $total_todos++;
+ * }
+ * }
+ * }
+ *
+ * ## Tokens and finer-grained processing.
+ *
+ * It's possible to scan through every lexical token in the
+ * XML document using the `next_token()` function. This
+ * alternative form takes no argument and provides no built-in
+ * query syntax.
+ *
+ * Example:
+ *
+ * $title = '(untitled)';
+ * $text = '';
+ * while ( $processor->next_token() ) {
+ * switch ( $processor->get_token_name() ) {
+ * case '#text':
+ * $text .= $processor->get_modifiable_text();
+ * break;
+ *
+ * case 'wp:new-line':
+ * $text .= "\n";
+ * break;
+ *
+ * case 'wp:title':
+ * $title = $processor->get_modifiable_text();
+ * break;
+ * }
+ * }
+ * return trim( "# {$title}\n\n{$text}" );
+ *
+ * ### Tokens and _modifiable text_.
+ *
+ * #### Other tokens with modifiable text.
+ *
+ * There are also non-elements which are void/self-closing in nature and contain
+ * modifiable text that is part of that individual syntax token itself.
+ *
+ * - `#text` nodes, whose entire token _is_ the modifiable text.
+ * - XML comments and tokens that become comments due to some syntax error. The
+ * text for these tokens is the portion of the comment inside of the syntax.
+ * E.g. for `<!-- comment -->` the text is `" comment "` (note the spaces are included).
+ * - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
+ * `<![CDATA[some content]]>` the text is `"some content"`.
+ * - XML Processing instruction nodes like `<?xml ... ?>` (with restrictions [1]).
+ *
+ * [1]: XML requires "xml" as a processing instruction name. The Tag Processor captures the entire
+ * processing instruction as a single token up to the closing `?>`.
+ *
+ * ## Design and limitations
+ *
+ * The Tag Processor is designed to linearly scan XML documents and tokenize
+ * XML tags and their attributes. It's designed to do this as efficiently as
+ * possible without compromising parsing integrity. Therefore it will be
+ * slower than some methods of modifying XML, such as those incorporating
+ * over-simplified PCRE patterns, but will not introduce the defects and
+ * failures that those methods bring in, which lead to broken page renders
+ * and often to security vulnerabilities. On the other hand, it will be faster
+ * than full-blown XML parsers such as DOMDocument and use considerably
+ * less memory. It requires a negligible memory overhead, enough to consider
+ * it a zero-overhead system.
+ *
+ * The performance characteristics are maintained by avoiding tree construction.
+ *
+ * The Tag Processor checks the most important aspects of XML integrity as it scans
+ * through the document. It verifies that a single root element exists, that there are
+ * no unclosed tags, and that each opener tag has a corresponding closer. It also
+ * ensures no duplicate attributes exist on a single tag.
+ *
+ * At the same time, the Tag Processor also skips expensive validation of XML entities
+ * in the document. The Tag Processor will initially pass through the invalid entity references
+ * and only fail when the developer attempts to read their value. If that doesn't happen,
+ * the invalid values will be left untouched in the final document.
+ *
+ * Most operations within the Tag Processor are designed to minimize the difference
+ * between an input and output document for any given change. For example, the
+ * `set_attribute` and `remove_attribute` methods preserve whitespace and the attribute
+ * ordering within the element definition. An exception to this rule is that all attribute
+ * updates store their values as double-quoted strings, meaning that attributes on input with
+ * single-quoted or unquoted values will appear in the output with double-quotes.
+ *
+ * ### Text Encoding
+ *
+ * The Tag Processor assumes that the input XML document is encoded with a
+ * UTF-8 encoding and will refuse to process documents that declare other encodings.
+ *
* @since WP_VERSION
*/
-class WP_XML_Processor extends WP_XML_Tag_Processor implements WP_Stream_Processor {
+class WP_XML_Processor {
+ /**
+ * The maximum number of bookmarks allowed to exist at
+ * any given time.
+ *
+ * @since WP_VERSION
+ * @var int
+ *
+ * @see WP_XML_Processor::set_bookmark()
+ */
+ const MAX_BOOKMARKS = 10;
+
+ /**
+ * Maximum number of times seek() can be called.
+ * Prevents accidental infinite loops.
+ *
+ * @since WP_VERSION
+ * @var int
+ *
+ * @see WP_XML_Processor::seek()
+ */
+ const MAX_SEEK_OPS = 1000;
+
+ /**
+ * The XML document to parse.
+ *
+ * @since WP_VERSION
+ * @var string
+ */
+ public $xml;
+
+ /**
+ * Specifies mode of operation of the parser at any given time.
+ *
+ * | State | Meaning |
+ * | ----------------|------------------------------------------------------------------------|
+ * | *Ready* | The parser is ready to run. |
+ * | *Complete* | There is nothing left to parse. |
+ * | *Incomplete* | The XML ended in the middle of a token; nothing more can be parsed. |
+ * | *Matched tag* | Found an XML tag; it's possible to modify its attributes. |
+ * | *Text node* | Found a #text node; this is plaintext and modifiable. |
+ * | *CDATA node* | Found a CDATA section; this is modifiable. |
+ * | *PI node* | Found a processing instruction; this is modifiable. |
+ * | *XML declaration* | Found an XML declaration; this is modifiable. |
+ * | *Comment* | Found a comment or bogus comment; this is modifiable. |
+ *
+ * @since WP_VERSION
+ *
+ * @see WP_XML_Processor::STATE_READY
+ * @see WP_XML_Processor::STATE_COMPLETE
+ * @see WP_XML_Processor::STATE_INCOMPLETE_INPUT
+ * @see WP_XML_Processor::STATE_MATCHED_TAG
+ * @see WP_XML_Processor::STATE_TEXT_NODE
+ * @see WP_XML_Processor::STATE_CDATA_NODE
+ * @see WP_XML_Processor::STATE_PI_NODE
+ * @see WP_XML_Processor::STATE_XML_DECLARATION
+ * @see WP_XML_Processor::STATE_COMMENT
+ *
+ * @var string
+ */
+ protected $parser_state = self::STATE_READY;
+
+ /**
+ * Whether more input bytes may still be appended; set to false by input_finished().
+ *
+ * @var bool
+ */
+ protected $expecting_more_input = true;
+
+ /**
+ * How many bytes from the original XML document have been read and parsed.
+ *
+ * This value points to the latest byte offset in the input document which
+ * has been already parsed. It is the internal cursor for the Tag Processor
+ * and updates while scanning through the XML tokens.
+ *
+ * @since WP_VERSION
+ * @var int
+ */
+ public $bytes_already_parsed = 0;
+
+ /**
+ * Byte offset in input document where current token starts.
+ *
+ * Example:
+ *
+ * <div id="test">...
+ * 01234
+ * - token starts at 0
+ *
+ * @since WP_VERSION
+ *
+ * @var int|null
+ */
+ protected $token_starts_at;
+
+ /**
+ * Byte length of current token.
+ *
+ * Example:
+ *
+ * <div id="test">...
+ * 012345678901234
+ * - token length is 14 - 0 = 14
+ *
+ * a <!-- comment --> is a token.
+ * 0123456789 123456789 123456789
+ * - token length is 17 - 2 = 15
+ *
+ * @since WP_VERSION
+ *
+ * @var int|null
+ */
+ private $token_length;
+
+ /**
+ * Byte offset in input document where current tag name starts.
+ *
+ * Example:
+ *
+ * <div id="test">...
+ * 01234
+ * - tag name starts at 1
+ *
+ * @since WP_VERSION
+ *
+ * @var int|null
+ */
+ private $tag_name_starts_at;
+
+ /**
+ * Byte length of current tag name.
+ *
+ * Example:
+ *
+ * <div id="test">...
+ * 01234
+ * --- tag name length is 3
+ *
+ * @since WP_VERSION
+ *
+ * @var int|null
+ */
+ private $tag_name_length;
+
+ /**
+ * Byte offset into input document where current modifiable text starts.
+ *
+ * @since WP_VERSION
+ *
+ * @var int
+ */
+ private $text_starts_at;
+
+ /**
+ * Byte length of modifiable text.
+ *
+ * @since WP_VERSION
+ *
+ * @var int
+ */
+ private $text_length;
+
+ /**
+ * Whether the current tag is an opening tag, e.g. `<tag>`, or a closing tag, e.g. `</tag>`.
+ *
+ * @var bool
+ */
+ private $is_closing_tag;
+
+ /**
+ * Stores an explanation for why something failed, if it did.
+ *
+ * @see self::get_last_error
+ *
+ * @since WP_VERSION
+ *
+ * @var string|null
+ */
+ protected $last_error = null;
+
+ /**
+ * Lazily-built index of attributes found within an XML tag, keyed by the attribute name.
+ *
+ * Example:
+ *
+ * // Supposing the parser is working through this content
+ * // and stops after recognizing the `id` attribute.
+ * //
+ * // ^ parsing will continue from this point.
+ * $this->attributes = array(
+ * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false )
+ * );
+ *
+ * // When picking up parsing again, or when asking to find the
+ * // `class` attribute we will continue and add to this array.
+ * $this->attributes = array(
+ * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ),
+ * 'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false )
+ * );
+ *
+ * @since WP_VERSION
+ * @var WP_HTML_Attribute_Token[]
+ */
+ private $attributes = array();
+
+ /**
+ * Tracks a semantic location in the original XML which
+ * shifts with updates as they are applied to the document.
+ *
+ * @since WP_VERSION
+ * @var WP_HTML_Span[]
+ */
+ protected $bookmarks = array();
+
+ /**
+ * Lexical replacements to apply to input XML document.
+ *
+ * "Lexical" in this class refers to the part of this class which
+ * operates on pure text _as text_ and not as XML. There's a line
+ * between the public interface, with XML-semantic methods like
+ * `set_attribute` and `add_class`, and an internal state that tracks
+ * text offsets in the input document.
+ *
+ * When higher-level XML methods are called, those have to transform their
+ * operations (such as setting an attribute's value) into text diffing
+ * operations (such as replacing the sub-string from indices A to B with
+ * some given new string). These text-diffing operations are the lexical
+ * updates.
+ *
+ * As new higher-level methods are added they need to collapse their
+ * operations into these lower-level lexical updates since that's the
+ * Tag Processor's internal language of change. Any code which creates
+ * these lexical updates must ensure that they do not cross XML syntax
+ * boundaries, however, so these should never be exposed outside of this
+ * class or any classes which intentionally expand its functionality.
+ *
+ * These are enqueued while editing the document instead of being immediately
+ * applied to avoid processing overhead, string allocations, and string
+ * copies when applying many updates to a single document.
+ *
+ * Example:
+ *
+ * // Replace an attribute stored with a new value, indices
+ * // sourced from the lazily-parsed XML recognizer.
+ * $start = $attributes['src']->start;
+ * $length = $attributes['src']->length;
+ * $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value );
+ *
+ * // Correspondingly, something like this will appear in this array.
+ * $lexical_updates = array(
+ * WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
+ * );
+ *
+ * @since WP_VERSION
+ * @var WP_HTML_Text_Replacement[]
+ */
+ protected $lexical_updates = array();
+
+ /**
+ * Tracks and limits `seek()` calls to prevent accidental infinite loops.
+ *
+ * @since WP_VERSION
+ * @var int
+ *
+ * @see WP_XML_Processor::seek()
+ */
+ protected $seek_count = 0;
/**
* Indicates the current parsing stage.
@@ -32,9 +598,9 @@ class WP_XML_Processor extends WP_XML_Tag_Processor implements WP_Stream_Process
* | *Element* | The parser is parsing the root element. |
* | *Misc* | The parser is parsing miscellaneous content. |
*
- * @see WP_XML_Tag_Processor::IN_PROLOG_CONTEXT
- * @see WP_XML_Tag_Processor::IN_ELEMENT_CONTEXT
- * @see WP_XML_Tag_Processor::IN_MISC_CONTEXT
+ * @see WP_XML_Processor::IN_PROLOG_CONTEXT
+ * @see WP_XML_Processor::IN_ELEMENT_CONTEXT
+ * @see WP_XML_Processor::IN_MISC_CONTEXT
*
* @since WP_VERSION
* @var bool
@@ -50,13 +616,51 @@ class WP_XML_Processor extends WP_XML_Tag_Processor implements WP_Stream_Process
*/
public $stack_of_open_elements = array();
- public static function stream( $node_visitor_callback ) {
- $xml_processor = new WP_XML_Processor( '', array(), WP_XML_Processor::IN_PROLOG_CONTEXT );
+ public $had_previous_chunks = false;
+
+ /**
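+ * Creates a processor over a complete XML document and marks its input as finished. Returns null unless the given encoding is exactly 'UTF-8'.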
+ */
+ public static function from_string( $xml, $known_definite_encoding = 'UTF-8' ) {
+ if ( 'UTF-8' !== $known_definite_encoding ) {
+ return null;
+ }
+
+ $processor = new WP_XML_Processor( $xml );
+ $processor->input_finished();
+ return $processor;
+ }
+
+ public static function from_stream( $xml, $known_definite_encoding = 'UTF-8' ) {
+ if ( 'UTF-8' !== $known_definite_encoding ) {
+ return null;
+ }
+ return new WP_XML_Processor( $xml );
+ }
+
+ /**
+ * Constructor.
+ *
+ * Do not use this method. Use the static creator methods instead.
+ *
+ * @access private
+ *
+ * @since WP_VERSION
+ *
+ * @see WP_XML_Processor::from_string()
+ * @see WP_XML_Processor::from_stream()
+ *
+ * @param string $xml XML to process.
+ */
+ protected function __construct( $xml ) {
+ $this->xml = $xml;
+ }
+
+ public static function create_stream_processor( $node_visitor_callback ) {
+ $xml_processor = WP_XML_Processor::from_stream( '' );
return new ProcessorByteStream(
$xml_processor,
function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
- $buffer = $xml_processor->flush_processed_xml();
-
$new_bytes = $state->consume_input_bytes();
if ( null !== $new_bytes ) {
$xml_processor->append_bytes( $new_bytes );
@@ -67,16 +671,16 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
$node_visitor_callback( $xml_processor );
}
+ $buffer = '';
if ( $tokens_found > 0 ) {
$buffer .= $xml_processor->flush_processed_xml();
} elseif (
- $tokens_found === 0 &&
- ! $xml_processor->is_paused_at_incomplete_input() &&
- $xml_processor->get_current_depth() === 0
+ $tokens_found === 0 &&
+ ! $xml_processor->is_paused_at_incomplete_input() &&
+ $xml_processor->get_current_depth() === 0
) {
- // We've reached the end of the document, let's finish up.
- // @TODO: Fix this so it doesn't return the entire XML
- $buffer .= $xml_processor->get_unprocessed_xml();
+ $buffer .= $xml_processor->flush_processed_xml();
+ $buffer .= $xml_processor->get_updated_xml();
$state->finish();
}
@@ -90,6 +694,8 @@ function ( $state ) use ( $xml_processor, $node_visitor_callback ) {
);
}
+ /*
+ @TODO: implement these methods for re-entrancy
public function pause() {
return array(
@@ -97,7 +703,7 @@ public function pause() {
// @TODO: Include all the information below in the bookmark:
'bytes_already_parsed' => $this->token_starts_at,
'breadcrumbs' => $this->get_breadcrumbs(),
- 'parser_context' => $this->get_parser_context(),
+ 'parser_context' => $this->parser_context,
'stack_of_open_elements' => $this->stack_of_open_elements,
);
}
@@ -107,8 +713,9 @@ public function resume( $paused ) {
$this->stack_of_open_elements = $paused['stack_of_open_elements'];
$this->parser_context = $paused['parser_context'];
$this->bytes_already_parsed = $paused['bytes_already_parsed'];
- $this->base_class_next_token();
+ $this->parse_next_token();
}
+ */
/**
* Wipes out the processed XML and appends the next chunk of XML to
@@ -117,17 +724,41 @@ public function resume( $paused ) {
* @param string $next_chunk XML to append.
*/
public function append_bytes( string $next_chunk ) {
- $this->xml .= $next_chunk;
+ if ( ! $this->expecting_more_input ) {
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'Cannot append bytes after the last input chunk was provided and input_finished() was called.' ),
+ 'WP_VERSION'
+ );
+ return false;
+ }
+ $this->xml .= $next_chunk;
+ $this->had_previous_chunks = true;
+ if ( $this->parser_state === self::STATE_INCOMPLETE_INPUT ) {
+ $this->parser_state = self::STATE_READY;
+ }
+ return true;
+ }
+
+ /**
+ * Indicates that all the XML document bytes have been provided.
+ *
+ * After calling this method, the processor will emit errors where
+ * previously it would have entered the STATE_INCOMPLETE_INPUT state.
+ */
+ public function input_finished() {
+ $this->expecting_more_input = false;
}
public function flush_processed_xml() {
+ // Flush updates
$this->get_updated_xml();
- $processed_xml = $this->get_processed_xml();
- $unprocessed_xml = $this->get_unprocessed_xml();
+ $processed_xml = substr( $this->xml, 0, $this->bytes_already_parsed );
+ $unprocessed_xml = substr( $this->xml, $this->bytes_already_parsed );
$breadcrumbs = $this->get_breadcrumbs();
- $parser_context = $this->get_parser_context();
+ $parser_context = $this->parser_context;
$this->reset_state();
@@ -140,29 +771,486 @@ public function flush_processed_xml() {
}
/**
- * Constructor.
+ * Internal method which finds the next token in the XML document.
*
- * @since WP_VERSION
+ * This method is a protected internal function which implements the logic for
+ * finding the next token in a document. It exists so that the parser can update
+ * its state without affecting the location of the cursor in the document and
+ * without triggering subclass methods for things like `next_token()`, e.g. when
+ * applying patches before searching for the next token.
+ *
+ * @since 6.5.0
*
- * @param string $xml XML to process.
+ * @access private
+ *
+ * @return bool Whether a token was parsed.
*/
- public function __construct( $xml, $breadcrumbs = array(), $parser_context = self::IN_PROLOG_CONTEXT ) {
- parent::__construct( $xml );
- $this->stack_of_open_elements = $breadcrumbs;
- $this->parser_context = $parser_context;
- }
+ protected function parse_next_token() {
+ $was_at = $this->bytes_already_parsed;
+ $this->after_tag();
- public function get_parser_context() {
- return $this->parser_context;
- }
+ // Don't proceed if there's nothing more to scan.
+ if (
+ self::STATE_COMPLETE === $this->parser_state ||
+ self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+ null !== $this->last_error
+ ) {
+ return false;
+ }
- /**
- * Finds the next element matching the $query.
- *
- * This doesn't currently have a way to represent non-tags and doesn't process
- * semantic rules for text nodes. For access to the raw tokens consider using
- * WP_XML_Tag_Processor instead.
- *
+ /*
+ * The next step in the parsing loop determines the parsing state;
+ * clear it so that state doesn't linger from the previous step.
+ */
+ $this->parser_state = self::STATE_READY;
+
+ if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
+ if ( $this->expecting_more_input ) {
+ $this->parser_state = self::STATE_INCOMPLETE_INPUT;
+ } else {
+ $this->parser_state = self::STATE_COMPLETE;
+ }
+ return false;
+ }
+
+ // Find the next tag if it exists.
+ if ( false === $this->parse_next_tag() ) {
+ if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+ $this->bytes_already_parsed = $was_at;
+ }
+
+ return false;
+ }
+
+ if ( null !== $this->last_error ) {
+ return false;
+ }
+
+ /*
+ * For legacy reasons the rest of this function handles tags and their
+ * attributes. If the processor has reached the end of the document
+ * or if it matched any other token then it should return here to avoid
+ * attempting to process tag-specific syntax.
+ */
+ if (
+ self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
+ self::STATE_COMPLETE !== $this->parser_state &&
+ self::STATE_MATCHED_TAG !== $this->parser_state
+ ) {
+ return true;
+ }
+
+ if ( $this->is_closing_tag ) {
+ $this->skip_whitespace();
+ } else {
+ // Parse all of its attributes.
+ while ( $this->parse_next_attribute() ) {
+ continue;
+ }
+ }
+
+ if ( null !== $this->last_error ) {
+ return false;
+ }
+
+ // Ensure that the tag closes before the end of the document.
+ if (
+ self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+ $this->bytes_already_parsed >= strlen( $this->xml )
+ ) {
+ // Does this appropriately clear state (parsed attributes)?
+ $this->set_incomplete_input_or_parse_error();
+ $this->bytes_already_parsed = $was_at;
+
+ return false;
+ }
+
+ $tag_ends_at = strpos( $this->xml, '>', $this->bytes_already_parsed );
+ if ( false === $tag_ends_at ) {
+ $this->set_incomplete_input_or_parse_error();
+ $this->bytes_already_parsed = $was_at;
+
+ return false;
+ }
+
+ if ( $this->is_closing_tag && $tag_ends_at !== $this->bytes_already_parsed ) {
+ $this->last_error = self::ERROR_SYNTAX;
+ _doing_it_wrong(
+ __METHOD__,
+ __( 'Invalid closing tag encountered.' ),
+ 'WP_VERSION'
+ );
+ return false;
+ }
+
+ $this->parser_state = self::STATE_MATCHED_TAG;
+ $this->bytes_already_parsed = $tag_ends_at + 1;
+ $this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
+
+ /*
+ * If we are in a PCData element, everything until the closer
+ * is considered text.
+ */
+ if ( ! $this->is_pcdata_element() ) {
+ return true;
+ }
+
+ /*
+ * Preserve the opening tag pointers, as these will be overwritten
+ * when finding the closing tag. They will be reset after finding
+ * the closing tag to point to the opening of the special atomic
+ * tag sequence.
+ */
+ $tag_name_starts_at = $this->tag_name_starts_at;
+ $tag_name_length = $this->tag_name_length;
+ $tag_ends_at = $this->token_starts_at + $this->token_length;
+ $attributes = $this->attributes;
+
+ $found_closer = $this->skip_pcdata( $this->get_tag() );
+
+ // Closer not found, the document is incomplete.
+ if ( false === $found_closer ) {
+ $this->set_incomplete_input_or_parse_error();
+ $this->bytes_already_parsed = $was_at;
+ return false;
+ }
+
+ /*
+ * The values here look like they reference the opening tag but they reference
+ * the closing tag instead. This is why the opening tag values were stored
+ * above in a variable. It reads confusingly here, but that's because the
+ * functions that skip the contents have moved all the internal cursors past
+ * the inner content of the tag.
+ */
+ $this->token_starts_at = $was_at;
+ $this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
+ $this->text_starts_at = $tag_ends_at;
+ $this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
+ $this->tag_name_starts_at = $tag_name_starts_at;
+ $this->tag_name_length = $tag_name_length;
+ $this->attributes = $attributes;
+
+ return true;
+ }
+
+ /**
+ * Whether the processor paused because the input XML document ended
+ * in the middle of a syntax element, such as in the middle of a tag.
+ *
+ * Example:
+ *
+ * $processor = new WP_XML_Processor( '
Surprising fact you may no…
+ * ^ ^
+ * \-|-- it shifts with edits
+ *
+ * Bookmarks provide the ability to seek to a previously-scanned
+ * place in the XML document. This avoids the need to re-scan
+ * the entire document.
+ *
+ * Example:
+ *
+ * <ul><li>One</li><li>Two</li><li>Three</li></ul>
+ * ^^^^
+ * want to note this last item
+ *
+ * $p = WP_XML_Processor::from_string( $xml );
+ * $in_list = false;
+ * while ( $p->next_tag( array( 'tag_closers' => $in_list ? 'visit' : 'skip' ) ) ) {
+ * if ( 'UL' === $p->get_tag() ) {
+ * if ( $p->is_tag_closer() ) {
+ * $in_list = false;
+ * $p->set_bookmark( 'resume' );
+ * if ( $p->seek( 'last-li' ) ) {
+ * $p->add_class( 'last-li' );
+ * }
+ * $p->seek( 'resume' );
+ * $p->release_bookmark( 'last-li' );
+ * $p->release_bookmark( 'resume' );
+ * } else {
+ * $in_list = true;
+ * }
+ * }
+ *
+ * if ( 'LI' === $p->get_tag() ) {
+ * $p->set_bookmark( 'last-li' );
+ * }
+ * }
+ *
+ * Bookmarks intentionally hide the internal string offsets
+ * to which they refer. They are maintained internally as
+ * updates are applied to the XML document and therefore
+ * retain their "position" - the location to which they
+ * originally pointed. The inability to use bookmarks with
+ * functions like `substr` is therefore intentional to guard
+ * against accidentally breaking the XML.
+ *
+ * Because bookmarks allocate memory and require processing
+ * for every applied update, they are limited and require
+ * a name. They should not be created with programmatically-made
+ * names, such as "li_{$index}" with some loop. As a general
+ * rule they should only be created with string-literal names
+ * like "start-of-section" or "last-paragraph".
+ *
+ * Bookmarks are a powerful tool to enable complicated behavior.
+ * Consider double-checking that you need this tool if you are
+ * reaching for it, as inappropriate use could lead to broken
+ * XML structure or unwanted processing overhead.
+ *
+ * @since WP_VERSION
+ *
+ * @param string $name Identifies this particular bookmark.
+ * @return bool Whether the bookmark was successfully created.
+ */
+ public function set_bookmark( $name ) {
+     /*
+      * A bookmark has to point at a concrete token, and there is none once the
+      * parser has finished or has stalled on incomplete input.
+      */
+     $state = $this->parser_state;
+     if ( self::STATE_COMPLETE === $state || self::STATE_INCOMPLETE_INPUT === $state ) {
+         return false;
+     }
+
+     // Re-pointing an existing bookmark never counts against the limit.
+     $is_new_name = ! array_key_exists( $name, $this->bookmarks );
+     if ( $is_new_name && count( $this->bookmarks ) >= static::MAX_BOOKMARKS ) {
+         _doing_it_wrong(
+             __METHOD__,
+             __( 'Too many bookmarks: cannot create any more.' ),
+             'WP_VERSION'
+         );
+         return false;
+     }
+
+     $span                     = new WP_HTML_Span( $this->token_starts_at, $this->token_length );
+     $this->bookmarks[ $name ] = $span;
+
+     return true;
+ }
+
+
+ /**
+ * Removes a bookmark that is no longer needed.
+ *
+ * Releasing a bookmark frees up the small
+ * performance overhead it requires.
+ *
+ * @param string $name Name of the bookmark to remove.
+ * @return bool Whether the bookmark already existed before removal.
+ */
+ public function release_bookmark( $name ) {
+     // Remember whether the bookmark was there so the caller can be told.
+     $existed = array_key_exists( $name, $this->bookmarks );
+     if ( $existed ) {
+         unset( $this->bookmarks[ $name ] );
+     }
+
+     return $existed;
+ }
+
+ /**
+ * Skips contents of PCDATA element.
+ *
+ * @since WP_VERSION
+ *
+ * @see https://www.w3.org/TR/xml/#sec-mixed-content
+ *
+ * @param string $tag_name The tag name which will close the PCDATA region.
+ * @return bool Whether a complete closing tag for $tag_name was found.
+ */
+ private function skip_pcdata( $tag_name ) {
+     $xml        = $this->xml;
+     $doc_length = strlen( $xml );
+     $closer     = '</' . $tag_name;
+     $closer_len = strlen( $closer );
+
+     $at = $this->bytes_already_parsed;
+     while ( $at < $doc_length ) {
+         $closer_at = strpos( $xml, $closer, $at );
+
+         // Fail if there is no possible tag closer.
+         if ( false === $closer_at ) {
+             return false;
+         }
+
+         /*
+          * This offset marks the "</" of the candidate closer rather than the
+          * name itself: the caller subtracts the text start from it to get the
+          * length of the enclosed text. It is only written once a candidate
+          * exists, so a failed search never stores `false` in the property.
+          */
+         $this->tag_name_starts_at = $closer_at;
+
+         // Move past "</name" and any whitespace that precedes the closing ">".
+         $at                         = $closer_at + $closer_len;
+         $at                        += strspn( $xml, " \t\f\r\n", $at );
+         $this->bytes_already_parsed = $at;
+
+         /*
+          * Ensure that the tag name terminates to avoid matching on
+          * substrings of a longer tag name. For example, the sequence
+          * "</contentious" should not match for "</content" even
+          * though "content" is found within the text.
+          */
+         if ( $at >= $doc_length ) {
+             return false;
+         }
+         if ( '>' === $xml[ $at ] ) {
+             $this->bytes_already_parsed = $at + 1;
+             return true;
+         }
+     }
+
+     return false;
+ }
+
+ /**
+ * Returns the last error, if any.
+ *
+ * Various situations lead to parsing failure but this class will
+ * return `false` in all those cases. To determine why something
+ * failed it's possible to request the last error. This can be
+ * helpful to know to distinguish whether a given tag couldn't
+ * be found or if content in the document caused the processor
+ * to give up and abort processing.
+ *
+ * Example
+ *
+ * $processor = WP_XML_Processor::create_fragment( '' );
+ * false === $processor->next_tag();
+ * WP_XML_Processor::ERROR_SYNTAX === $processor->get_last_error();
+ *
+ * @since WP_VERSION
+ *
+ * @see self::ERROR_UNSUPPORTED
+ * @see self::ERROR_EXCEEDED_MAX_BOOKMARKS
+ *
+ * @return string|null The last error, if one exists, otherwise null.
+ */
+ public function get_last_error(): ?string {
+ return $this->last_error; // Recorded by failing code paths, e.g. self::ERROR_SYNTAX when a value cannot be decoded.
+ }
+
+ /**
+ * Tag names declared as PCDATA elements.
+ *
+ * PCDATA elements are elements in which everything is treated as
+ * text, even syntax that may look like other elements, closers,
+ * processing instructions, etc.
+ *
+ * Example:
+ *
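+ * <root>
+ *     <my-pcdata>
+ *         This text contains syntax that seems
+ *         like XML nodes:
+ *
+ *         <input />
+ *         </seemingly invalid element --/>
+ *         <!-- is this a comment? -->
+ *         <?xml version="1.0" ?>
+ *
+ *         &amp;&lt;&gt;&quot;&apos;
+ *
+ *         But! It's all treated as text.
+ *     </my-pcdata>
+ * </root>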
+ *
+ * @var array
+ */
+ private $pcdata_elements = array();
+
+ /**
+ * Declares an element as PCDATA.
+ *
+ * PCDATA elements are elements in which everything is treated as
+ * text, even syntax that may look like other elements, closers,
+ * processing instructions, etc.
+ *
+ * For example:
+ *
+ * $processor = new WP_XML_Processor(
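+ * <<<XML
+ * <root>
+ *     <my-pcdata>
+ *         This text uses syntax that may seem
+ *         like XML nodes:
+ *
+ *         <input />
+ *         </seemingly invalid element --/>
+ *         <!-- is this a comment? -->
+ *         <?xml version="1.0" ?>
+ *
+ *         &amp;&lt;&gt;&quot;&apos;
+ *
+ *         But! It's all treated as text.
+ *     </my-pcdata>
+ * </root>
+ * XML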
+ * );
+ *
+ * $processor->declare_element_as_pcdata('my-pcdata');
+ * $processor->next_tag('my-pcdata');
+ * $processor->next_token();
+ *
+ * // Returns everything inside the
+ * // element as text:
+ * $processor->get_modifiable_text();
+ *
+ * @param string $element_name The name of the element to declare as PCDATA.
+ * @return void
+ */
+ public function declare_element_as_pcdata( $element_name ) {
+ $this->pcdata_elements[ $element_name ] = true; // Stored as an array key so is_pcdata_element() can test membership by key.
+ }
+
+ /**
+ * Indicates if the currently matched tag is a PCDATA element.
+ *
+ * @since WP_VERSION
+ *
+ * @return bool Whether the currently matched tag is a PCDATA element.
+ */
+ public function is_pcdata_element() {
+     $tag_name = $this->get_tag();
+
+     /*
+      * get_tag() returns null whenever the parser is not matched on a tag, and
+      * this method is also consulted while reading text nodes. A null array
+      * key is coerced to the empty string, so answer explicitly instead of
+      * performing a lookup with it.
+      */
+     if ( null === $tag_name ) {
+         return false;
+     }
+
+     return array_key_exists( $tag_name, $this->pcdata_elements );
+ }
+
+
+ /**
+ * Finds the next element matching the $query.
+ *
+ * This doesn't currently have a way to represent non-tags and doesn't process
+ * semantic rules for text nodes. For access to the raw tokens consider using
+ * WP_XML_Processor instead.
+ *
* @since WP_VERSION
*
* @param array|string|null $query {
@@ -244,159 +1332,1462 @@ public function next_tag( $query = null ) {
return false;
}
- /*
- * Sets a bookmark in the XML document.
- *
- * Bookmarks represent specific places or tokens in the HTML
- * document, such as a tag opener or closer. When applying
- * edits to a document, such as setting an attribute, the
- * text offsets of that token may shift; the bookmark is
- * kept updated with those shifts and remains stable unless
- * the entire span of text in which the token sits is removed.
- *
- * Release bookmarks when they are no longer needed.
+ /**
+ * Parses the next tag.
*
- * Example:
+ * This will find and start parsing the next tag, including
+ * the opening `<`, the potential closer `/`, and the tag
+ * name. It does not parse the attributes or scan to the
+ * closing `>`; these are left for other methods.
*
- *
- * ^^^^
- * want to note this last item
+ * $p = new WP_XML_Processor( 'Test' );
+ * $p->next_tag( array( 'class_name' => 'test' ) ) === true;
+ * $p->get_attribute( 'data-test-id' ) === '14';
+ * $p->get_attribute( 'enabled' ) === true;
+ * $p->get_attribute( 'aria-label' ) === null;
*
- * $p = new WP_HTML_Tag_Processor( $html );
- * $in_list = false;
- * while ( $p->next_tag( array( 'tag_closers' => $in_list ? 'visit' : 'skip' ) ) ) {
- * if ( 'UL' === $p->get_tag() ) {
- * if ( $p->is_tag_closer() ) {
- * $in_list = false;
- * $p->set_bookmark( 'resume' );
- * if ( $p->seek( 'last-li' ) ) {
- * $p->add_class( 'last-li' );
- * }
- * $p->seek( 'resume' );
- * $p->release_bookmark( 'last-li' );
- * $p->release_bookmark( 'resume' );
- * } else {
- * $in_list = true;
- * }
- * }
+ * $p->next_tag() === false;
+ * $p->get_attribute( 'class' ) === null;
*
- * if ( 'LI' === $p->get_tag() ) {
- * $p->set_bookmark( 'last-li' );
- * }
- * }
+ * @since WP_VERSION
+ *
+ * @param string $name Name of attribute whose value is requested.
+ * @return string|true|null Value of attribute or `null` if not available. Boolean attributes return `true`.
+ */
+ public function get_attribute( $name ) {
+     // Attributes are only readable on a matched tag or on the XML declaration.
+     $state = $this->parser_state;
+     if ( self::STATE_MATCHED_TAG !== $state && self::STATE_XML_DECLARATION !== $state ) {
+         return null;
+     }
+
+     // A pending update takes precedence over what the document currently holds.
+     $pending_value = $this->get_enqueued_attribute_value( $name );
+     if ( false !== $pending_value ) {
+         return $pending_value;
+     }
+
+     if ( ! isset( $this->attributes[ $name ] ) ) {
+         return null;
+     }
+
+     $token   = $this->attributes[ $name ];
+     $decoded = WP_XML_Decoder::decode(
+         substr( $this->xml, $token->value_starts_at, $token->value_length )
+     );
+     if ( null !== $decoded ) {
+         return $decoded;
+     }
+
+     /*
+      * The decoder returned nothing, which means the raw value is invalid.
+      * This is recorded as a syntax error.
+      *
+      * @see WP_XML_Decoder::decode()
+      */
+     $this->last_error = self::ERROR_SYNTAX;
+     _doing_it_wrong(
+         __METHOD__,
+         __( 'Invalid attribute value encountered.' ),
+         'WP_VERSION'
+     );
+     return false;
+ }
+
+ /**
+ * Gets names of all attributes matching a given prefix in the current tag.
+ *
+ * Note that matching is case-sensitive. This is in accordance with the spec.
+ *
+ * Example:
+ *
+ * $p = new WP_XML_Processor( '<content data-ENABLED="yes" class="test" DATA-test-id="14">Test</content>' );
+ * $p->next_tag( array( 'class_name' => 'test' ) ) === true;
+ * $p->get_attribute_names_with_prefix( 'data-' ) === array( 'data-ENABLED' );
+ * $p->get_attribute_names_with_prefix( 'DATA-' ) === array( 'DATA-test-id' );
+ * $p->get_attribute_names_with_prefix( 'DAta-' ) === array();
+ *
+ * @since WP_VERSION
+ *
+ * @param string $prefix Prefix of requested attribute names.
+ * @return array|null List of attribute names, or `null` when no tag opener is matched.
+ */
+ public function get_attribute_names_with_prefix( $prefix ) {
+     // Attribute names are only reported while matched on a tag opener.
+     $on_tag_opener = self::STATE_MATCHED_TAG === $this->parser_state && ! $this->is_closing_tag;
+     if ( ! $on_tag_opener ) {
+         return null;
+     }
+
+     $has_prefix = static function ( $attr_name ) use ( $prefix ) {
+         return str_starts_with( $attr_name, $prefix );
+     };
+
+     // Re-index so the result is a plain list in document order.
+     return array_values( array_filter( array_keys( $this->attributes ), $has_prefix ) );
+ }
+
+ /**
+ * Returns the name of the matched tag, exactly as written in the document.
+ *
+ * Example:
+ *
+ * $p = new WP_XML_Processor( '<content class="test">Test</content>' );
+ * $p->next_tag() === true;
+ * $p->get_tag() === 'content';
+ *
+ * $p->next_tag() === false;
+ * $p->get_tag() === null;
+ *
+ * @since WP_VERSION
+ *
+ * @return string|null Name of currently matched tag in input XML, or `null` if none found.
+ */
+ public function get_tag() {
+     $is_on_tag =
+         null !== $this->tag_name_starts_at &&
+         self::STATE_MATCHED_TAG === $this->parser_state;
+     if ( ! $is_on_tag ) {
+         return null;
+     }
+
+     // The name is returned as it appears in the document; no case folding is applied.
+     return substr( $this->xml, $this->tag_name_starts_at, $this->tag_name_length );
+ }
+
+ /**
+ * Indicates if the currently matched tag is an empty element tag.
+ *
+ * XML tags ending with a solidus ("/") are parsed as empty elements. They have no
+ * content and no matching closer is expected.
+ *
+ * @since WP_VERSION
+ *
+ * @return bool Whether the currently matched tag is an empty element tag.
+ */
+ public function is_empty_element() {
+     if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+         return false;
+     }
+
+     /*
+      * The solidus that marks an empty element sits at the _end_ of the tag,
+      * directly before the closing ">", not at the beginning.
+      *
+      * Example:
+      *
+      *     <wp:content />
+      *                 ^ one byte before the final ">" of the token.
+      */
+     $token_ends_at = $this->token_starts_at + $this->token_length;
+     $solidus_at    = $token_ends_at - 2;
+
+     return '/' === $this->xml[ $solidus_at ];
+ }
+
+ /**
+ * Indicates if the current tag token is a tag closer.
+ *
+ * Example:
+ *
+ * $p = new WP_XML_Processor( '<wp:content></wp:content>' );
+ * $p->next_tag( array( 'tag_name' => 'wp:content', 'tag_closers' => 'visit' ) );
+ * $p->is_tag_closer() === false;
+ *
+ * $p->next_tag( array( 'tag_name' => 'wp:content', 'tag_closers' => 'visit' ) );
+ * $p->is_tag_closer() === true;
+ *
+ * @since WP_VERSION
+ *
+ * @return bool Whether the current tag is a tag closer.
+ */
+ public function is_tag_closer() {
+     // Only a matched tag can be a closer.
+     if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
+         return false;
+     }
+
+     return (bool) $this->is_closing_tag;
+ }
+
+ /**
+ * Indicates the kind of matched token, if any.
+ *
+ * This differs from `get_token_name()` in that it always
+ * returns a static string indicating the type, whereas
+ * `get_token_name()` may return values derived from the
+ * token itself, such as a tag name or processing
+ * instruction tag.
+ *
+ * Possible values:
+ * - `#tag` when matched on a tag.
+ * - `#text` when matched on a text node.
+ * - `#cdata-section` when matched on a CDATA node.
+ * - `#comment` when matched on a comment.
+ * - `#presumptuous-tag` when matched on an empty tag closer.
+ *
+ * @since WP_VERSION
*
- * Bookmarks intentionally hide the internal string offsets
- * to which they refer. They are maintained internally as
- * updates are applied to the HTML document and therefore
- * retain their "position" - the location to which they
- * originally pointed. The inability to use bookmarks with
- * functions like `substr` is therefore intentional to guard
- * against accidentally breaking the HTML.
+ * @return string|null What kind of token is matched, or null.
+ */
+ public function get_token_type() {
+     // Every tag shares one static type, whatever its name is.
+     if ( self::STATE_MATCHED_TAG === $this->parser_state ) {
+         return '#tag';
+     }
+
+     // For every other token the name is already a static string (or null).
+     return $this->get_token_name();
+ }
+
+ /**
+ * Returns the node name represented by the token.
*
- * Because bookmarks allocate memory and require processing
- * for every applied update, they are limited and require
- * a name. They should not be created with programmatically-made
- * names, such as "li_{$index}" with some loop. As a general
- * rule they should only be created with string-literal names
- * like "start-of-section" or "last-paragraph".
+ * This matches the DOM API value `nodeName`. Some values
+ * are static, such as `#text` for a text node, while others
+ * are dynamically generated from the token itself.
*
- * Bookmarks are a powerful tool to enable complicated behavior.
- * Consider double-checking that you need this tool if you are
- * reaching for it, as inappropriate use could lead to broken
- * HTML structure or unwanted processing overhead.
+ * Dynamic names:
+ * - Tag name, exactly as written in the document, for tag matches.
+ *
+ * Note that if the Tag Processor is not matched on a token
+ * then this function will return `null`, either because it
+ * hasn't yet found a token or because it reached the end
+ * of the document without matching a token.
*
* @since WP_VERSION
*
- * @param string $bookmark_name Identifies this particular bookmark.
- * @return bool Whether the bookmark was successfully created.
+ * @return string|null Name of the matched token.
*/
- public function set_bookmark( $bookmark_name ) {
- return parent::set_bookmark( "_{$bookmark_name}" );
+ public function get_token_name() {
+     $state = $this->parser_state;
+
+     // Tags are the only tokens whose name comes from the document itself.
+     if ( self::STATE_MATCHED_TAG === $state ) {
+         return $this->get_tag();
+     }
+
+     switch ( $state ) {
+         case self::STATE_TEXT_NODE:
+             return '#text';
+
+         case self::STATE_CDATA_NODE:
+             return '#cdata-section';
+
+         case self::STATE_XML_DECLARATION:
+             return '#xml-declaration';
+
+         case self::STATE_PI_NODE:
+             return '#processing-instructions';
+
+         case self::STATE_COMMENT:
+             return '#comment';
+
+         default:
+             // Not paused on a token that has a name.
+             return null;
+     }
 }
/**
- * Moves the internal cursor in the HTML Processor to a given bookmark's location.
+ * Returns the modifiable text for a matched token, or an empty string.
*
- * Be careful! Seeking backwards to a previous location resets the parser to the
- * start of the document and reparses the entire contents up until it finds the
- * sought-after bookmarked location.
+ * Modifiable text is text content that may be read and changed without
+ * changing the XML structure of the document around it. This includes
+ * the contents of `#text` nodes in the XML as well as the inner
+ * contents of XML comments, Processing Instructions, and others, even
+ * though these nodes aren't part of a parsed DOM tree. They also contain
+ * the contents of SCRIPT and STYLE tags, of TEXTAREA tags, and of any
+ * other section in an XML document which cannot contain XML markup (DATA).
*
- * In order to prevent accidental infinite loops, there's a
- * maximum limit on the number of times seek() can be called.
+ * If a token has no modifiable text then an empty string is returned to
+ * avoid needless crashing or type errors. An empty string does not mean
+ * that a token has modifiable text, and a token with modifiable text may
+ * have an empty string (e.g. a comment with no contents).
+ *
+ * @since WP_VERSION
+ *
+ * @return string
+ */
+ public function get_modifiable_text() {
+     if ( null === $this->text_starts_at ) {
+         return '';
+     }
+
+     /*
+      * Line breaks are normalized before anything else happens:
+      *
+      * > the XML processor must behave as if it normalized all line breaks in external parsed
+      * > entities (including the document entity) on input, before parsing, by translating both
+      * > the two-character sequence #xD #xA and any #xD that is not followed by #xA to a single
+      * > #xA character.
+      *
+      * See https://www.w3.org/TR/xml/#sec-line-ends
+      */
+     $text = str_replace(
+         array( "\r\n", "\r" ),
+         "\n",
+         substr( $this->xml, $this->text_starts_at, $this->text_length )
+     );
+
+     // Comments, CDATA sections, and PCDATA elements hold literal text and are not decoded.
+     $is_literal_text =
+         self::STATE_CDATA_NODE === $this->parser_state ||
+         self::STATE_COMMENT === $this->parser_state ||
+         $this->is_pcdata_element();
+     if ( $is_literal_text ) {
+         return $text;
+     }
+
+     $decoded = WP_XML_Decoder::decode( $text );
+     if ( null === $decoded ) {
+         /*
+          * The decoder returned nothing, which means the text is invalid.
+          * This is recorded as a syntax error.
+          *
+          * @see WP_XML_Decoder::decode()
+          */
+         $this->last_error = self::ERROR_SYNTAX;
+         _doing_it_wrong(
+             __METHOD__,
+             __( 'Invalid text content encountered.' ),
+             'WP_VERSION'
+         );
+         return false;
+     }
+
+     return $decoded;
+ }
+
+ /**
+  * Replaces the modifiable text of the currently matched text node, comment,
+  * or CDATA section.
+  *
+  * The replacement is enqueued as a lexical update over the token's text span.
+  *
+  * @since WP_VERSION
+  *
+  * @param string $new_value Plaintext to store in the node.
+  * @return bool Whether the update was enqueued.
+  */
+ public function set_modifiable_text( $new_value ) {
+     switch ( $this->parser_state ) {
+         case self::STATE_TEXT_NODE:
+         case self::STATE_COMMENT:
+             /*
+              * NOTE(review): get_modifiable_text() returns comment text without
+              * decoding it, so entity-escaping here does not round-trip for
+              * comments. Confirm whether that is intended.
+              */
+             $this->lexical_updates[] = new WP_HTML_Text_Replacement(
+                 $this->text_starts_at,
+                 $this->text_length,
+                 // @TODO This is naive, let's rethink this.
+                 htmlspecialchars( $new_value, ENT_XML1, 'UTF-8' )
+             );
+             return true;
+
+         case self::STATE_CDATA_NODE:
+             /*
+              * A CDATA section ends at the first "]]>", so that sequence cannot
+              * appear inside one and there is no escape for it. Split it across
+              * two adjacent sections instead: the first one ends right after
+              * "]]" and the second one resumes with ">". The combined text of
+              * both sections equals the requested value.
+              */
+             $this->lexical_updates[] = new WP_HTML_Text_Replacement(
+                 $this->text_starts_at,
+                 $this->text_length,
+                 str_replace( ']]>', ']]]]><![CDATA[>', $new_value )
+             );
+             return true;
+
+         default:
+             _doing_it_wrong(
+                 __METHOD__,
+                 __( 'Cannot set text content on a non-text node.' ),
+                 'WP_VERSION'
+             );
+             return false;
+     }
+ }
+
+ /**
+ * Updates or creates a new attribute on the currently matched tag with the passed value.
*
- * @throws Exception When unable to allocate a bookmark for the next token in the input HTML document.
+ * For boolean attributes special handling is provided:
+ * - When `true` is passed as the value, then only the attribute name is added to the tag.
+ * - When `false` is passed, the attribute gets removed if it existed before.
+ *
+ * For string attributes, the value is escaped using the `esc_attr` function.
*
* @since WP_VERSION
*
- * @param string $bookmark_name Jump to the place in the document identified by this bookmark name.
- * @return bool Whether the internal cursor was successfully moved to the bookmark's location.
+ * @param string $name The attribute name to target.
+ * @param string|bool $value The new attribute value.
+ * @return bool Whether an attribute value was set.
*/
- public function seek( $bookmark_name ) {
- // Flush any pending updates to the document before beginning.
- $this->get_updated_xml();
- return parent::seek( "_{$bookmark_name}" );
+ public function set_attribute( $name, $value ) {
+     if ( ! is_string( $value ) ) {
+         _doing_it_wrong(
+             __METHOD__,
+             __( 'Non-string attribute values cannot be passed to set_attribute().' ),
+             'WP_VERSION'
+         );
+         return false;
+     }
+     if (
+         self::STATE_MATCHED_TAG !== $this->parser_state ||
+         $this->is_closing_tag
+     ) {
+         return false;
+     }
+
+     /*
+      * The name is written into the document verbatim, so refuse anything that
+      * would break out of the attribute syntax: an empty name, whitespace,
+      * quotes, or one of the markup characters `<`, `>`, `&`, `/` and `=`.
+      */
+     if (
+         ! is_string( $name ) ||
+         '' === $name ||
+         strlen( $name ) !== strcspn( $name, " \t\r\n\"'<>&/=" )
+     ) {
+         _doing_it_wrong(
+             __METHOD__,
+             __( 'Invalid attribute name.' ),
+             'WP_VERSION'
+         );
+         return false;
+     }
+
+     /*
+      * ENT_XML1 only selects the document type. Without ENT_QUOTES the quote
+      * characters are left untouched, and a `"` inside the value would end the
+      * double-quoted attribute early.
+      */
+     $value             = htmlspecialchars( $value, ENT_XML1 | ENT_QUOTES, 'UTF-8' );
+     $updated_attribute = "{$name}=\"{$value}\"";
+
+     /*
+      * > An attribute name must not appear more than once
+      * > in the same start-tag or empty-element tag.
+      *     - XML 1.0 spec
+      *
+      * @see https://www.w3.org/TR/xml/#sec-starttags
+      */
+     if ( isset( $this->attributes[ $name ] ) ) {
+         /*
+          * Update an existing attribute.
+          *
+          * Example – set attribute id to "new" in <item id="initial_id" />:
+          *
+          *     <item id="initial_id" />
+          *           ^-------------^
+          *           start         end
+          *     replacement: `id="new"`
+          *
+          *     Result: <item id="new" />
+          */
+         $existing_attribute             = $this->attributes[ $name ];
+         $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+             $existing_attribute->start,
+             $existing_attribute->length,
+             $updated_attribute
+         );
+     } else {
+         /*
+          * Create a new attribute right after the tag's name.
+          *
+          * Example – add attribute id="new" to <item />:
+          *
+          *     <item />
+          *          ^
+          *          start and end
+          *     replacement: ` id="new"`
+          *
+          *     Result: <item id="new" />
+          */
+         $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+             $this->tag_name_starts_at + $this->tag_name_length,
+             0,
+             ' ' . $updated_attribute
+         );
+     }
+
+     return true;
 }
/**
- * Removes a bookmark that is no longer needed.
+ * Remove an attribute from the currently-matched tag.
*
- * Releasing a bookmark frees up the small
- * performance overhead it requires.
+ * @since WP_VERSION
+ *
+ * @param string $name The attribute name to remove.
+ * @return bool Whether an attribute was removed.
+ */
+ public function remove_attribute( $name ) {
+     // Attributes can only be removed while matched on a tag opener.
+     $on_tag_opener = self::STATE_MATCHED_TAG === $this->parser_state && ! $this->is_closing_tag;
+     if ( ! $on_tag_opener ) {
+         return false;
+     }
+
+     if ( isset( $this->attributes[ $name ] ) ) {
+         /*
+          * The attribute exists in the document: replace its whole span
+          * with an empty string.
+          *
+          * Example – remove the attribute id from <item id="initial_id" />:
+          *
+          *     <item id="initial_id" />
+          *           ^-------------^
+          *           start         end
+          *     replacement: ``
+          *
+          *     Result: <item />
+          */
+         $attribute                      = $this->attributes[ $name ];
+         $this->lexical_updates[ $name ] = new WP_HTML_Text_Replacement(
+             $attribute->start,
+             $attribute->length,
+             ''
+         );
+
+         return true;
+     }
+
+     /*
+      * The attribute was not in the input document. It may still have been
+      * enqueued by an earlier `set_attribute()` call, in which case that
+      * pending update is dropped. Nothing is removed from the document itself,
+      * so the result is `false` either way.
+      */
+     if ( isset( $this->lexical_updates[ $name ] ) ) {
+         unset( $this->lexical_updates[ $name ] );
+     }
+
+     return false;
+ }
+
+ /**
+ * Returns the string representation of the XML Tag Processor.
*
* @since WP_VERSION
*
- * @param string $bookmark_name Name of the bookmark to remove.
- * @return bool Whether the bookmark already existed before removal.
+ * @see WP_XML_Processor::get_updated_xml()
+ *
+ * @return string The processed XML.
*/
- public function release_bookmark( $bookmark_name ) {
- return parent::release_bookmark( "_{$bookmark_name}" );
+ public function __toString() {
+ return $this->get_updated_xml(); // Casting to string yields the document with enqueued updates applied.
 }
/**
- * Checks whether a bookmark with the given name exists.
+ * Returns the string representation of the XML Tag Processor.
*
- * @since 6.5.0
+ * @since WP_VERSION
*
- * @param string $bookmark_name Name to identify a bookmark that potentially exists.
- * @return bool Whether that bookmark exists.
+ * @return string The processed XML.
*/
- public function has_bookmark( $bookmark_name ) {
- return parent::has_bookmark( "_{$bookmark_name}" );
+ public function get_updated_xml() {
+ $requires_no_updating = 0 === count( $this->lexical_updates );
+
+ /*
+ * When no updates are enqueued there is nothing to apply: return the
+ * document as it is and avoid a string copy.
+ */
+ if ( $requires_no_updating ) {
+ return $this->xml;
+ }
+
+ /*
+ * Keep track of the position right before the current token. This will
+ * be necessary for reparsing the current token after updating the XML.
+ * When no token has been matched yet, the start of the document is used.
+ */
+ $before_current_token = $this->token_starts_at ?? 0;
+
+ /*
+ * 1. Apply the enqueued edits. The value returned by apply_lexical_updates()
+ * is added to the saved offset; presumably it is the number of bytes by
+ * which the start of the current token shifted (TODO confirm against
+ * apply_lexical_updates()).
+ */
+ $before_current_token += $this->apply_lexical_updates( $before_current_token );
+
+ /*
+ * 2. Rewind to the start of the current token and reparse it.
+ *
+ * The cursor is placed back on the (possibly shifted) start of the token
+ * that was matched when this method was called, and that one token is parsed
+ * again so that its offsets and attributes reflect the updated document.
+ *
+ * From the caller's point of view the processor does not move: it was on
+ * this token before and it is on this token afterwards.
+ *
+ * parse_next_token() is called directly rather than one of the public
+ * stepping methods in order to avoid issues with subclasses that might add
+ * behaviors to those methods.
+ */
+ $this->bytes_already_parsed = $before_current_token;
+ $this->parse_next_token();
+
+ return $this->xml;
 }
/**
- * Low-level token iteration is not available in WP_XML_Processor
- * as it could lead to undefined behaviors.
+ * Finds the next token in the XML document.
+ *
+ * An XML document can be viewed as a stream of tokens,
+ * where tokens are things like XML tags, XML comments,
+ * text nodes, etc. This method finds the next token in
+ * the XML document and returns whether it found one.
+ *
+ * If it starts parsing a token and reaches the end of the
+ * document then it will seek to the start of the last
+ * token and pause, returning `false` to indicate that it
+ * failed to find a complete token.
*
- * @use WP_XML_Processor::next_tag() instead.
+ * Possible token types, based on the XML specification:
*
- * @return false
+ * - an XML tag
+ * - a text node - the plaintext inside tags.
+ * - a CData section
+ * - an XML comment.
+ * - a DOCTYPE declaration.
+ * - a processing instruction, e.g. ``.
+ *
+ * @return bool Whether a token was parsed.
*/
public function next_token() {
return $this->step();
}
/**
- * Steps through the XML document and stop at the next tag, if any.
+ * Moves the internal cursor to the next token in the XML document
+ * according to the XML specification.
+ *
+ * It considers the current XML context (prolog, element, or misc)
+ * and only expects the nodes that are allowed in that context.
*
* @since WP_VERSION
*
- * @param string $node_to_process Whether to parse the next node or reprocess the current node.
- * @return bool Whether a tag was matched.
+ * @access private
+ *
+ * @param int $node_to_process Whether to process the next node or
+ * reprocess the current node, e.g. using another parser context.
+ * @return bool Whether a token was parsed.
*/
private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
// Refuse to proceed if there was a previous error.
@@ -406,8 +2797,8 @@ private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
// Finish stepping when there are no more tokens in the document.
if (
- WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
- WP_XML_Tag_Processor::STATE_COMPLETE === $this->parser_state
+ WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+ WP_XML_Processor::STATE_COMPLETE === $this->parser_state
) {
return false;
}
@@ -416,17 +2807,15 @@ private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
if ( $this->is_empty_element() ) {
$this->pop_open_element();
}
- $this->base_class_next_token();
}
- static $i = 0;
switch ( $this->parser_context ) {
case self::IN_PROLOG_CONTEXT:
- return $this->step_in_prolog();
+ return $this->step_in_prolog( $node_to_process );
case self::IN_ELEMENT_CONTEXT:
- return $this->step_in_element();
+ return $this->step_in_element( $node_to_process );
case self::IN_MISC_CONTEXT:
- return $this->step_in_misc();
+ return $this->step_in_misc( $node_to_process );
default:
$this->last_error = self::ERROR_UNSUPPORTED;
return false;
@@ -439,19 +2828,31 @@ private function step( $node_to_process = self::PROCESS_NEXT_NODE ) {
* @since WP_VERSION
*
* @see https://www.w3.org/TR/xml/#NT-document.
- * @see WP_XML_Tag_Processor::step
+ * @see WP_XML_Processor::step
*
* @return bool Whether a node was found.
*/
- private function step_in_prolog() {
+ private function step_in_prolog( $node_to_process = self::PROCESS_NEXT_NODE ) {
+ if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+ $has_next_node = $this->parse_next_token();
+ if (
+ false === $has_next_node &&
+ ! $this->expecting_more_input
+ ) {
+ $this->last_error = self::ERROR_SYNTAX;
+ _doing_it_wrong( __METHOD__, 'The root element was not found.', 'WP_VERSION' );
+ return false;
+ }
+ }
+
// XML requires a root element. If we've reached the end of data in the prolog stage,
// before finding a root element, then the document is incomplete.
- if ( WP_XML_Tag_Processor::STATE_COMPLETE === $this->parser_state ) {
- $this->parser_state = self::STATE_INCOMPLETE_INPUT;
+ if ( WP_XML_Processor::STATE_COMPLETE === $this->parser_state ) {
+ $this->set_incomplete_input_or_parse_error();
return false;
}
// Do not step if we paused due to an incomplete input.
- if ( WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+ if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
return false;
}
switch ( $this->get_token_type() ) {
@@ -484,20 +2885,25 @@ private function step_in_prolog() {
* @since WP_VERSION
*
* @see https://www.w3.org/TR/xml/#NT-document.
- * @see WP_XML_Tag_Processor::step
+ * @see WP_XML_Processor::step
*
* @return bool Whether a node was found.
*/
- private function step_in_element() {
- // An XML document isn't complete until the root element is closed.
- if ( self::STATE_COMPLETE === $this->parser_state &&
- count( $this->stack_of_open_elements ) > 0
- ) {
- $this->parser_state = self::STATE_INCOMPLETE_INPUT;
- return false;
+ private function step_in_element( $node_to_process = self::PROCESS_NEXT_NODE ) {
+ if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+ $has_next_node = $this->parse_next_token();
+ if (
+ false === $has_next_node &&
+ ! $this->expecting_more_input
+ ) {
+ $this->last_error = self::ERROR_SYNTAX;
+ _doing_it_wrong( __METHOD__, 'A tag was not closed.', 'WP_VERSION' );
+ return false;
+ }
}
+
// Do not step if we paused due to an incomplete input.
- if ( WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+ if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
return false;
}
@@ -541,11 +2947,28 @@ private function step_in_element() {
* @since WP_VERSION
*
* @see https://www.w3.org/TR/xml/#NT-document.
- * @see WP_XML_Tag_Processor::step
+ * @see WP_XML_Processor::step
*
* @return bool Whether a node was found.
*/
- private function step_in_misc() {
+ private function step_in_misc( $node_to_process = self::PROCESS_NEXT_NODE ) {
+ if ( self::PROCESS_NEXT_NODE === $node_to_process ) {
+ $has_next_node = $this->parse_next_token();
+ if (
+ false === $has_next_node &&
+ ! $this->expecting_more_input
+ ) {
+ // Parsing is complete.
+ $this->parser_state = self::STATE_COMPLETE;
+ return true;
+ }
+ }
+
+ // Do not step if we paused due to an incomplete input.
+ if ( WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
+ return false;
+ }
+
if ( self::STATE_COMPLETE === $this->parser_state ) {
return true;
}
@@ -559,33 +2982,13 @@ private function step_in_misc() {
$whitespaces = strspn( $text, " \t\n\r" );
if ( strlen( $text ) !== $whitespaces ) {
$this->last_error = self::ERROR_SYNTAX;
- _doing_it_wrong( __METHOD__, 'Unexpected token type in prolog stage.', 'WP_VERSION' );
+ _doing_it_wrong( __METHOD__, 'Unexpected token type "' . $this->get_token_type() . '" in misc stage.', 'WP_VERSION' );
return false;
}
return $this->step();
default:
- /*
- * If we're at the end of the document, we can never be sure
- * whether it's complete or are we still waiting for a comment
- * or a processing directive. Let's mark the parse as complete
- * and let the API consumer decide whether they want to re-parse
- * once more data becomes available in.
- */
- if (
- WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state &&
- $this->is_incomplete_text_node
- ) {
- $text = $this->get_modifiable_text();
- // Non-whitespace characters are not allowed after the root element was closed.
- $contains_only_whitespace = strlen( $text ) === strspn( $text, " \t\n\r" );
- if ( $contains_only_whitespace ) {
- $this->parser_state = self::STATE_COMPLETE;
- return false;
- }
- }
-
$this->last_error = self::ERROR_SYNTAX;
- _doing_it_wrong( __METHOD__, 'Unexpected token type in misc stage.', 'WP_VERSION' );
+ _doing_it_wrong( __METHOD__, 'Unexpected token type "' . $this->get_token_type() . '" in misc stage.', 'WP_VERSION' );
return false;
}
}
@@ -622,7 +3025,7 @@ public function get_breadcrumbs() {
*
* Example:
*
- * $processor = new WP_XML_Tag_Processor( '' );
+ * $processor = new WP_XML_Processor( '' );
* $processor->next_tag( 'img' );
* true === $processor->matches_breadcrumbs( array( 'content', 'image' ) );
* true === $processor->matches_breadcrumbs( array( 'wp:post', 'content', 'image' ) );
@@ -707,10 +3110,165 @@ private function push_open_element( $tag_name ) {
);
}
- private function last_open_element() {
- return end( $this->stack_of_open_elements );
+ private function set_incomplete_input_or_parse_error() {
+ if ( $this->expecting_more_input ) {
+ $this->parser_state = self::STATE_INCOMPLETE_INPUT;
+ } else {
+ $this->parser_state = self::STATE_INVALID_DOCUMENT;
+ $this->last_error = self::ERROR_SYNTAX;
+ // @TODO: Add a more specific error message.
+ _doing_it_wrong( __METHOD__, 'Unexpected syntax encountered.', 'WP_VERSION' );
+ }
}
+ /**
+ * Parser Ready State.
+ *
+ * Indicates that the parser is ready to run and waiting for a state transition.
+ * It may not have started yet, or it may have just finished parsing a token and
+ * is ready to find the next one.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_READY = 'STATE_READY';
+
+ /**
+ * Parser Complete State.
+ *
+ * Indicates that the parser has reached the end of the document and there is
+ * nothing left to scan. It finished parsing the last token completely.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_COMPLETE = 'STATE_COMPLETE';
+
+ /**
+ * Parser Incomplete Input State.
+ *
+ * Indicates that the parser has reached the end of the document before finishing
+ * a token. It started parsing a token but there is a possibility that the input
+ * XML document was truncated in the middle of a token.
+ *
+ * The parser is reset at the start of the incomplete token and has paused. There
+ * is nothing more that can be scanned unless provided a more complete document.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT';
+
+ /**
+ * Parser Invalid Input State.
+ *
+ * Indicates that the parsed xml document contains malformed input and cannot be parsed.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_INVALID_DOCUMENT = 'STATE_INVALID_DOCUMENT';
+
+ /**
+ * Parser Matched Tag State.
+ *
+ * Indicates that the parser has found an XML tag and it's possible to get
+ * the tag name and read or modify its attributes (if it's not a closing tag).
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
+
+ /**
+ * Parser Text Node State.
+ *
+ * Indicates that the parser has found a text node and it's possible
+ * to read and modify that text.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
+
+ /**
+ * Parser CDATA Node State.
+ *
+ * Indicates that the parser has found a CDATA section and it's possible
+ * to read and modify its modifiable text, which is the content inside
+ * of the section itself. Unlike in HTML, CDATA sections in XML are not
+ * limited to foreign content; they may occur wherever text may occur.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
+
+ /**
+ * Indicates that the parser has found an XML processing instruction.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_PI_NODE = 'STATE_PI_NODE';
+
+ /**
+ * Indicates that the parser has found an XML declaration.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_XML_DECLARATION = 'STATE_XML_DECLARATION';
+
+ /**
+ * Indicates that the parser has found an XML comment and it's
+ * possible to read and modify its modifiable text.
+ *
+ * @since WP_VERSION
+ *
+ * @access private
+ */
+ const STATE_COMMENT = 'STATE_COMMENT';
+
+ /**
+ * Indicates that the parser encountered unsupported syntax and has bailed.
+ *
+ * @since WP_VERSION
+ *
+ * @var string
+ */
+ const ERROR_SYNTAX = 'syntax';
+
+ /**
+ * Indicates that the provided XML document contains a declaration that is
+ * unsupported by the parser.
+ *
+ * @since WP_VERSION
+ *
+ * @var string
+ */
+ const ERROR_UNSUPPORTED = 'unsupported';
+
+ /**
+ * Indicates that the parser encountered more XML tokens than it
+ * was able to process and has bailed.
+ *
+ * @since WP_VERSION
+ *
+ * @var string
+ */
+ const ERROR_EXCEEDED_MAX_BOOKMARKS = 'exceeded-max-bookmarks';
+
+
/**
* Indicates that we're parsing the `prolog` part of the XML
* document.
diff --git a/packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php b/packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php
deleted file mode 100644
index 26ac382e2e..0000000000
--- a/packages/playground/data-liberation/src/xml-api/WP_XML_Tag_Processor.php
+++ /dev/null
@@ -1,2837 +0,0 @@
-). We're
- * starting with 1.0, however, because most that's what most WXR
- * files declare.
- *
- * ## Future work
- *
- * @TODO: Skip over the following syntax elements:
- * *
- *
- * or
- *
- *
- *
- * ' >
- * %xx;
- * ]>
- *
- * @TODO: Support XML 1.1.
- * @package WordPress
- * @subpackage HTML-API
- * @since WP_VERSION
- */
-
-/**
- * Core class used to modify attributes in an XML document for tags matching a query.
- *
- * ## Usage
- *
- * Use of this class requires three steps:
- *
- * 1. Create a new class instance with your input XML document.
- * 2. Find the tag(s) you are looking for.
- * 3. Request changes to the attributes in those tag(s).
- *
- * Example:
- *
- * $tags = new WP_XML_Tag_Processor( $xml );
- * if ( $tags->next_tag( 'wp:option' ) ) {
- * $tags->set_attribute( 'selected', 'yes' );
- * }
- *
- * ### Finding tags
- *
- * The `next_tag()` function moves the internal cursor through
- * your input XML document until it finds a tag meeting any of
- * the supplied restrictions in the optional query argument. If
- * no argument is provided then it will find the next XML tag,
- * regardless of what kind it is.
- *
- * If you want to _find whatever the next tag is_:
- *
- * $tags->next_tag();
- *
- * | Goal | Query |
- * |-----------------------------------------------------------|---------------------------------------------------------------------------------|
- * | Find any tag. | `$tags->next_tag();` |
- * | Find next image tag. | `$tags->next_tag( array( 'tag_name' => 'wp:image' ) );` |
- * | Find next image tag (without passing the array). | `$tags->next_tag( 'wp:image' );` |
- *
- * If a tag was found meeting your criteria then `next_tag()`
- * will return `true` and you can proceed to modify it. If it
- * returns `false`, however, it failed to find the tag and
- * moved the cursor to the end of the file.
- *
- * Once the cursor reaches the end of the file the processor
- * is done and if you want to reach an earlier tag you will
- * need to recreate the processor and start over, as it's
- * unable to back up or move in reverse.
- *
- * See the section on bookmarks for an exception to this
- * no-backing-up rule.
- *
- * #### Custom queries
- *
- * Sometimes it's necessary to further inspect an XML tag than
- * the query syntax here permits. In these cases one may further
- * inspect the search results using the read-only functions
- * provided by the processor or external state or variables.
- *
- * Example:
- *
- * // Paint up to the first five `wp:musician` or `wp:actor` tags marked with the "jazzy" style.
- * $remaining_count = 5;
- * while ( $remaining_count > 0 && $tags->next_tag() ) {
- * if (
- * ( 'wp:musician' === $tags->get_tag() || 'wp:actor' === $tags->get_tag() ) &&
- * 'jazzy' === $tags->get_attribute( 'data-style' )
- * ) {
- * $tags->set_attribute( 'wp:theme-style', 'theme-style-everest-jazz' );
- * $remaining_count--;
- * }
- * }
- *
- * `get_attribute()` will return `null` if the attribute wasn't present
- * on the tag when it was called. It may return `""` (the empty string)
- * in cases where the attribute was present but its value was empty.
- * For boolean attributes, those whose name is present but no value is
- * given, it will return `true` (the only way to set `false` for an
- * attribute is to remove it).
- *
- * #### When matching fails
- *
- * When `next_tag()` returns `false` it could mean different things:
- *
- * - The requested tag wasn't found in the input document.
- * - The input document ended in the middle of an XML syntax element.
- *
- * When a document ends in the middle of a syntax element it will pause
- * the processor. This is to make it possible in the future to extend the
- * input document and proceed - an important requirement for chunked
- * streaming parsing of a document.
- *
- * Example:
- *
- * $processor = new WP_XML_Tag_Processor( 'This next_tag( array( 'tag_name' => 'wp:todo-list' ) ) ) {
- * $p->set_bookmark( 'list-start' );
- * while ( $p->next_tag( array( 'tag_closers' => 'visit' ) ) ) {
- * if ( 'wp:todo' === $p->get_tag() && $p->is_tag_closer() ) {
- * $p->set_bookmark( 'list-end' );
- * $p->seek( 'list-start' );
- * $p->set_attribute( 'data-contained-todos', (string) $total_todos );
- * $total_todos = 0;
- * $p->seek( 'list-end' );
- * break;
- * }
- *
- * if ( 'wp:todo-item' === $p->get_tag() && ! $p->is_tag_closer() ) {
- * $total_todos++;
- * }
- * }
- * }
- *
- * ## Tokens and finer-grained processing.
- *
- * It's possible to scan through every lexical token in the
- * XML document using the `next_token()` function. This
- * alternative form takes no argument and provides no built-in
- * query syntax.
- *
- * Example:
- *
- * $title = '(untitled)';
- * $text = '';
- * while ( $processor->next_token() ) {
- * switch ( $processor->get_token_name() ) {
- * case '#text':
- * $text .= $processor->get_modifiable_text();
- * break;
- *
- * case 'wp:new-line':
- * $text .= "\n";
- * break;
- *
- * case 'wp:title':
- * $title = $processor->get_modifiable_text();
- * break;
- * }
- * }
- * return trim( "# {$title}\n\n{$text}" );
- *
- * ### Tokens and _modifiable text_.
- *
- * #### Other tokens with modifiable text.
- *
- * There are also non-elements which are void/self-closing in nature and contain
- * modifiable text that is part of that individual syntax token itself.
- *
- * - `#text` nodes, whose entire token _is_ the modifiable text.
- * - XML comments and tokens that become comments due to some syntax error. The
- * text for these tokens is the portion of the comment inside of the syntax.
- * E.g. for `` the text is `" comment "` (note the spaces are included).
- * - `CDATA` sections, whose text is the content inside of the section itself. E.g. for
- * `` the text is `"some content"`.
- * - XML Processing instruction nodes like `` (with restrictions [1]).
- *
- * [1]: XML requires "xml" as a processing instruction name. The Tag Processor captures the entire
- * processing instruction as a single token up to the closing `?>`.
- *
- * ## Design and limitations
- *
- * The Tag Processor is designed to linearly scan XML documents and tokenize
- * XML tags and their attributes. It's designed to do this as efficiently as
- * possible without compromising parsing integrity. Therefore it will be
- * slower than some methods of modifying XML, such as those incorporating
- * over-simplified PCRE patterns, but will not introduce the defects and
- * failures that those methods bring in, which lead to broken page renders
- * and often to security vulnerabilities. On the other hand, it will be faster
- * than full-blown XML parsers such as DOMDocument and use considerably
- * less memory. It requires a negligible memory overhead, enough to consider
- * it a zero-overhead system.
- *
- * The performance characteristics are maintained by avoiding tree construction.
- *
- * The Tag Processor's checks the most important aspects of XML integrity as it scans
- * through the document. It verifies that a single root element exists, that are
- * no unclosed tags, and that each opener tag has a corresponding closer. It also
- * ensures no duplicate attributes exist on a single tag.
- *
- * At the same time, The Tag Processor also skips expensive validation of XML entities
- * in the document. The Tag Processor will initially pass through the invalid entity references
- * and only fail when the developer attempts to read their value. If that doesn't happen,
- * the invalid values will be left untouched in the final document.
- *
- * Most operations within the Tag Processor are designed to minimize the difference
- * between an input and output document for any given change. For example, the
- * `set_attribure` and `remove_attribute` methods preserve whitespace and the attribute
- * ordering within the element definition. An exception to this rule is that all attribute
- * updates store their values as double-quoted strings, meaning that attributes on input with
- * single-quoted or unquoted values will appear in the output with double-quotes.
- *
- * ### Text Encoding
- *
- * The Tag Processor assumes that the input XML document is encoded with a
- * UTF-8 encoding and will refuse to process documents that declare other encodings.
- *
- * @since WP_VERSION
- */
-class WP_XML_Tag_Processor {
- /**
- * The maximum number of bookmarks allowed to exist at
- * any given time.
- *
- * @since WP_VERSION
- * @var int
- *
- * @see WP_XML_Tag_Processor::set_bookmark()
- */
- const MAX_BOOKMARKS = 10;
-
- /**
- * Maximum number of times seek() can be called.
- * Prevents accidental infinite loops.
- *
- * @since WP_VERSION
- * @var int
- *
- * @see WP_XML_Tag_Processor::seek()
- */
- const MAX_SEEK_OPS = 1000;
-
- /**
- * The XML document to parse.
- *
- * @since WP_VERSION
- * @var string
- */
- public $xml;
-
- /**
- * The last query passed to next_tag().
- *
- * @since WP_VERSION
- * @var array|null
- */
- private $last_query;
-
- /**
- * The tag name this processor currently scans for.
- *
- * @since WP_VERSION
- * @var string|null
- */
- private $sought_tag_name;
-
- /**
- * The match offset this processor currently scans for.
- *
- * @since WP_VERSION
- * @var int|null
- */
- private $sought_match_offset;
-
- /**
- * Whether to visit tag closers, e.g. , when walking an input document.
- *
- * @since WP_VERSION
- * @var bool
- */
- private $stop_on_tag_closers;
-
- /**
- * Specifies mode of operation of the parser at any given time.
- *
- * | State | Meaning |
- * | ----------------|------------------------------------------------------------------------|
- * | *Ready* | The parser is ready to run. |
- * | *Complete* | There is nothing left to parse. |
- * | *Incomplete* | The XML ended in the middle of a token; nothing more can be parsed. |
- * | *Matched tag* | Found an XML tag; it's possible to modify its attributes. |
- * | *Text node* | Found a #text node; this is plaintext and modifiable. |
- * | *CDATA node* | Found a CDATA section; this is modifiable. |
- * | *PI node* | Found a processing instruction; this is modifiable. |
- * | *XML declaration* | Found an XML declaration; this is modifiable. |
- * | *Comment* | Found a comment or bogus comment; this is modifiable. |
- *
- * @since WP_VERSION
- *
- * @see WP_XML_Tag_Processor::STATE_READY
- * @see WP_XML_Tag_Processor::STATE_COMPLETE
- * @see WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT
- * @see WP_XML_Tag_Processor::STATE_MATCHED_TAG
- * @see WP_XML_Tag_Processor::STATE_TEXT_NODE
- * @see WP_XML_Tag_Processor::STATE_CDATA_NODE
- * @see WP_XML_Tag_Processor::STATE_PI_NODE
- * @see WP_XML_Tag_Processor::STATE_XML_DECLARATION
- * @see WP_XML_Tag_Processor::STATE_COMMENT
- *
- * @var string
- */
- protected $parser_state = self::STATE_READY;
-
- /**
- * Whether we stopped at an incomplete text node.
- *
- * If we are before the last tag in the document, every text
- * node is incomplete until we find the next tag. However,
- * if we are after the last tag, an incomplete all-whitespace
- * node may either mean we're the end of the document or
- * that we're still waiting for more data/
- *
- * This flag allows us to differentiate between these two
- * cases in context-aware APIs such as WP_XML_Processor.
- *
- * @var bool
- */
- protected $is_incomplete_text_node = false;
-
- /**
- * How many bytes from the original XML document have been read and parsed.
- *
- * This value points to the latest byte offset in the input document which
- * has been already parsed. It is the internal cursor for the Tag Processor
- * and updates while scanning through the XML tokens.
- *
- * @since WP_VERSION
- * @var int
- */
- public $bytes_already_parsed = 0;
-
- /**
- * Byte offset in input document where current token starts.
- *
- * Example:
- *
- * ...
- * 01234
- * - token starts at 0
- *
- * @since WP_VERSION
- *
- * @var int|null
- */
- protected $token_starts_at;
-
- /**
- * Byte length of current token.
- *
- * Example:
- *
- * ...
- * 012345678901234
- * - token length is 14 - 0 = 14
- *
- * a is a token.
- * 0123456789 123456789 123456789
- * - token length is 17 - 2 = 15
- *
- * @since WP_VERSION
- *
- * @var int|null
- */
- private $token_length;
-
- /**
- * Byte offset in input document where current tag name starts.
- *
- * Example:
- *
- * ...
- * 01234
- * - tag name starts at 1
- *
- * @since WP_VERSION
- *
- * @var int|null
- */
- private $tag_name_starts_at;
-
- /**
- * Byte length of current tag name.
- *
- * Example:
- *
- * ...
- * 01234
- * --- tag name length is 3
- *
- * @since WP_VERSION
- *
- * @var int|null
- */
- private $tag_name_length;
-
- /**
- * Byte offset into input document where current modifiable text starts.
- *
- * @since WP_VERSION
- *
- * @var int
- */
- private $text_starts_at;
-
- /**
- * Byte length of modifiable text.
- *
- * @since WP_VERSION
- *
- * @var string
- */
- private $text_length;
-
- /**
- * Whether the current tag is an opening tag, e.g. , or a closing tag, e.g. .
- *
- * @var bool
- */
- private $is_closing_tag;
-
- /**
- * Stores an explanation for why something failed, if it did.
- *
- * @see self::get_last_error
- *
- * @since WP_VERSION
- *
- * @var string|null
- */
- protected $last_error = null;
-
- /**
- * Lazily-built index of attributes found within an XML tag, keyed by the attribute name.
- *
- * Example:
- *
- * // Supposing the parser is working through this content
- * // and stops after recognizing the `id` attribute.
- * //
- * // ^ parsing will continue from this point.
- * $this->attributes = array(
- * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false )
- * );
- *
- * // When picking up parsing again, or when asking to find the
- * // `class` attribute we will continue and add to this array.
- * $this->attributes = array(
- * 'id' => new WP_HTML_Attribute_Token( 'id', 9, 6, 5, 11, false ),
- * 'class' => new WP_HTML_Attribute_Token( 'class', 23, 7, 17, 13, false )
- * );
- *
- * @since WP_VERSION
- * @var WP_HTML_Attribute_Token[]
- */
- private $attributes = array();
-
- /**
- * Tracks a semantic location in the original XML which
- * shifts with updates as they are applied to the document.
- *
- * @since WP_VERSION
- * @var WP_HTML_Span[]
- */
- protected $bookmarks = array();
-
- /**
- * Lexical replacements to apply to input XML document.
- *
- * "Lexical" in this class refers to the part of this class which
- * operates on pure text _as text_ and not as XML. There's a line
- * between the public interface, with XML-semantic methods like
- * `set_attribute` and `add_class`, and an internal state that tracks
- * text offsets in the input document.
- *
- * When higher-level XML methods are called, those have to transform their
- * operations (such as setting an attribute's value) into text diffing
- * operations (such as replacing the sub-string from indices A to B with
- * some given new string). These text-diffing operations are the lexical
- * updates.
- *
- * As new higher-level methods are added they need to collapse their
- * operations into these lower-level lexical updates since that's the
- * Tag Processor's internal language of change. Any code which creates
- * these lexical updates must ensure that they do not cross XML syntax
- * boundaries, however, so these should never be exposed outside of this
- * class or any classes which intentionally expand its functionality.
- *
- * These are enqueued while editing the document instead of being immediately
- * applied to avoid processing overhead, string allocations, and string
- * copies when applying many updates to a single document.
- *
- * Example:
- *
- * // Replace an attribute stored with a new value, indices
- * // sourced from the lazily-parsed XML recognizer.
- * $start = $attributes['src']->start;
- * $length = $attributes['src']->length;
- * $modifications[] = new WP_HTML_Text_Replacement( $start, $length, $new_value );
- *
- * // Correspondingly, something like this will appear in this array.
- * $lexical_updates = array(
- * WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' )
- * );
- *
- * @since WP_VERSION
- * @var WP_HTML_Text_Replacement[]
- */
- protected $lexical_updates = array();
-
- /**
- * Tracks and limits `seek()` calls to prevent accidental infinite loops.
- *
- * @since WP_VERSION
- * @var int
- *
- * @see WP_XML_Tag_Processor::seek()
- */
- protected $seek_count = 0;
-
- public $had_previous_chunks = false;
-
- /**
- * Constructor.
- *
- * @since WP_VERSION
- *
- * @param string $xml XML to process.
- */
- public function __construct( $xml ) {
- $this->xml = $xml;
- }
-
- /**
- * Finds the next element matching the $query.
- *
- * This doesn't currently have a way to represent non-tags and doesn't process
- * semantic rules for text nodes.
- *
- * @since WP_VERSION
- *
- * @param array|string|null $query {
- * Optional. Which element name to find. Default is to find any tag.
- *
- * @type string|null $tag_name Which tag to find, or `null` for "any tag."
- * @type int|null $match_offset Find the Nth tag matching all search criteria.
- * 1 for "first" tag, 3 for "third," etc.
- * Defaults to first tag.
- * @type string|null $tag_closers "visit" or "skip": whether to stop on tag closers, e.g. .
- * }
- * @return bool Whether a tag was matched.
- */
- public function next_tag( $query = null ) {
- $this->parse_query( $query );
- $already_found = 0;
-
- do {
- if ( false === $this->base_class_next_token() ) {
- return false;
- }
-
- if ( self::STATE_MATCHED_TAG !== $this->parser_state ) {
- continue;
- }
-
- if ( $this->matches() ) {
- ++$already_found;
- }
- } while ( $already_found < $this->sought_match_offset );
-
- return true;
- }
-
- /**
- * Finds the next token in the XML document.
- *
- * An XML document can be viewed as a stream of tokens,
- * where tokens are things like XML tags, XML comments,
- * text nodes, etc. This method finds the next token in
- * the XML document and returns whether it found one.
- *
- * If it starts parsing a token and reaches the end of the
- * document then it will seek to the start of the last
- * token and pause, returning `false` to indicate that it
- * failed to find a complete token.
- *
- * Possible token types, based on the XML specification:
- *
- * - an XML tag, whether opening, closing, or void.
- * - a text node - the plaintext inside tags.
- * - an XML comment.
- * - a processing instruction, e.g. ``.
- *
- * The Tag Processor currently only supports the tag token.
- *
- * @since WP_VERSION
- *
- * @access private
- *
- * @return bool Whether a token was parsed.
- */
- public function next_token() {
- return $this->base_class_next_token();
- }
-
- /**
- * Internal method which finds the next token in the HTML document.
- *
- * This method is a protected internal function which implements the logic for
- * finding the next token in a document. It exists so that the parser can update
- * its state without affecting the location of the cursor in the document and
- * without triggering subclass methods for things like `next_token()`, e.g. when
- * applying patches before searching for the next token.
- *
- * @since 6.5.0
- *
- * @access private
- *
- * @return bool Whether a token was parsed.
- */
- protected function base_class_next_token() {
- $was_at = $this->bytes_already_parsed;
- $this->after_tag();
-
- // Don't proceed if there's nothing more to scan.
- if (
- self::STATE_COMPLETE === $this->parser_state ||
- self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
- null !== $this->last_error
- ) {
- return false;
- }
-
- /*
- * The next step in the parsing loop determines the parsing state;
- * clear it so that state doesn't linger from the previous step.
- */
- $this->parser_state = self::STATE_READY;
-
- if ( $this->bytes_already_parsed >= strlen( $this->xml ) ) {
- $this->parser_state = self::STATE_COMPLETE;
- return false;
- }
-
- // Find the next tag if it exists.
- if ( false === $this->parse_next_tag() ) {
- if ( self::STATE_INCOMPLETE_INPUT === $this->parser_state ) {
- $this->bytes_already_parsed = $was_at;
- }
-
- return false;
- }
-
- if ( null !== $this->last_error ) {
- return false;
- }
-
- /*
- * For legacy reasons the rest of this function handles tags and their
- * attributes. If the processor has reached the end of the document
- * or if it matched any other token then it should return here to avoid
- * attempting to process tag-specific syntax.
- */
- if (
- self::STATE_INCOMPLETE_INPUT !== $this->parser_state &&
- self::STATE_COMPLETE !== $this->parser_state &&
- self::STATE_MATCHED_TAG !== $this->parser_state
- ) {
- return true;
- }
-
- if ( $this->is_closing_tag ) {
- $this->skip_whitespace();
- } else {
- // Parse all of its attributes.
- while ( $this->parse_next_attribute() ) {
- continue;
- }
- }
-
- if ( null !== $this->last_error ) {
- return false;
- }
-
- // Ensure that the tag closes before the end of the document.
- if (
- self::STATE_INCOMPLETE_INPUT === $this->parser_state ||
- $this->bytes_already_parsed >= strlen( $this->xml )
- ) {
- // Does this appropriately clear state (parsed attributes)?
- $this->parser_state = self::STATE_INCOMPLETE_INPUT;
- $this->bytes_already_parsed = $was_at;
-
- return false;
- }
-
- $tag_ends_at = strpos( $this->xml, '>', $this->bytes_already_parsed );
- if ( false === $tag_ends_at ) {
- $this->parser_state = self::STATE_INCOMPLETE_INPUT;
- $this->bytes_already_parsed = $was_at;
-
- return false;
- }
-
- if ( $this->is_closing_tag && $tag_ends_at !== $this->bytes_already_parsed ) {
- $this->last_error = self::ERROR_SYNTAX;
- _doing_it_wrong(
- __METHOD__,
- __( 'Invalid closing tag encountered.' ),
- 'WP_VERSION'
- );
- return false;
- }
-
- $this->parser_state = self::STATE_MATCHED_TAG;
- $this->bytes_already_parsed = $tag_ends_at + 1;
- $this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
-
- /*
- * If we are in a PCData element, everything until the closer
- * is considered text.
- */
- if ( ! $this->is_pcdata_element() ) {
- return true;
- }
-
- /*
- * Preserve the opening tag pointers, as these will be overwritten
- * when finding the closing tag. They will be reset after finding
- * the closing to tag to point to the opening of the special atomic
- * tag sequence.
- */
- $tag_name_starts_at = $this->tag_name_starts_at;
- $tag_name_length = $this->tag_name_length;
- $tag_ends_at = $this->token_starts_at + $this->token_length;
- $attributes = $this->attributes;
-
- $found_closer = $this->skip_pcdata( $this->get_tag() );
-
- // Closer not found, the document is incomplete.
- if ( false === $found_closer ) {
- $this->parser_state = self::STATE_INCOMPLETE_INPUT;
- $this->bytes_already_parsed = $was_at;
- return false;
- }
-
- /*
- * The values here look like they reference the opening tag but they reference
- * the closing tag instead. This is why the opening tag values were stored
- * above in a variable. It reads confusingly here, but that's because the
- * functions that skip the contents have moved all the internal cursors past
- * the inner content of the tag.
- */
- $this->token_starts_at = $was_at;
- $this->token_length = $this->bytes_already_parsed - $this->token_starts_at;
- $this->text_starts_at = $tag_ends_at;
- $this->text_length = $this->tag_name_starts_at - $this->text_starts_at;
- $this->tag_name_starts_at = $tag_name_starts_at;
- $this->tag_name_length = $tag_name_length;
- $this->attributes = $attributes;
-
- return true;
- }
-
- /**
- * Whether the processor paused because the input XML document ended
- * in the middle of a syntax element, such as in the middle of a tag.
- *
- * Example:
- *
- * $processor = new WP_XML_Tag_Processor( '
Surprising fact you may no…
- * ^ ^
- * \-|-- it shifts with edits
- *
- * Bookmarks provide the ability to seek to a previously-scanned
- * place in the XML document. This avoids the need to re-scan
- * the entire document.
- *
- * Example:
- *
- *
- * ↑ │ back up by the length of the tag name plus the opening <
- * └←─┘ back up by strlen("em") + 1 ==> 3
- */
- $this->bytes_already_parsed = $before_current_tag;
- $this->base_class_next_token();
-
- return $this->xml;
- }
-
- /**
- * Parses tag query input into internal search criteria.
- *
- * @since WP_VERSION
- *
- * @param array|string|null $query {
- * Optional. Which tag name to find, having which class, etc. Default is to find any tag.
- *
- * @type string|null $tag_name Which tag to find, or `null` for "any tag."
- * @type int|null $match_offset Find the Nth tag matching all search criteria.
- * 1 for "first" tag, 3 for "third," etc.
- * Defaults to first tag.
- * @type string $tag_closers "visit" or "skip": whether to stop on tag closers, e.g.
.
- * }
- */
- private function parse_query( $query ) {
- if ( null !== $query && $query === $this->last_query ) {
- return;
- }
-
- $this->last_query = $query;
- $this->sought_tag_name = null;
- $this->sought_match_offset = 1;
- $this->stop_on_tag_closers = false;
-
- // A single string value means "find the tag of this name".
- if ( is_string( $query ) ) {
- $this->sought_tag_name = $query;
- return;
- }
-
- // An empty query parameter applies no restrictions on the search.
- if ( null === $query ) {
- return;
- }
-
- // If not using the string interface, an associative array is required.
- if ( ! is_array( $query ) ) {
- _doing_it_wrong(
- __METHOD__,
- __( 'The query argument must be an array or a tag name.' ),
- 'WP_VERSION'
- );
- return;
- }
-
- if ( isset( $query['tag_name'] ) && is_string( $query['tag_name'] ) ) {
- $this->sought_tag_name = $query['tag_name'];
- }
-
- if ( isset( $query['match_offset'] ) && is_int( $query['match_offset'] ) && 0 < $query['match_offset'] ) {
- $this->sought_match_offset = $query['match_offset'];
- }
-
- if ( isset( $query['tag_closers'] ) ) {
- $this->stop_on_tag_closers = 'visit' === $query['tag_closers'];
- }
- }
-
-
- /**
- * Checks whether a given tag and its attributes match the search criteria.
- *
- * @since WP_VERSION
- *
- * @return bool Whether the given tag and its attribute match the search criteria.
- */
- private function matches() {
- if ( $this->is_closing_tag && ! $this->stop_on_tag_closers ) {
- return false;
- }
-
- // Does the tag name match the requested tag name in a case-insensitive manner?
- if ( null !== $this->sought_tag_name ) {
- /*
- * String (byte) length lookup is fast. If they aren't the
- * same length then they can't be the same string values.
- */
- if ( strlen( $this->sought_tag_name ) !== $this->tag_name_length ) {
- return false;
- }
-
- /*
- * Check each character to determine if they are the same.
- */
- for ( $i = 0; $i < $this->tag_name_length; $i++ ) {
- if ( $this->xml[ $this->tag_name_starts_at + $i ] !== $this->sought_tag_name[ $i ] ) {
- return false;
- }
- }
- }
-
- return true;
- }
-
- /**
- * Parser Ready State.
- *
- * Indicates that the parser is ready to run and waiting for a state transition.
- * It may not have started yet, or it may have just finished parsing a token and
- * is ready to find the next one.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_READY = 'STATE_READY';
-
- /**
- * Parser Complete State.
- *
- * Indicates that the parser has reached the end of the document and there is
- * nothing left to scan. It finished parsing the last token completely.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_COMPLETE = 'STATE_COMPLETE';
-
- /**
- * Parser Incomplete Input State.
- *
- * Indicates that the parser has reached the end of the document before finishing
- * a token. It started parsing a token but there is a possibility that the input
- * XML document was truncated in the middle of a token.
- *
- * The parser is reset at the start of the incomplete token and has paused. There
- * is nothing more than can be scanned unless provided a more complete document.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_INCOMPLETE_INPUT = 'STATE_INCOMPLETE_INPUT';
-
- /**
- * Parser Invalid Input State.
- *
- * Indicates that the parsed xml document contains malformed input and cannot be parsed.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_INVALID_DOCUMENT = 'STATE_INVALID_DOCUMENT';
-
- /**
- * Parser Matched Tag State.
- *
- * Indicates that the parser has found an XML tag and it's possible to get
- * the tag name and read or modify its attributes (if it's not a closing tag).
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_MATCHED_TAG = 'STATE_MATCHED_TAG';
-
- /**
- * Parser Text Node State.
- *
- * Indicates that the parser has found a text node and it's possible
- * to read and modify that text.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_TEXT_NODE = 'STATE_TEXT_NODE';
-
- /**
- * Parser CDATA Node State.
- *
- * Indicates that the parser has found a CDATA node and it's possible
- * to read and modify its modifiable text. Note that in XML there are
- * no CDATA nodes outside of foreign content (SVG and MathML). Outside
- * of foreign content, they are treated as XML comments.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_CDATA_NODE = 'STATE_CDATA_NODE';
-
- /**
- * Indicates that the parser has found an XML processing instruction.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_PI_NODE = 'STATE_PI_NODE';
-
- /**
- * Indicates that the parser has found an XML declaration
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_XML_DECLARATION = 'STATE_XML_DECLARATION';
-
- /**
- * Indicates that the parser has found an XML comment and it's
- * possible to read and modify its modifiable text.
- *
- * @since WP_VERSION
- *
- * @access private
- */
- const STATE_COMMENT = 'STATE_COMMENT';
-
- /**
- * Indicates that the parser encountered unsupported syntax and has bailed.
- *
- * @since WP_VERSION
- *
- * @var string
- */
- const ERROR_SYNTAX = 'syntax';
-
- /**
- * Indicates that the provided XML document contains a declaration that is
- * unsupported by the parser.
- *
- * @since WP_VERSION
- *
- * @var string
- */
- const ERROR_UNSUPPORTED = 'unsupported';
-
- /**
- * Indicates that the parser encountered more XML tokens than it
- * was able to process and has bailed.
- *
- * @since WP_VERSION
- *
- * @var string
- */
- const ERROR_EXCEEDED_MAX_BOOKMARKS = 'exceeded-max-bookmarks';
-}
diff --git a/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php b/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php
index 3d66d628f7..3323fdf249 100644
--- a/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php
+++ b/packages/playground/data-liberation/tests/WPWXRURLRewriterTests.php
@@ -11,7 +11,7 @@ public function test_process($fixture_path, $expected_outcome_path) {
$chain = new WP_Stream_Chain(
[
'file' => new WP_File_Byte_Stream($fixture_path, 100),
- 'wxr' => WP_WXR_URL_Rewrite_Processor::stream(
+ 'wxr' => WP_WXR_URL_Rewrite_Processor::create_stream_processor(
'https://playground.internal/path',
'https://playground.wordpress.net/new-path'
),
diff --git a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
index 276eb7c311..974038ce73 100644
--- a/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
+++ b/packages/playground/data-liberation/tests/WPXMLProcessorTests.php
@@ -5,7 +5,6 @@
* @package WordPress
* @subpackage XML-API
*/
-
use PHPUnit\Framework\TestCase;
/**
@@ -14,6 +13,1435 @@
* @coversDefaultClass WP_XML_Processor
*/
class WPXMLProcessorTests extends TestCase {
+ const XML_SIMPLE = 'Text';
+ const XML_WITH_CLASSES = 'Text';
+ const XML_MALFORMED = 'Back to notifications';
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_tag
+ */
+ public function test_get_tag_returns_null_before_finding_tags() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_tag
+ */
+ public function test_get_tag_returns_null_when_not_in_open_tag() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+ $this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_tag
+ */
+ public function test_get_tag_returns_open_tag_name() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+ $this->assertSame( 'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "wp:content"' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::is_empty_element
+ *
+ * @dataProvider data_is_empty_element
+ *
+ * @param string $xml Input XML whose first tag might contain the self-closing flag `/`.
+ * @param bool $flag_is_set Whether the input XML's first tag contains the self-closing flag.
+ */
+ public function test_is_empty_element_matches_input_xml( $xml, $flag_is_set ) {
+ $processor = WP_XML_Processor::from_string( $xml );
+ $processor->next_tag( array( 'tag_closers' => 'visit' ) );
+
+ if ( $flag_is_set ) {
+ $this->assertTrue( $processor->is_empty_element(), 'Did not find the empty element tag when it was present.' );
+ } else {
+ $this->assertFalse( $processor->is_empty_element(), 'Found the empty element tag when it was absent.' );
+ }
+ }
+
+ /**
+ * Data provider. XML tags which might have a self-closing flag, and an indicator if they do.
+ *
+ * @return array[]
+ */
+ public static function data_is_empty_element() {
+ return array(
+ // These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing.
+ 'Self-closing flag on non-void XML element' => array( '', true ),
+ 'No self-closing flag on non-void XML element' => array( '', false ),
+ // These should not have a self-closer, but are benign when used because the elements are void.
+ 'Self-closing flag on void XML element' => array( '', true ),
+ 'No self-closing flag on void XML element' => array( '', false ),
+ 'Self-closing flag on void XML element without spacing' => array( '', true ),
+ // These should not have a self-closer, but as part of a tag closer they are entirely ignored.
+ 'No self-closing flag on tag closer' => array( '', false ),
+ // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing.
+ 'Self-closing flag on a foreign element' => array( '', true ),
+ 'No self-closing flag on a foreign element' => array( '', false ),
+ // These involve syntax peculiarities.
+ 'Self-closing flag after extra spaces' => array( '', true ),
+ 'Self-closing flag after quoted attribute' => array( '', true ),
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_get_attribute_returns_null_when_not_in_open_tag() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+ $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_get_attribute_returns_null_when_in_closing_tag() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+ $this->assertTrue( $processor->next_token(), 'Querying a text node did not return true' );
+ $this->assertTrue( $processor->next_token(), 'Querying an existing closing tag did not return true' );
+ $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_get_attribute_returns_null_when_attribute_missing() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+ $this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @expectedIncorrectUsage WP_XML_Processor::base_class_next_token
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_attributes_are_rejected_in_tag_closers() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+ $this->assertTrue( $processor->next_token(), 'Querying a text node did not return true.' );
+ $this->assertFalse( $processor->next_token(), 'Querying an existing but invalid closing tag did not return false.' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_get_attribute_returns_attribute_value() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
+ $this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' );
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_parsing_stops_on_malformed_attribute_value_no_value() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_parsing_stops_on_malformed_attribute_value_no_quotes() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::get_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
+ $this->assertEquals('WordPress & WordPress', $processor->get_attribute('enabled'));
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::get_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
+ $this->assertEquals('', $processor->get_attribute('enabled'));
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_parsing_stops_on_malformed_tags_duplicate_attributes() {
+ $processor = WP_XML_Processor::from_string( 'Text' );
+
+ $this->assertFalse( $processor->next_tag() );
+ }
+
+ /**
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_parsing_stops_on_malformed_attribute_name_contains_slash() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+
+ $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_modifiable_text
+ */
+ public function test_get_modifiable_text_returns_a_decoded_value() {
+ $processor = WP_XML_Processor::from_string( '“😄”' );
+
+ $processor->next_tag( 'root' );
+ $processor->next_token();
+
+ $this->assertEquals(
+ '“😄”',
+ $processor->get_modifiable_text(),
+ 'Reading an encoded text did not decode it.'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute
+ */
+ public function test_get_attribute_returns_a_decoded_value() {
+ $processor = WP_XML_Processor::from_string( '' );
+
+ $this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' );
+ $this->assertEquals(
+ '“😄”',
+ $processor->get_attribute( 'encoded-data' ),
+ 'Reading an encoded attribute did not decode it.'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute
+ *
+ * @param string $attribute_name Name of data-enabled attribute with case variations.
+ */
+ public function test_get_attribute_is_case_sensitive() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+
+ $this->assertEquals(
+ 'true',
+ $processor->get_attribute( 'DATA-enabled' ),
+ 'Accessing an attribute by a same-cased name did not return its value'
+ );
+
+ $this->assertNull(
+ $processor->get_attribute( 'data-enabled' ),
+ 'Accessing an attribute by a differently-cased name did return its value'
+ );
+ }
+
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::remove_attribute
+ */
+ public function test_remove_attribute_is_case_sensitive() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+ $processor->remove_attribute( 'data-enabled' );
+
+ $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did remove the attribute' );
+
+ $processor->remove_attribute( 'DATA-enabled' );
+
+ $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did not remove the attribute' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::set_attribute
+ */
+ public function test_set_attribute_is_case_sensitive() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+ $processor->set_attribute( 'data-enabled', 'abc' );
+
+ $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-insensitive set_attribute call did not update the existing attribute' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute_names_with_prefix
+ */
+ public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $this->assertNull(
+ $processor->get_attribute_names_with_prefix( 'data-' ),
+ 'Accessing attributes by their prefix did not return null when no tag was selected'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute_names_with_prefix
+ */
+ public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag( 'p' );
+ $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a non-existing tag did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute_names_with_prefix
+ */
+ public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag( 'wp:content' );
+ $processor->next_tag( array( 'tag_closers' => 'visit' ) );
+
+ $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a closing tag did not return null' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute_names_with_prefix
+ */
+ public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag( 'wp:content' );
+
+ $this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute_names_with_prefix
+ */
+ public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+
+ $this->assertSame(
+ array( 'data-test-ID' ),
+ $processor->get_attribute_names_with_prefix( 'data-' ),
+ 'Accessing attributes by their prefix did not return their names in the original case'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_attribute_names_with_prefix
+ */
+ public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+ $processor->set_attribute( 'data-test-id', '14' );
+
+ $this->assertSame(
+ 'Test',
+ $processor->get_updated_xml(),
+ "Updated XML doesn't include attribute added via set_attribute"
+ );
+ $this->assertSame(
+ array( 'data-test-id', 'data-foo' ),
+ $processor->get_attribute_names_with_prefix( 'data-' ),
+ "Accessing attribute names doesn't find attribute added via set_attribute"
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::__toString
+ */
+ public function test_to_string_returns_updated_xml() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+ $processor->remove_attribute( 'id' );
+
+ $processor->next_tag();
+ $processor->set_attribute( 'id', 'wp:content-id-1' );
+
+ $this->assertSame(
+ $processor->get_updated_xml(),
+ (string) $processor,
+ 'get_updated_xml() returned a different value than __toString()'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_updated_xml
+ */
+ public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
+ $processor = WP_XML_Processor::from_string( 'Test' );
+ $processor->next_tag();
+ $processor->remove_attribute( 'id' );
+
+ $processor->next_tag();
+ $processor->set_attribute( 'id', 'wp:content-id-1' );
+
+ $this->assertSame(
+ 'Test',
+ $processor->get_updated_xml(),
+ 'Calling get_updated_xml after updating the attributes of the second tag returned different XML than expected'
+ );
+
+ $processor->set_attribute( 'id', 'wp:content-id-2' );
+
+ $this->assertSame(
+ 'Test',
+ $processor->get_updated_xml(),
+ 'Calling get_updated_xml after updating the attributes of the second tag for the second time returned different XML than expected'
+ );
+
+ $processor->next_tag();
+ $processor->remove_attribute( 'id' );
+
+ $this->assertSame(
+ 'Test',
+ $processor->get_updated_xml(),
+ 'Calling get_updated_xml after removing the id attribute of the third tag returned different XML than expected'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_updated_xml
+ */
+ public function test_get_updated_xml_without_updating_any_attributes_returns_the_original_xml() {
+ $processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+
+ $this->assertSame(
+ self::XML_SIMPLE,
+ $processor->get_updated_xml(),
+ 'Calling get_updated_xml() without performing any updates did not return the initial XML snippet'
+ );
+ }
+
+ /**
+ * Ensures that when seeking to an earlier spot in the document that
+ * all previously-enqueued updates are applied as they ought to be.
+ *
+ * @ticket 61365
+ * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+ */
+ public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() {
+ $processor = WP_XML_Processor::from_string( '' );
+
+ $processor->next_tag();
+ $processor->set_attribute( 'wonky', 'true' );
+ $processor->next_tag();
+ $processor->set_bookmark( 'here' );
+
+ $processor->next_tag( array( 'tag_closers' => 'visit' ) );
+ $processor->seek( 'here' );
+
+ $this->assertSame( '', $processor->get_updated_xml() );
+ }
+
+ public function test_declare_element_as_pcdata() {
+ $text = '
+ This text contains syntax that may seem
+ like XML nodes:
+
+
+
+
+
+
+ &<>"'
+
+ But! It is all treated as text.
+ ';
+ $processor = WP_XML_Processor::from_string(
+ "$text"
+ );
+ $processor->declare_element_as_pcdata( 'my-pcdata' );
+ $processor->next_tag( 'my-pcdata' );
+
+ $this->assertEquals(
+ $text,
+ $processor->get_modifiable_text(),
+ 'get_modifiable_text() did not return the expected text'
+ );
+ }
+
+ /**
+ * Ensures that bookmarks start and length correctly describe a given token in XML.
+ *
+ * @ticket 61365
+ *
+ * @dataProvider data_xml_nth_token_substring
+ *
+ * @param string $xml Input XML.
+ * @param int $match_nth_token Which token to inspect from input XML.
+ * @param string $expected_match Expected full raw token bookmark should capture.
+ */
+ public function test_token_bookmark_span( string $xml, int $match_nth_token, string $expected_match ) {
+ $processor = new class( $xml ) extends WP_XML_Processor {
+ public function __construct( $xml ) {
+ parent::__construct( $xml );
+ }
+
+ /**
+ * Returns the raw span of XML for the currently-matched
+ * token, or null if not paused on any token.
+ *
+ * @return string|null Raw XML content of currently-matched token,
+ * otherwise `null` if not matched.
+ */
+ public function get_raw_token() {
+ if (
+ WP_XML_Processor::STATE_READY === $this->parser_state ||
+ WP_XML_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
+ WP_XML_Processor::STATE_COMPLETE === $this->parser_state
+ ) {
+ return null;
+ }
+
+ $this->set_bookmark( 'mark' );
+ $mark = $this->bookmarks['mark'];
+
+ return substr( $this->xml, $mark->start, $mark->length );
+ }
+ };
+
+ for ( $i = 0; $i < $match_nth_token; $i++ ) {
+ $processor->next_token();
+ }
+
+ $raw_token = $processor->get_raw_token();
+ $this->assertIsString(
+ $raw_token,
+ "Failed to find raw token at position {$match_nth_token}: check test data provider."
+ );
+
+ $this->assertSame(
+ $expected_match,
+ $raw_token,
+ 'Bookmarked wrong span of text for full matched token.'
+ );
+ }
+
+ /**
+ * Data provider.
+ *
+ * @return array
+ */
+ public static function data_xml_nth_token_substring() {
+ return array(
+ // Tags.
+ 'DIV start tag' => array( '', 1, '' ),
+ 'DIV start tag with attributes' => array( '', 1, '' ),
+ 'Nested DIV' => array( '', 2, '' ),
+ 'Sibling DIV' => array( '', 3, '' ),
+ 'DIV before text' => array( ' text', 1, '' ),
+ 'DIV after comment' => array( '', 3, '' ),
+ 'DIV before comment' => array( ' ', 1, '' ),
+ 'Start "self-closing" tag' => array( '', 1, '' ),
+ 'Void tag' => array( '', 1, '' ),
+ 'Void tag w/self-closing flag' => array( '', 1, '' ),
+ 'Void tag inside DIV' => array( '', 2, '' ),
+
+ // Text.
+ 'Text' => array( 'Just text', 1, 'Just text' ),
+ 'Text in DIV' => array( 'Text', 2, 'Text' ),
+ 'Text before DIV' => array( 'Text', 1, 'Text' ),
+ 'Text after comment' => array( 'Text', 2, 'Text' ),
+ 'Text before comment' => array( 'Text ', 1, 'Text' ),
+
+ // Comments.
+ 'Comment' => array( '', 1, '' ),
+ 'Comment in DIV' => array( '', 2, '' ),
+ 'Comment before DIV' => array( '', 1, '' ),
+ 'Comment after DIV' => array( '', 3, '' ),
+ 'Comment after comment' => array( '', 2, '' ),
+ 'Comment before comment' => array( ' ', 1, '' ),
+ 'Empty comment' => array( '', 1, '' ),
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::next_tag
+ */
+ public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
+ $processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+
+ $this->assertTrue( $processor->next_tag(), 'Querying an existing tag did not return true' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::next_tag
+ */
+ public function test_next_tag_should_return_false_for_a_non_existing_tag() {
+ $processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+
+ $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_modifiable_text
+ */
+ public function test_normalizes_carriage_returns_in_text_nodes() {
+ $processor = WP_XML_Processor::from_string(
+ "We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns"
+ );
+ $processor->next_tag();
+ $processor->next_token();
+ $this->assertEquals(
+ "We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
+ $processor->get_modifiable_text(),
+ 'get_modifiable_text() did not normalize the carriage return characters'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_modifiable_text
+ */
+ public function test_normalizes_carriage_returns_in_cdata() {
+ $processor = WP_XML_Processor::from_string(
+ ""
+ );
+ $processor->next_tag();
+ $processor->next_token();
+ $this->assertEquals(
+ "We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
+ $processor->get_modifiable_text(),
+ 'get_modifiable_text() did not normalize the carriage return characters'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::next_tag
+ * @covers WP_XML_Processor::is_tag_closer
+ */
+ public function test_next_tag_should_not_stop_on_closers() {
+ $processor = WP_XML_Processor::from_string( '' );
+
+ $this->assertTrue( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Did not find desired tag opener' );
+ $this->assertFalse( $processor->next_tag( array( 'breadcrumbs' => array( 'wp:content' ) ) ), 'Visited an unwanted tag, a tag closer' );
+ }
+
+ /**
+ * Verifies that updates to a document before calls to `get_updated_xml()` don't
+ * lead to the XML Processor jumping to the wrong tag after the updates.
+ *
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::get_updated_xml
+ */
+ public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
+ $tags = WP_XML_Processor::from_string( 'outsideinside' );
+
+ $tags->next_tag();
+ $tags->next_tag();
+ $tags->set_attribute( 'wp:post-type', 'foo' );
+ $tags->next_tag( 'section' );
+
+ // Return to this spot after moving ahead.
+ $tags->set_bookmark( 'here' );
+
+ // Move ahead.
+ $tags->next_tag( 'photo' );
+ $tags->seek( 'here' );
+ $this->assertSame( 'outsideinside', $tags->get_updated_xml() );
+ $this->assertSame( 'section', $tags->get_tag() );
+ $this->assertFalse( $tags->is_tag_closer() );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::set_attribute
+ */
+ public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
+ $processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+
+ $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
+ $this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' );
+
+ $processor->set_attribute( 'id', 'primary' );
+
+ $this->assertSame(
+ self::XML_SIMPLE,
+ $processor->get_updated_xml(),
+ 'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML'
+ );
+ }
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::set_attribute
+ * @covers WP_XML_Processor::remove_attribute
+ * @covers WP_XML_Processor::add_class
+ * @covers WP_XML_Processor::remove_class
+ */
+ public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
+ $processor = WP_XML_Processor::from_string( '' );
+ $processor->next_token();
+ $this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' );
+
+ $processor->next_token();
+ $this->assertTrue( $processor->is_tag_closer(), 'Skipped tag closer' );
+ $this->assertFalse( $processor->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" );
+ $this->assertFalse( $processor->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" );
+ $this->assertSame(
+ '',
+ $processor->get_updated_xml(),
+ 'Calling get_updated_xml after attempting attribute updates on a tag closer returned an XML that was different from the original XML'
+ );
+ }
+
+
+ /**
+ * @ticket 61365
+ *
+ * @covers WP_XML_Processor::set_attribute
+ */
+ public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
+ $processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+ $processor->next_tag();
+ $processor->set_attribute( 'test-attribute', 'test-value' );
+
+ $this->assertSame(
+ 'Text',
+ $processor->get_updated_xml(),
+ 'Updated XML does not include attribute added via set_attribute()'
+ );
+ $this->assertSame(
+ 'test-value',
+ $processor->get_attribute( 'test-attribute' ),
+ 'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()'
+ );
+ }
+
+	/**
+	 * Checks that get_attribute() already returns a value enqueued with
+	 * set_attribute() before get_updated_xml() is called, and that the
+	 * updated XML is compared afterwards.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_updated_values_before_they_are_applied() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+		$xml_processor->set_attribute( 'test-attribute', 'test-value' );
+
+		// Read the pending value first; get_updated_xml() has not been called yet.
+		$pending_value = $xml_processor->get_attribute( 'test-attribute' );
+		$this->assertSame(
+			'test-value',
+			$pending_value,
+			'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
+		);
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			'Text',
+			$updated_xml,
+			'Updated XML does not include attribute added via set_attribute()'
+		);
+	}
+
+	/**
+	 * Checks that a mixed-case attribute name set with set_attribute() can be
+	 * read back with get_attribute() before get_updated_xml() is called.
+	 *
+	 * Note that the attribute is written and read with the same spelling,
+	 * `test-ATTribute`.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+		$xml_processor->set_attribute( 'test-ATTribute', 'test-value' );
+
+		$pending_value = $xml_processor->get_attribute( 'test-ATTribute' );
+		$this->assertSame(
+			'test-value',
+			$pending_value,
+			'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
+		);
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			'Text',
+			$updated_xml,
+			'Updated XML does not include attribute added via set_attribute()'
+		);
+	}
+
+
+	/**
+	 * Checks that get_attribute() returns null for an attribute removed with
+	 * remove_attribute() even before get_updated_xml() is called.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+		$xml_processor->remove_attribute( 'id' );
+
+		$value_after_removal = $xml_processor->get_attribute( 'id' );
+		$this->assertNull(
+			$value_after_removal,
+			'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()'
+		);
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			'Text',
+			$updated_xml,
+			'Updated XML includes attribute that was removed by remove_attribute()'
+		);
+	}
+
+	/**
+	 * Checks that adding an attribute and then removing it again leaves
+	 * get_attribute() returning null and the updated XML equal to the input.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+
+		// Enqueue an addition and immediately cancel it with a removal.
+		$xml_processor->set_attribute( 'test-attribute', 'test-value' );
+		$xml_processor->remove_attribute( 'test-attribute' );
+
+		$value_after_removal = $xml_processor->get_attribute( 'test-attribute' );
+		$this->assertNull(
+			$value_after_removal,
+			'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()'
+		);
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			self::XML_SIMPLE,
+			$updated_xml,
+			'Updated XML includes attribute that was added via set_attribute() and then removed by remove_attribute()'
+		);
+	}
+
+	/**
+	 * Checks that overwriting the `id` attribute and then removing it leaves
+	 * get_attribute() returning null, and compares the updated XML afterwards.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::get_attribute
+	 */
+	public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+
+		// Overwrite first, then remove: the removal is the update that must win.
+		$xml_processor->set_attribute( 'id', 'test-value' );
+		$xml_processor->remove_attribute( 'id' );
+
+		$value_after_removal = $xml_processor->get_attribute( 'id' );
+		$this->assertNull(
+			$value_after_removal,
+			'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
+		);
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			'Text',
+			$updated_xml,
+			'Updated XML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
+		);
+	}
+
+	/**
+	 * Checks the updated XML after set_attribute() is called for the `id`
+	 * attribute with a new value.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+		$xml_processor->set_attribute( 'id', 'new-id' );
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame( 'Text', $updated_xml, 'Existing attribute was not updated' );
+	}
+
+	/**
+	 * Sets the same attribute three times in a row and compares the
+	 * lower-cased updated XML against the expected output, so that only a
+	 * single update may end up in the result.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() {
+		$xml_processor = WP_XML_Processor::from_string( '' );
+		$xml_processor->next_tag();
+
+		// Apply the canary values in order; each call targets the same attribute name.
+		foreach ( array( 'canary1', 'canary2', 'canary3' ) as $canary_value ) {
+			$xml_processor->set_attribute( 'data-enabled', $canary_value );
+		}
+
+		$lowercased_xml = strtolower( $xml_processor->get_updated_xml() );
+		$this->assertSame( '', $lowercased_xml );
+	}
+
+	/**
+	 * Walks every tag that next_tag() finds, sets `data-foo` on each one, and
+	 * then compares the updated XML against the expected output.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+
+		// Keep advancing until next_tag() reports that no further tag was matched.
+		while ( $xml_processor->next_tag() ) {
+			$xml_processor->set_attribute( 'data-foo', 'bar' );
+		}
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			'Text',
+			$updated_xml,
+			'Not all tags were updated when looping with next_tag() and set_attribute()'
+		);
+	}
+
+	/**
+	 * Checks the updated XML after remove_attribute() is called for the `id`
+	 * attribute of the first tag.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::remove_attribute
+	 */
+	public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+		$xml_processor->remove_attribute( 'id' );
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame( 'Text', $updated_xml, 'Attribute was not removed' );
+	}
+
+	/**
+	 * Checks that calling remove_attribute() with the name `no-such-attribute`
+	 * leaves the updated XML identical to the input.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::remove_attribute
+	 */
+	public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
+		$xml_processor = WP_XML_Processor::from_string( self::XML_SIMPLE );
+		$xml_processor->next_tag();
+		$xml_processor->remove_attribute( 'no-such-attribute' );
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			self::XML_SIMPLE,
+			$updated_xml,
+			'Content was changed when attempting to remove an attribute that did not exist'
+		);
+	}
+
+	/**
+	 * Matches two tags by breadcrumbs and `id`, removes the attribute from
+	 * the first, overwrites it on the second, and compares the updated XML.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() {
+		$xml_processor = WP_XML_Processor::from_string(
+			'Text'
+		);
+
+		// Queries for the two tags, in the order they are visited.
+		$first_query  = array(
+			'breadcrumbs' => array( 'wp:content' ),
+			'id'          => 'first',
+		);
+		$second_query = array(
+			'breadcrumbs' => array( 'wp:text' ),
+			'id'          => 'second',
+		);
+
+		$xml_processor->next_tag( $first_query );
+		$xml_processor->remove_attribute( 'id' );
+
+		$xml_processor->next_tag( $second_query );
+		$xml_processor->set_attribute( 'id', 'single-quote' );
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame( 'Text', $updated_xml, 'Did not remove single-quoted attribute' );
+	}
+
+	/**
+	 * Checks that set_attribute() returns false when it is given the boolean
+	 * `false` as the attribute value.
+	 *
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::parse_next_attribute
+	 * @expectedIncorrectUsage WP_XML_Processor::set_attribute
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_setting_an_attribute_to_false_is_rejected() {
+		$processor = WP_XML_Processor::from_string(
+			''
+		);
+		$processor->next_tag( 'input' );
+
+		// The boolean is passed as the attribute VALUE; the name 'checked' is an ordinary string.
+		$this->assertFalse(
+			$processor->set_attribute( 'checked', false ),
+			'Accepted a boolean attribute value.'
+		);
+	}
+
+	/**
+	 * Checks that calling set_attribute() with the value `false` leaves the
+	 * updated XML identical to the input.
+	 *
+	 * @ticket 61365
+	 * @expectedIncorrectUsage WP_XML_Processor::set_attribute
+	 *
+	 * @covers WP_XML_Processor::set_attribute
+	 */
+	public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
+		$original_xml  = '';
+		$xml_processor = WP_XML_Processor::from_string( $original_xml );
+		$xml_processor->next_tag( 'input' );
+		$xml_processor->set_attribute( 'checked', false );
+
+		$updated_xml = $xml_processor->get_updated_xml();
+		$this->assertSame(
+			$original_xml,
+			$updated_xml,
+			'Changed the markup unexpectedly when setting a non-existing attribute to false'
+		);
+	}
+
+	/**
+	 * Ensures that a streamed document ending inside an unclosed comment
+	 * yields no tag and leaves the processor paused at incomplete input.
+	 *
+	 * @ticket 61365
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 * @covers WP_XML_Processor::is_paused_at_incomplete_input
+	 *
+	 * @dataProvider data_xml_with_unclosed_comments
+	 *
+	 * @param string $xml_ending_before_comment_close XML with opened comments that aren't closed.
+	 */
+	public function test_documents_may_end_with_unclosed_comment( $xml_ending_before_comment_close ) {
+		// This test builds the processor with from_stream() rather than from_string().
+		$processor = WP_XML_Processor::from_stream( $xml_ending_before_comment_close );
+
+		$this->assertFalse(
+			$processor->next_tag(),
+			"Should not have found any tag, but found {$processor->get_tag()}."
+		);
+
+		$this->assertTrue(
+			$processor->is_paused_at_incomplete_input(),
+			"Should have indicated that the parser found an incomplete token but didn't."
+		);
+	}
+
+ /**
+ * Data provider.
+ *
+ * @return array[]
+ */
+ public static function data_xml_with_unclosed_comments() {
+ return array(
+ 'Shortest open valid comment' => array( '' );
+ $this->assertFalse( $processor->next_token(), 'Did not reject a malformed XML comment.' );
+ }
+
+	/**
+	 * Checks that next_tag() returns false for a document consisting of a
+	 * lone `<` character.
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_handles_malformed_taglike_open_short_xml() {
+		$xml_processor = WP_XML_Processor::from_string( '<' );
+
+		$this->assertFalse( $xml_processor->next_tag(), 'Did not handle "<" xml properly.' );
+	}
+
+	/**
+	 * Checks that next_tag() returns false for the short, malformed input
+	 * used by this test.
+	 *
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_handles_malformed_taglike_close_short_xml() {
+		$xml_processor = WP_XML_Processor::from_string( ' ' );
+
+		$this->assertFalse( $xml_processor->next_tag(), 'Did not handle " " xml properly.' );
+	}
+
+	/**
+	 * Checks that next_tag() returns false for the input used by this test;
+	 * an incorrect-usage notice from base_class_next_token is expected.
+	 *
+	 * @expectedIncorrectUsage WP_XML_Processor::base_class_next_token
+	 * @covers WP_XML_Processor::next_tag
+	 */
+	public function test_rejects_empty_element_that_is_also_a_closer() {
+		$xml_processor = WP_XML_Processor::from_string( ' ' );
+
+		$this->assertFalse( $xml_processor->next_tag(), 'Did not handle "" xml properly.' );
+	}
+
+	/**
+	 * Ensures that tag-like syntax following a valid text node is rejected:
+	 * the first two tokens are found, the text is read, and next_tag() then
+	 * returns false.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_single_text_node_with_taglike_text() {
+		$xml_processor = WP_XML_Processor::from_string( 'This is a text node< /A>' );
+
+		$found_root = $xml_processor->next_token();
+		$this->assertTrue( $found_root, 'A root node was not found.' );
+
+		$found_text = $xml_processor->next_token();
+		$this->assertTrue( $found_text, 'A valid text node was not found.' );
+
+		$text_content = $xml_processor->get_modifiable_text();
+		$this->assertEquals( 'This is a text node', $text_content, 'The contents of a valid text node were not correctly captured.' );
+
+		$found_tag = $xml_processor->next_tag();
+		$this->assertFalse( $found_tag, 'A malformed XML markup was not rejected.' );
+	}
+
+	/**
+	 * Ensures that the token following the first tag exposes the CDATA text
+	 * through get_modifiable_text().
+	 *
+	 * @ticket 61365
+	 */
+	public function test_parses_CDATA() {
+		$processor = WP_XML_Processor::from_string( '' );
+		$processor->next_tag();
+		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
+		$this->assertEquals(
+			'This is a CDATA text node.',
+			$processor->get_modifiable_text(),
+			'The contents of a CDATA text node were not correctly captured.'
+		);
+	}
+
+	/**
+	 * Ensures that three consecutive tokens after the first one are reported
+	 * separately, each with its own modifiable text: a text node, the CDATA
+	 * text, and a trailing text node.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_yields_CDATA_a_separate_text_node() {
+		$processor = WP_XML_Processor::from_string( 'This is the first text node and this is the third text node.' );
+
+		// Skip the first token; the three tokens after it are the ones under test.
+		$processor->next_token();
+		$this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
+		$this->assertEquals(
+			'This is the first text node ',
+			$processor->get_modifiable_text(),
+			'The contents of a valid text node were not correctly captured.'
+		);
+
+		$this->assertTrue( $processor->next_token(), 'The CDATA text node was not found.' );
+		$this->assertEquals(
+			' and this is a second text node ',
+			$processor->get_modifiable_text(),
+			'The contents of a CDATA text node were not correctly captured.'
+		);
+
+		$this->assertTrue( $processor->next_token(), 'The text node was not found.' );
+		$this->assertEquals(
+			' and this is the third text node.',
+			$processor->get_modifiable_text(),
+			'The contents of a valid text node were not correctly captured.'
+		);
+	}
+
+	/**
+	 * Checks that the first token is reported as `#xml-declaration` and that
+	 * its `version` and `encoding` attributes can be read.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_xml_declaration() {
+		$xml_processor = WP_XML_Processor::from_string( '' );
+
+		$found_declaration = $xml_processor->next_token();
+		$this->assertTrue( $found_declaration, 'The XML declaration was not found.' );
+
+		$token_type = $xml_processor->get_token_type();
+		$this->assertEquals( '#xml-declaration', $token_type, 'The XML declaration was not correctly identified.' );
+
+		$version = $xml_processor->get_attribute( 'version' );
+		$this->assertEquals( '1.0', $version, 'The version attribute was not correctly captured.' );
+
+		$encoding = $xml_processor->get_attribute( 'encoding' );
+		$this->assertEquals( 'UTF-8', $encoding, 'The encoding attribute was not correctly captured.' );
+	}
+
+	/**
+	 * Same checks as for the regular XML declaration test, for the variant
+	 * the test name describes as using single quotes: the token type plus the
+	 * `version` and `encoding` attributes.
+	 *
+	 * @ticket 61365
+	 */
+	public function test_xml_declaration_with_single_quotes() {
+		$xml_processor = WP_XML_Processor::from_string( "" );
+
+		$found_declaration = $xml_processor->next_token();
+		$this->assertTrue( $found_declaration, 'The XML declaration was not found.' );
+
+		$token_type = $xml_processor->get_token_type();
+		$this->assertEquals( '#xml-declaration', $token_type, 'The XML declaration was not correctly identified.' );
+
+		$version = $xml_processor->get_attribute( 'version' );
+		$this->assertEquals( '1.0', $version, 'The version attribute was not correctly captured.' );
+
+		$encoding = $xml_processor->get_attribute( 'encoding' );
+		$this->assertEquals( 'UTF-8', $encoding, 'The encoding attribute was not correctly captured.' );
+	}
+
+ /**
+ * Checks that the second token is reported as `#processing-instructions`
+ * and that its modifiable text can be read.
+ *
+ * @ticket 61365
+ */
+ public function test_processor_instructions() {
+ // NOTE(review): no argument reaches from_string() here; the two comment
+ // lines below look like remnants of the original string literals — confirm
+ // against the upstream test before relying on this block.
+ $processor = WP_XML_Processor::from_string(
+ // The first ' .
+ // The second '
+ );
+ // The first token is expected to be the XML declaration, the second the processing instruction.
+ $this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
+ $this->assertTrue( $processor->next_token(), 'The processing instruction was not found.' );
+ $this->assertEquals(
+ '#processing-instructions',
+ $processor->get_token_type(),
+ 'The processing instruction was not correctly identified.'
+ );
+ $this->assertEquals( ' stylesheet type="text/xsl" href="style.xsl" ', $processor->get_modifiable_text(), 'The modifiable text was not correctly captured.' );
+ }
+
+ /**
+ * Ensures that updates which are enqueued in front of the cursor
+ * are applied before moving forward in the document.
+ *
+ * The test inserts raw text after the current token through an anonymous
+ * subclass, expects the next next_tag() call to land on the inserted
+ * markup, and then compares the final updated XML.
+ *
+ * @ticket 61365
+ */
+ public function test_applies_updates_before_proceeding() {
+ $xml = '';
+
+ // Anonymous subclass that adds an insert_after() helper on top of the processor.
+ $subclass = new class( $xml ) extends WP_XML_Processor {
+ // Presumably redeclared to make the parent constructor callable from the test — TODO confirm.
+ public function __construct( $xml ) {
+ parent::__construct( $xml );
+ }
+
+ /**
+ * Inserts raw text after the current token.
+ *
+ * @param string $new_xml Raw text to insert.
+ */
+ public function insert_after( $new_xml ) {
+ // Bookmark the current token so its start and length can be read below.
+ $this->set_bookmark( 'here' );
+ // Enqueue an update positioned at start + length of the bookmark; the second
+ // argument is 0 — presumably the replaced length, making this a pure insertion.
+ $this->lexical_updates[] = new WP_HTML_Text_Replacement(
+ $this->bookmarks['here']->start + $this->bookmarks['here']->length,
+ 0,
+ $new_xml
+ );
+ }
+ };
+
+ $subclass->next_tag( 'photo' );
+ $subclass->insert_after( '
snow-capped
' );
+
+ // The inserted markup lies ahead of the cursor, so the next tag found should be the inserted one.
+ $subclass->next_tag();
+ $this->assertSame(
+ 'p',
+ $subclass->get_tag(),
+ 'Should have matched inserted XML as next tag.'
+ );
+
+ $subclass->next_tag( 'photo' );
+ $subclass->set_attribute( 'alt', 'mountain' );
+
+ $this->assertSame(
+ '
snow-capped
',
+ $subclass->get_updated_xml(),
+ 'Should have properly applied the update from in front of the cursor.'
+ );
+ }
+
/**
* @ticket 61365
@@ -22,7 +1450,7 @@ class WPXMLProcessorTests extends TestCase {
* @covers WP_XML_Processor::get_breadcrumbs
*/
public function test_get_breadcrumbs() {
- $processor = new WP_XML_Processor(
+ $processor = WP_XML_Processor::from_string(
'
@@ -60,7 +1488,7 @@ public function test_get_breadcrumbs() {
*/
public function test_matches_breadcrumbs() {
// Initialize the WP_XML_Processor with the given XML string
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
// Move to the next element with tag name 'img'
$processor->next_tag( 'image' );
@@ -79,7 +1507,7 @@ public function test_matches_breadcrumbs() {
*/
public function test_next_tag_by_breadcrumbs() {
// Initialize the WP_XML_Processor with the given XML string
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
// Move to the next element with tag name 'img'
$processor->next_tag(
@@ -98,7 +1526,7 @@ public function test_next_tag_by_breadcrumbs() {
*/
public function test_get_current_depth() {
// Initialize the WP_XML_Processor with the given XML string
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
// Assert that the initial depth is 0
$this->assertEquals( 0, $processor->get_current_depth() );
@@ -130,11 +1558,11 @@ public function test_get_current_depth() {
* @expectedIncorrectUsage WP_XML_Processor::step_in_misc
*/
public function test_no_text_allowed_after_root_element() {
- $processor = new WP_XML_Processor( 'text' );
+ $processor = WP_XML_Processor::from_string( 'text' );
$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
$this->assertEquals(
- WP_XML_Tag_Processor::ERROR_SYNTAX,
+ WP_XML_Processor::ERROR_SYNTAX,
$processor->get_last_error(),
'Did not run into a parse error after the root element'
);
@@ -144,7 +1572,7 @@ public function test_no_text_allowed_after_root_element() {
* @ticket 61365
*/
public function test_whitespace_text_allowed_after_root_element() {
- $processor = new WP_XML_Processor( ' ' );
+ $processor = WP_XML_Processor::from_string( ' ' );
$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
$this->assertNull( $processor->get_last_error(), 'Ran into a parse error after the root element' );
@@ -154,7 +1582,7 @@ public function test_whitespace_text_allowed_after_root_element() {
* @ticket 61365
*/
public function test_processing_directives_allowed_after_root_element() {
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
$this->assertFalse( $processor->next_tag(), 'Found a non-existent tag.' );
$this->assertNull( $processor->get_last_error(), 'Ran into a parse error after the root element' );
@@ -164,7 +1592,7 @@ public function test_processing_directives_allowed_after_root_element() {
* @ticket 61365
*/
public function test_mixed_misc_grammar_allowed_after_root_element() {
- $processor = new WP_XML_Processor( ' ' );
+ $processor = WP_XML_Processor::from_string( ' ' );
$processor->next_tag();
$this->assertEquals( 'root', $processor->get_tag(), 'Did not find a tag.' );
@@ -179,11 +1607,11 @@ public function test_mixed_misc_grammar_allowed_after_root_element() {
* @expectedIncorrectUsage WP_XML_Processor::step_in_misc
*/
public function test_elements_not_allowed_after_root_element() {
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
$this->assertFalse( $processor->next_tag(), 'Fount an illegal tag.' );
$this->assertEquals(
- WP_XML_Tag_Processor::ERROR_SYNTAX,
+ WP_XML_Processor::ERROR_SYNTAX,
$processor->get_last_error(),
'Did not run into a parse error after the root element'
);
@@ -195,7 +1623,7 @@ public function test_elements_not_allowed_after_root_element() {
* @return void
*/
public function test_comments_allowed_after_root_element() {
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
$this->assertFalse( $processor->next_tag(), 'Found an element node after the root element' );
$this->assertNull( $processor->get_last_error(), 'Ran into a parse error after the root element' );
@@ -208,11 +1636,11 @@ public function test_comments_allowed_after_root_element() {
* @return void
*/
public function test_cdata_not_allowed_after_root_element() {
- $processor = new WP_XML_Processor( '' );
+ $processor = WP_XML_Processor::from_string( '' );
$this->assertTrue( $processor->next_tag(), 'Did not find a tag.' );
$this->assertFalse( $processor->next_tag(), 'Did not reject a comment node after the root element' );
$this->assertEquals(
- WP_XML_Tag_Processor::ERROR_SYNTAX,
+ WP_XML_Processor::ERROR_SYNTAX,
$processor->get_last_error(),
'Did not run into a parse error after the root element'
);
@@ -224,7 +1652,7 @@ public function test_cdata_not_allowed_after_root_element() {
* @covers WP_XML_Processor::next_tag
*/
public function test_detects_invalid_document_no_root_tag() {
- $processor = new WP_XML_Processor(
+ $processor = WP_XML_Processor::from_stream(
'
'
);
@@ -238,7 +1666,7 @@ public function test_detects_invalid_document_no_root_tag() {
* @covers WP_XML_Processor::next_tag
*/
public function test_unclosed_root_yields_incomplete_input() {
- $processor = new WP_XML_Processor(
+ $processor = WP_XML_Processor::from_stream(
'
@@ -249,4 +1677,4 @@ public function test_unclosed_root_yields_incomplete_input() {
}
$this->assertTrue( $processor->is_paused_at_incomplete_input(), 'Did not indicate that the XML input was incomplete.' );
}
-}
+}
\ No newline at end of file
diff --git a/packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php b/packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php
deleted file mode 100644
index c336371ec8..0000000000
--- a/packages/playground/data-liberation/tests/WPXMLTagProcessorTests.php
+++ /dev/null
@@ -1,1426 +0,0 @@
-Text';
- const XML_WITH_CLASSES = 'Text';
- const XML_MALFORMED = 'Back to notifications';
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_tag
- */
- public function test_get_tag_returns_null_before_finding_tags() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertNull( $processor->get_tag(), 'Calling get_tag() without selecting a tag did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_tag
- */
- public function test_get_tag_returns_null_when_not_in_open_tag() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
- $this->assertNull( $processor->get_tag(), 'Accessing a non-existing tag did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_tag
- */
- public function test_get_tag_returns_open_tag_name() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
- $this->assertSame( 'wp:content', $processor->get_tag(), 'Accessing an existing tag name did not return "div"' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::is_empty_element
- *
- * @dataProvider data_is_empty_element
- *
- * @param string $xml Input XML whose first tag might contain the self-closing flag `/`.
- * @param bool $flag_is_set Whether the input XML's first tag contains the self-closing flag.
- */
- public function test_is_empty_element_matches_input_xml( $xml, $flag_is_set ) {
- $processor = new WP_XML_Tag_Processor( $xml );
- $processor->next_tag( array( 'tag_closers' => 'visit' ) );
-
- if ( $flag_is_set ) {
- $this->assertTrue( $processor->is_empty_element(), 'Did not find the empty element tag when it was present.' );
- } else {
- $this->assertFalse( $processor->is_empty_element(), 'Found the empty element tag when it was absent.' );
- }
- }
-
- /**
- * Data provider. XML tags which might have a self-closing flag, and an indicator if they do.
- *
- * @return array[]
- */
- public static function data_is_empty_element() {
- return array(
- // These should not have a self-closer, and will leave an element un-closed if it's assumed they are self-closing.
- 'Self-closing flag on non-void XML element' => array( '', true ),
- 'No self-closing flag on non-void XML element' => array( '', false ),
- // These should not have a self-closer, but are benign when used because the elements are void.
- 'Self-closing flag on void XML element' => array( '', true ),
- 'No self-closing flag on void XML element' => array( '', false ),
- 'Self-closing flag on void XML element without spacing' => array( '', true ),
- // These should not have a self-closer, but as part of a tag closer they are entirely ignored.
- 'No self-closing flag on tag closer' => array( '', false ),
- // These can and should have self-closers, and will leave an element un-closed if it's assumed they aren't self-closing.
- 'Self-closing flag on a foreign element' => array( '', true ),
- 'No self-closing flag on a foreign element' => array( '', false ),
- // These involve syntax peculiarities.
- 'Self-closing flag after extra spaces' => array( '', true ),
- 'Self-closing flag after quoted attribute' => array( '', true ),
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_null_when_not_in_open_tag() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
- $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a non-existing tag did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_null_when_in_closing_tag() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
- $this->assertTrue( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), 'Querying an existing closing tag did not return true' );
- $this->assertNull( $processor->get_attribute( 'wp:post-type' ), 'Accessing an attribute of a closing tag did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_null_when_attribute_missing() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
- $this->assertNull( $processor->get_attribute( 'test-id' ), 'Accessing a non-existing attribute did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @expectedIncorrectUsage WP_XML_Tag_Processor::base_class_next_token
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_attributes_are_rejected_in_tag_closers() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
- $this->assertFalse( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), 'Querying an existing but invalid closing tag did not return false.' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_attribute_value() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag( 'wp:content' ), 'Querying an existing tag did not return true' );
- $this->assertSame( 'test', $processor->get_attribute( 'wp:post-type' ), 'Accessing a wp:post-type="test" attribute value did not return "test"' );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_parsing_stops_on_malformed_attribute_value_no_value() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_parsing_stops_on_malformed_attribute_value_no_quotes() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::get_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_malformed_attribute_value_containing_ampersand_is_treated_as_plaintext() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
- $this->assertEquals('WordPress & WordPress', $processor->get_attribute('enabled'));
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::get_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_malformed_attribute_value_containing_entity_without_semicolon_is_treated_as_plaintext() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertTrue( $processor->next_tag(), 'Querying a tag did not return true' );
- $this->assertEquals('', $processor->get_attribute('enabled'));
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_parsing_stops_on_malformed_attribute_value_contains_lt_character() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_parsing_stops_on_malformed_tags_duplicate_attributes() {
- $processor = new WP_XML_Tag_Processor( 'Text' );
-
- $this->assertFalse( $processor->next_tag() );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_parsing_stops_on_malformed_attribute_name_contains_slash() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
-
- $this->assertFalse( $processor->next_tag(), 'Querying a malformed start tag did not return false' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_modifiable_text_returns_a_decoded_value() {
- $processor = new WP_XML_Tag_Processor( '“😄”' );
-
- $processor->next_tag( 'root' );
- $processor->next_token();
-
- $this->assertEquals(
- '“😄”',
- $processor->get_modifiable_text(),
- 'Reading an encoded text did not decode it.'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_a_decoded_value() {
- $processor = new WP_XML_Tag_Processor( '' );
-
- $this->assertTrue( $processor->next_tag( 'root' ), 'Querying a tag did not return true' );
- $this->assertEquals(
- '“😄”',
- $processor->get_attribute( 'encoded-data' ),
- 'Reading an encoded attribute did not decode it.'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- *
- * @param string $attribute_name Name of data-enabled attribute with case variations.
- */
- public function test_get_attribute_is_case_sensitive() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
-
- $this->assertEquals(
- 'true',
- $processor->get_attribute( 'DATA-enabled' ),
- 'Accessing an attribute by a same-cased name did return not its value'
- );
-
- $this->assertNull(
- $processor->get_attribute( 'data-enabled' ),
- 'Accessing an attribute by a differently-cased name did return its value'
- );
- }
-
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::remove_attribute
- */
- public function test_remove_attribute_is_case_sensitive() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
- $processor->remove_attribute( 'data-enabled' );
-
- $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did remove the attribute' );
-
- $processor->remove_attribute( 'DATA-enabled' );
-
- $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-sensitive remove_attribute call did not remove the attribute' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_set_attribute_is_case_sensitive() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
- $processor->set_attribute( 'data-enabled', 'abc' );
-
- $this->assertSame( 'Test', $processor->get_updated_xml(), 'A case-insensitive set_attribute call did not update the existing attribute' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
- */
- public function test_get_attribute_names_with_prefix_returns_null_before_finding_tags() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $this->assertNull(
- $processor->get_attribute_names_with_prefix( 'data-' ),
- 'Accessing attributes by their prefix did not return null when no tag was selected'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
- */
- public function test_get_attribute_names_with_prefix_returns_null_when_not_in_open_tag() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag( 'p' );
- $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a non-existing tag did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
- */
- public function test_get_attribute_names_with_prefix_returns_null_when_in_closing_tag() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag( 'wp:content' );
- $processor->next_tag( array( 'tag_closers' => 'visit' ) );
-
- $this->assertNull( $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing attributes of a closing tag did not return null' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
- */
- public function test_get_attribute_names_with_prefix_returns_empty_array_when_no_attributes_present() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag( 'wp:content' );
-
- $this->assertSame( array(), $processor->get_attribute_names_with_prefix( 'data-' ), 'Accessing the attributes on a tag without any did not return an empty array' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
- */
- public function test_get_attribute_names_with_prefix_returns_matching_attribute_names_in_original_case() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
-
- $this->assertSame(
- array( 'data-test-ID' ),
- $processor->get_attribute_names_with_prefix( 'data-' ),
- 'Accessing attributes by their prefix did not return their lowercase names'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute_names_with_prefix
- */
- public function test_get_attribute_names_with_prefix_returns_attribute_added_by_set_attribute() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
- $processor->set_attribute( 'data-test-id', '14' );
-
- $this->assertSame(
- 'Test',
- $processor->get_updated_xml(),
- "Updated XML doesn't include attribute added via set_attribute"
- );
- $this->assertSame(
- array( 'data-test-id', 'data-foo' ),
- $processor->get_attribute_names_with_prefix( 'data-' ),
- "Accessing attribute names doesn't find attribute added via set_attribute"
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::__toString
- */
- public function test_to_string_returns_updated_xml() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
- $processor->remove_attribute( 'id' );
-
- $processor->next_tag();
- $processor->set_attribute( 'id', 'wp:content-id-1' );
-
- $this->assertSame(
- $processor->get_updated_xml(),
- (string) $processor,
- 'get_updated_xml() returned a different value than __toString()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_updated_xml
- */
- public function test_get_updated_xml_applies_the_updates_so_far_and_keeps_the_processor_on_the_current_tag() {
- $processor = new WP_XML_Tag_Processor( 'Test' );
- $processor->next_tag();
- $processor->remove_attribute( 'id' );
-
- $processor->next_tag();
- $processor->set_attribute( 'id', 'wp:content-id-1' );
-
- $this->assertSame(
- 'Test',
- $processor->get_updated_xml(),
- 'Calling get_updated_xml after updating the attributes of the second tag returned different XML than expected'
- );
-
- $processor->set_attribute( 'id', 'wp:content-id-2' );
-
- $this->assertSame(
- 'Test',
- $processor->get_updated_xml(),
- 'Calling get_updated_xml after updating the attributes of the second tag for the second time returned different XML than expected'
- );
-
- $processor->next_tag();
- $processor->remove_attribute( 'id' );
-
- $this->assertSame(
- 'Test',
- $processor->get_updated_xml(),
- 'Calling get_updated_xml after removing the id attribute of the third tag returned different XML than expected'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_updated_xml
- */
- public function test_get_updated_xml_without_updating_any_attributes_returns_the_original_xml() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
- $this->assertSame(
- self::XML_SIMPLE,
- $processor->get_updated_xml(),
- 'Casting WP_XML_Tag_Processor to a string without performing any updates did not return the initial XML snippet'
- );
- }
-
- /**
- * Ensures that when seeking to an earlier spot in the document that
- * all previously-enqueued updates are applied as they ought to be.
- *
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- */
- public function test_get_updated_xml_applies_updates_to_content_after_seeking_to_before_parsed_bytes() {
- $processor = new WP_XML_Tag_Processor( '' );
-
- $processor->next_tag();
- $processor->set_attribute( 'wonky', 'true' );
- $processor->next_tag();
- $processor->set_bookmark( 'here' );
-
- $processor->next_tag( array( 'tag_closers' => 'visit' ) );
- $processor->seek( 'here' );
-
- $this->assertSame( '', $processor->get_updated_xml() );
- }
-
- public function test_declare_element_as_pcdata() {
- $text = '
- This text contains syntax that may seem
- like XML nodes:
-
-
-
-
-
-
- &<>"'
-
- But! It is all treated as text.
- ';
- $processor = new WP_XML_Tag_Processor(
- "$text"
- );
- $processor->declare_element_as_pcdata( 'my-pcdata' );
- $processor->next_tag( 'my-pcdata' );
-
- $this->assertEquals(
- $text,
- $processor->get_modifiable_text(),
- 'get_modifiable_text() did not return the expected text'
- );
- }
-
- /**
- * Ensures that bookmarks start and length correctly describe a given token in XML.
- *
- * @ticket 61365
- *
- * @dataProvider data_xml_nth_token_substring
- *
- * @param string $xml Input XML.
- * @param int $match_nth_token Which token to inspect from input XML.
- * @param string $expected_match Expected full raw token bookmark should capture.
- */
- public function test_token_bookmark_span( string $xml, int $match_nth_token, string $expected_match ) {
- $processor = new class( $xml ) extends WP_XML_Tag_Processor {
- /**
- * Returns the raw span of XML for the currently-matched
- * token, or null if not paused on any token.
- *
- * @return string|null Raw XML content of currently-matched token,
- * otherwise `null` if not matched.
- */
- public function get_raw_token() {
- if (
- WP_XML_Tag_Processor::STATE_READY === $this->parser_state ||
- WP_XML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state ||
- WP_XML_Tag_Processor::STATE_COMPLETE === $this->parser_state
- ) {
- return null;
- }
-
- $this->set_bookmark( 'mark' );
- $mark = $this->bookmarks['mark'];
-
- return substr( $this->xml, $mark->start, $mark->length );
- }
- };
-
- for ( $i = 0; $i < $match_nth_token; $i++ ) {
- $processor->next_token();
- }
-
- $raw_token = $processor->get_raw_token();
- $this->assertIsString(
- $raw_token,
- "Failed to find raw token at position {$match_nth_token}: check test data provider."
- );
-
- $this->assertSame(
- $expected_match,
- $raw_token,
- 'Bookmarked wrong span of text for full matched token.'
- );
- }
-
- /**
- * Data provider.
- *
- * @return array
- */
- public static function data_xml_nth_token_substring() {
- return array(
- // Tags.
- 'DIV start tag' => array( '', 1, '' ),
- 'DIV start tag with attributes' => array( '', 1, '' ),
- 'Nested DIV' => array( '', 2, '' ),
- 'Sibling DIV' => array( '', 3, '' ),
- 'DIV after text' => array( 'text ', 2, '' ),
- 'DIV before text' => array( ' text', 1, '' ),
- 'DIV after comment' => array( '', 3, '' ),
- 'DIV before comment' => array( ' ', 1, '' ),
- 'Start "self-closing" tag' => array( '', 1, '' ),
- 'Void tag' => array( '', 1, '' ),
- 'Void tag w/self-closing flag' => array( '', 1, '' ),
- 'Void tag inside DIV' => array( '', 2, '' ),
-
- // Text.
- 'Text' => array( 'Just text', 1, 'Just text' ),
- 'Text in DIV' => array( 'Text', 2, 'Text' ),
- 'Text before DIV' => array( 'Text', 1, 'Text' ),
- 'Text after comment' => array( 'Text', 2, 'Text' ),
- 'Text before comment' => array( 'Text ', 1, 'Text' ),
-
- // Comments.
- 'Comment' => array( '', 1, '' ),
- 'Comment in DIV' => array( '', 2, '' ),
- 'Comment before DIV' => array( '', 1, '' ),
- 'Comment after DIV' => array( '', 3, '' ),
- 'Comment after comment' => array( '', 2, '' ),
- 'Comment before comment' => array( ' ', 1, '' ),
- 'Empty comment' => array( '', 1, '' ),
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::next_tag
- */
- public function test_next_tag_with_no_arguments_should_find_the_next_existing_tag() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
- $this->assertTrue( $processor->next_tag(), 'Querying an existing tag did not return true' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::next_tag
- */
- public function test_next_tag_should_return_false_for_a_non_existing_tag() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
- $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_modifiable_text
- */
- public function test_normalizes_carriage_returns_in_text_nodes() {
- $processor = new WP_XML_Tag_Processor(
- "We are\rnormalizing\r\n\nthe\n\r\r\r\ncarriage returns"
- );
- $processor->next_tag();
- $processor->next_token();
- $this->assertEquals(
- "We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
- $processor->get_modifiable_text(),
- 'get_raw_token() did not normalize the carriage return characters'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_modifiable_text
- */
- public function test_normalizes_carriage_returns_in_cdata() {
- $processor = new WP_XML_Tag_Processor(
- ""
- );
- $processor->next_tag();
- $processor->next_token();
- $this->assertEquals(
- "We are\nnormalizing\n\nthe\n\n\n\ncarriage returns",
- $processor->get_modifiable_text(),
- 'get_raw_token() did not normalize the carriage return characters'
- );
- }
-
- /**
- * @ticket 61365
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::next_tag
- * @covers WP_XML_Tag_Processor::is_tag_closer
- */
- public function test_next_tag_should_stop_on_closers_only_when_requested() {
- $processor = new WP_XML_Tag_Processor( '' );
-
- $this->assertTrue( $processor->next_tag( array( 'tag_name' => 'wp:content' ) ), 'Did not find desired tag opener' );
- $this->assertFalse( $processor->next_tag( array( 'tag_name' => 'wp:content' ) ), 'Visited an unwanted tag, a tag closer' );
-
- $processor = new WP_XML_Tag_Processor( '' );
- $processor->next_tag(
- array(
- 'tag_name' => 'wp:content',
- 'tag_closers' => 'visit',
- )
- );
-
- $this->assertFalse( $processor->is_tag_closer(), 'Indicated a tag opener is a tag closer' );
- $this->assertTrue(
- $processor->next_tag(
- array(
- 'tag_name' => 'wp:content',
- 'tag_closers' => 'visit',
- )
- ),
- 'Did not stop at desired tag closer'
- );
- $this->assertTrue( $processor->is_tag_closer(), 'Indicated a tag closer is a tag opener' );
-
- $processor = new WP_XML_Tag_Processor( '' );
- $this->assertTrue( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), "Did not find a tag opener when tag_closers was set to 'visit'" );
- $this->assertFalse( $processor->next_tag( array( 'tag_closers' => 'visit' ) ), "Found a closer where there wasn't one" );
- }
-
- /**
- * Verifies that updates to a document before calls to `get_updated_xml()` don't
- * lead to the Tag Processor jumping to the wrong tag after the updates.
- *
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_updated_xml
- */
- public function test_internal_pointer_returns_to_original_spot_after_inserting_content_before_cursor() {
- $tags = new WP_XML_Tag_Processor( 'outsideinside' );
-
- $tags->next_tag();
- $tags->next_tag();
- $tags->set_attribute( 'wp:post-type', 'foo' );
- $tags->next_tag( 'section' );
-
- // Return to this spot after moving ahead.
- $tags->set_bookmark( 'here' );
-
- // Move ahead.
- $tags->next_tag( 'photo' );
- $tags->seek( 'here' );
- $this->assertSame( 'outsideinside', $tags->get_updated_xml() );
- $this->assertSame( 'section', $tags->get_tag() );
- $this->assertFalse( $tags->is_tag_closer() );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_set_attribute_on_a_non_existing_tag_does_not_change_the_markup() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
-
- $this->assertFalse( $processor->next_tag( 'p' ), 'Querying a non-existing tag did not return false' );
- $this->assertFalse( $processor->next_tag( 'wp:content' ), 'Querying a non-existing tag did not return false' );
-
- $processor->set_attribute( 'id', 'primary' );
-
- $this->assertSame(
- self::XML_SIMPLE,
- $processor->get_updated_xml(),
- 'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- * @covers WP_XML_Tag_Processor::remove_attribute
- * @covers WP_XML_Tag_Processor::add_class
- * @covers WP_XML_Tag_Processor::remove_class
- */
- public function test_attribute_ops_on_tag_closer_do_not_change_the_markup() {
- $processor = new WP_XML_Tag_Processor( '' );
- $processor->next_tag(
- array(
- 'tag_name' => 'wp:content',
- 'tag_closers' => 'visit',
- )
- );
-
- $this->assertFalse( $processor->is_tag_closer(), 'Skipped tag opener' );
-
- $processor->next_tag(
- array(
- 'tag_name' => 'wp:content',
- 'tag_closers' => 'visit',
- )
- );
-
- $this->assertTrue( $processor->is_tag_closer(), 'Skipped tag closer' );
- $this->assertFalse( $processor->set_attribute( 'id', 'test' ), "Allowed setting an attribute on a tag closer when it shouldn't have" );
- $this->assertFalse( $processor->remove_attribute( 'invalid-id' ), "Allowed removing an attribute on a tag closer when it shouldn't have" );
- $this->assertSame(
- '',
- $processor->get_updated_xml(),
- 'Calling get_updated_xml after updating a non-existing tag returned an XML that was different from the original XML'
- );
- }
-
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_set_attribute_with_a_non_existing_attribute_adds_a_new_attribute_to_the_markup() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->set_attribute( 'test-attribute', 'test-value' );
-
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Updated XML does not include attribute added via set_attribute()'
- );
- $this->assertSame(
- 'test-value',
- $processor->get_attribute( 'test-attribute' ),
- 'get_attribute() (called after get_updated_xml()) did not return attribute added via set_attribute()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_updated_values_before_they_are_applied() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->set_attribute( 'test-attribute', 'test-value' );
-
- $this->assertSame(
- 'test-value',
- $processor->get_attribute( 'test-attribute' ),
- 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
- );
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Updated XML does not include attribute added via set_attribute()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_returns_updated_values_before_they_are_applied_with_different_name_casing() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->set_attribute( 'test-ATTribute', 'test-value' );
-
- $this->assertSame(
- 'test-value',
- $processor->get_attribute( 'test-ATTribute' ),
- 'get_attribute() (called before get_updated_xml()) did not return attribute added via set_attribute()'
- );
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Updated XML does not include attribute added via set_attribute()'
- );
- }
-
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_reflects_removed_attribute_before_it_is_applied() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->remove_attribute( 'id' );
-
- $this->assertNull(
- $processor->get_attribute( 'id' ),
- 'get_attribute() (called before get_updated_xml()) returned attribute that was removed by remove_attribute()'
- );
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Updated XML includes attribute that was removed by remove_attribute()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_reflects_adding_and_then_removing_an_attribute_before_those_updates_are_applied() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->set_attribute( 'test-attribute', 'test-value' );
- $processor->remove_attribute( 'test-attribute' );
-
- $this->assertNull(
- $processor->get_attribute( 'test-attribute' ),
- 'get_attribute() (called before get_updated_xml()) returned attribute that was added via set_attribute() and then removed by remove_attribute()'
- );
- $this->assertSame(
- self::XML_SIMPLE,
- $processor->get_updated_xml(),
- 'Updated XML includes attribute that was added via set_attribute() and then removed by remove_attribute()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::get_attribute
- */
- public function test_get_attribute_reflects_setting_and_then_removing_an_existing_attribute_before_those_updates_are_applied() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->set_attribute( 'id', 'test-value' );
- $processor->remove_attribute( 'id' );
-
- $this->assertNull(
- $processor->get_attribute( 'id' ),
- 'get_attribute() (called before get_updated_xml()) returned attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
- );
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Updated XML includes attribute that was overwritten by set_attribute() and then removed by remove_attribute()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_set_attribute_with_an_existing_attribute_name_updates_its_value_in_the_markup() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->set_attribute( 'id', 'new-id' );
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Existing attribute was not updated'
- );
- }
-
- /**
- * Ensures that when setting an attribute multiple times that only
- * one update flushes out into the updated XML.
- *
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_set_attribute_with_case_variants_updates_only_the_original_first_copy() {
- $processor = new WP_XML_Tag_Processor( '' );
- $processor->next_tag();
- $processor->set_attribute( 'data-enabled', 'canary1' );
- $processor->set_attribute( 'data-enabled', 'canary2' );
- $processor->set_attribute( 'data-enabled', 'canary3' );
-
- $this->assertSame( '', strtolower( $processor->get_updated_xml() ) );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::next_tag
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_next_tag_and_set_attribute_in_a_loop_update_all_tags_in_the_markup() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- while ( $processor->next_tag() ) {
- $processor->set_attribute( 'data-foo', 'bar' );
- }
-
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Not all tags were updated when looping with next_tag() and set_attribute()'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::remove_attribute
- */
- public function test_remove_attribute_with_an_existing_attribute_name_removes_it_from_the_markup() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->remove_attribute( 'id' );
-
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Attribute was not removed'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::remove_attribute
- */
- public function test_remove_attribute_with_a_non_existing_attribute_name_does_not_change_the_markup() {
- $processor = new WP_XML_Tag_Processor( self::XML_SIMPLE );
- $processor->next_tag();
- $processor->remove_attribute( 'no-such-attribute' );
-
- $this->assertSame(
- self::XML_SIMPLE,
- $processor->get_updated_xml(),
- 'Content was changed when attempting to remove an attribute that did not exist'
- );
- }
-
- /**
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::next_tag
- */
- public function test_correctly_parses_xml_attributes_wrapped_in_single_quotation_marks() {
- $processor = new WP_XML_Tag_Processor(
- 'Text'
- );
- $processor->next_tag(
- array(
- 'tag_name' => 'wp:content',
- 'id' => 'first',
- )
- );
- $processor->remove_attribute( 'id' );
- $processor->next_tag(
- array(
- 'tag_name' => 'wp:text',
- 'id' => 'second',
- )
- );
- $processor->set_attribute( 'id', 'single-quote' );
- $this->assertSame(
- 'Text',
- $processor->get_updated_xml(),
- 'Did not remove single-quoted attribute'
- );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::parse_next_attribute
- * @expectedIncorrectUsage WP_XML_Tag_Processor::set_attribute
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_setting_an_attribute_to_false_is_rejected() {
- $processor = new WP_XML_Tag_Processor(
- ''
- );
- $processor->next_tag( 'input' );
- $this->assertFalse(
- $processor->set_attribute( 'checked', false ),
- 'Accepted a boolean attribute name.'
- );
- }
-
- /**
- * @ticket 61365
- * @expectedIncorrectUsage WP_XML_Tag_Processor::set_attribute
- *
- * @covers WP_XML_Tag_Processor::set_attribute
- */
- public function test_setting_a_missing_attribute_to_false_does_not_change_the_markup() {
- $xml_input = '';
- $processor = new WP_XML_Tag_Processor( $xml_input );
- $processor->next_tag( 'input' );
- $processor->set_attribute( 'checked', false );
- $this->assertSame(
- $xml_input,
- $processor->get_updated_xml(),
- 'Changed the markup unexpectedly when setting a non-existing attribute to false'
- );
- }
-
- /**
- * Ensures that unclosed and invalid comments trigger warnings or errors.
- *
- * @ticket 61365
- *
- * @covers WP_XML_Tag_Processor::next_tag
- * @covers WP_XML_Tag_Processor::paused_at_incomplete_token
- *
- * @dataProvider data_xml_with_unclosed_comments
- *
- * @param string $xml_ending_before_comment_close XML with opened comments that aren't closed.
- */
- public function test_documents_may_end_with_unclosed_comment( $xml_ending_before_comment_close ) {
- $processor = new WP_XML_Tag_Processor( $xml_ending_before_comment_close );
-
- $this->assertFalse(
- $processor->next_tag(),
- "Should not have found any tag, but found {$processor->get_tag()}."
- );
-
- $this->assertTrue(
- $processor->is_paused_at_incomplete_input(),
- "Should have indicated that the parser found an incomplete token but didn't."
- );
- }
-
- /**
- * Data provider.
- *
- * @return array[]
- */
- public static function data_xml_with_unclosed_comments() {
- return array(
- 'Shortest open valid comment' => array( '' );
- $this->assertFalse( $processor->next_token(), 'Did not reject a malformed XML comment.' );
- }
-
- /**
- * @covers WP_XML_Tag_Processor::next_tag
- */
- public function test_handles_malformed_taglike_open_short_xml() {
- $processor = new WP_XML_Tag_Processor( '<' );
- $result = $processor->next_tag();
- $this->assertFalse( $result, 'Did not handle "<" xml properly.' );
- }
-
- /**
- * @covers WP_XML_Tag_Processor::next_tag
- */
- public function test_handles_malformed_taglike_close_short_xml() {
- $processor = new WP_XML_Tag_Processor( ' ' );
- $result = $processor->next_tag();
- $this->assertFalse( $result, 'Did not handle " " xml properly.' );
- }
-
- /**
- * @expectedIncorrectUsage WP_XML_Tag_Processor::base_class_next_token
- * @covers WP_XML_Tag_Processor::next_tag
- */
- public function test_rejects_empty_element_that_is_also_a_closer() {
- $processor = new WP_XML_Tag_Processor( ' ' );
- $result = $processor->next_tag();
- $this->assertFalse( $result, 'Did not handle "" xml properly.' );
- }
-
- /**
- * Ensures that non-tag syntax starting with `<` is rejected.
- *
- * @ticket 61365
- */
- public function test_single_text_node_with_taglike_text() {
- $processor = new WP_XML_Tag_Processor( 'This is a text node< /A>' );
- $this->assertTrue( $processor->next_token(), 'A valid text node was not found.' );
- $this->assertEquals( 'This is a text node', $processor->get_modifiable_text(), 'The contents of a valid text node were not correctly captured.' );
- $this->assertFalse( $processor->next_tag(), 'A malformed XML markup was not rejected.' );
- }
-
- /**
- * Ensures that non-tag syntax starting with `<` is rejected.
- *
- * @ticket 61365
- */
- public function test_parses_CDATA() {
- $processor = new WP_XML_Tag_Processor( '' );
- $processor->next_tag();
- $this->assertTrue( $processor->next_token(), 'The first text node was not found.' ); $this->assertEquals(
- 'This is a CDATA text node.',
- $processor->get_modifiable_text(),
- 'The contents of a a CDATA text node were not correctly captured.'
- );
- }
-
- /**
- * @ticket 61365
- */
- public function test_yields_CDATA_a_separate_text_node() {
- $processor = new WP_XML_Tag_Processor( 'This is the first text node and this is the third text node.' );
-
- $processor->next_token();
- $this->assertTrue( $processor->next_token(), 'The first text node was not found.' );
- $this->assertEquals(
- 'This is the first text node ',
- $processor->get_modifiable_text(),
- 'The contents of a valid text node were not correctly captured.'
- );
-
- $this->assertTrue( $processor->next_token(), 'The CDATA text node was not found.' );
- $this->assertEquals(
- ' and this is a second text node ',
- $processor->get_modifiable_text(),
- 'The contents of a a CDATA text node were not correctly captured.'
- );
-
- $this->assertTrue( $processor->next_token(), 'The text node was not found.' );
- $this->assertEquals(
- ' and this is the third text node.',
- $processor->get_modifiable_text(),
- 'The contents of a valid text node were not correctly captured.'
- );
- }
-
- /**
- *
- * @ticket 61365
- */
- public function test_xml_declaration() {
- $processor = new WP_XML_Tag_Processor( '' );
- $this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
- $this->assertEquals(
- '#xml-declaration',
- $processor->get_token_type(),
- 'The XML declaration was not correctly identified.'
- );
- $this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' );
- $this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' );
- }
-
- /**
- *
- * @ticket 61365
- */
- public function test_xml_declaration_with_single_quotes() {
- $processor = new WP_XML_Tag_Processor( "" );
- $this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
- $this->assertEquals(
- '#xml-declaration',
- $processor->get_token_type(),
- 'The XML declaration was not correctly identified.'
- );
- $this->assertEquals( '1.0', $processor->get_attribute( 'version' ), 'The version attribute was not correctly captured.' );
- $this->assertEquals( 'UTF-8', $processor->get_attribute( 'encoding' ), 'The encoding attribute was not correctly captured.' );
- }
-
- /**
- *
- * @ticket 61365
- */
- public function test_processor_instructions() {
- $processor = new WP_XML_Tag_Processor(
- // The first ' .
- // The second '
- );
- $this->assertTrue( $processor->next_token(), 'The XML declaration was not found.' );
- $this->assertTrue( $processor->next_token(), 'The processing instruction was not found.' );
- $this->assertEquals(
- '#processing-instructions',
- $processor->get_token_type(),
- 'The processing instruction was not correctly identified.'
- );
- $this->assertEquals( ' stylesheet type="text/xsl" href="style.xsl" ', $processor->get_modifiable_text(), 'The modifiable text was not correctly captured.' );
- }
-
- /**
- * Ensures that updates which are enqueued in front of the cursor
- * are applied before moving forward in the document.
- *
- * @ticket 61365
- */
- public function test_applies_updates_before_proceeding() {
- $xml = '';
-
- $subclass = new class( $xml ) extends WP_XML_Tag_Processor {
- /**
- * Inserts raw text after the current token.
- *
- * @param string $new_xml Raw text to insert.
- */
- public function insert_after( $new_xml ) {
- $this->set_bookmark( 'here' );
- $this->lexical_updates[] = new WP_HTML_Text_Replacement(
- $this->bookmarks['here']->start + $this->bookmarks['here']->length,
- 0,
- $new_xml
- );
- }
- };
-
- $subclass->next_tag( 'photo' );
- $subclass->insert_after( '
snow-capped
' );
-
- $subclass->next_tag();
- $this->assertSame(
- 'p',
- $subclass->get_tag(),
- 'Should have matched inserted XML as next tag.'
- );
-
- $subclass->next_tag( 'photo' );
- $subclass->set_attribute( 'alt', 'mountain' );
-
- $this->assertSame(
- '
snow-capped
',
- $subclass->get_updated_xml(),
- 'Should have properly applied the update from in front of the cursor.'
- );
- }
-}
\ No newline at end of file
diff --git a/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml b/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml
index f7c4b13b07..2edf266984 100644
--- a/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml
+++ b/packages/playground/data-liberation/tests/fixtures/wxr-simple.xml
@@ -92,5 +92,4 @@ https://playground.internal/path-not-taken was the second best choice.
-
-
\ No newline at end of file
+
\ No newline at end of file