diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php
index 1234abcb9dfe4..fb3d98912fbf2 100644
--- a/src/wp-includes/html-api/class-wp-html-open-elements.php
+++ b/src/wp-includes/html-api/class-wp-html-open-elements.php
@@ -51,6 +51,18 @@ class WP_HTML_Open_Elements {
*/
private $has_p_in_button_scope = false;
+ /**
+ * Callback run at the end of after_element_pop() with the popped item,
+ * or null when no callback has been set.
+ *
+ * @var Closure|null
+ */
+ private $pop_handler = null;
+
+ /**
+ * Callback run at the end of after_element_push() with the pushed item,
+ * or null when no callback has been set.
+ *
+ * @var Closure|null
+ */
+ private $push_handler = null;
+
+ /**
+ * Sets the callback that after_element_pop() invokes with each popped item.
+ *
+ * Only a single callback is stored: despite the "add" in the name, calling
+ * this method again replaces the previously-set callback.
+ *
+ * @param Closure $handler Receives the popped item as its only argument.
+ */
+ public function add_pop_handler( Closure $handler ) {
+ $this->pop_handler = $handler;
+ }
+
+ /**
+ * Sets the callback that after_element_push() invokes with each pushed item.
+ *
+ * Only a single callback is stored: despite the "add" in the name, calling
+ * this method again replaces the previously-set callback.
+ *
+ * @param Closure $handler Receives the pushed item as its only argument.
+ */
+ public function add_push_handler( Closure $handler ) {
+ $this->push_handler = $handler;
+ }
+
/**
* Reports if a specific node is in the stack of open elements.
*
@@ -429,6 +441,10 @@ public function after_element_push( $item ) {
$this->has_p_in_button_scope = true;
break;
}
+
+ if ( null !== $this->push_handler ) {
+ ( $this->push_handler )( $item );
+ }
}
/**
@@ -458,5 +474,9 @@ public function after_element_pop( $item ) {
$this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' );
break;
}
+
+ if ( null !== $this->pop_handler ) {
+ ( $this->pop_handler )( $item );
+ }
}
}
diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php
index c76cc192b12a4..4bf0a10343a17 100644
--- a/src/wp-includes/html-api/class-wp-html-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-processor.php
@@ -7,6 +7,16 @@
* @since 6.4.0
*/
+/**
+ * Value object pairing a token with the stack operation that affected it.
+ *
+ * WP_HTML_Processor creates one of these each time an element is pushed
+ * onto, or popped off of, the stack of open elements.
+ */
+class WP_HTML_Element_Operation {
+	/**
+	 * Token the operation applies to.
+	 *
+	 * WP_HTML_Processor stores a WP_HTML_Token here; the constructor itself
+	 * does not enforce a type.
+	 *
+	 * @var mixed
+	 */
+	public $token;
+
+	/**
+	 * Kind of operation; WP_HTML_Processor uses 'open' and 'close'.
+	 *
+	 * @var string
+	 */
+	public $operation;
+
+	/**
+	 * Stores the token and operation exactly as given.
+	 *
+	 * @param mixed  $token     Token the operation applies to.
+	 * @param string $operation Kind of operation, e.g. 'open' or 'close'.
+	 */
+	public function __construct( $token, $operation ) {
+		$this->operation = $operation;
+		$this->token     = $token;
+	}
+}
+
/**
* Core class used to safely parse and modify an HTML document.
*
@@ -201,6 +211,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
*/
private $release_internal_bookmark_on_destruct = null;
+ /**
+ * Element operations recorded by the push/pop handlers and not yet
+ * returned by next_token(); consumed from the front of the array.
+ *
+ * @var WP_HTML_Element_Operation[]
+ */
+ private $element_queue = array();
+
+ /**
+ * Operation most recently taken off the queue by next_token(), or null
+ * when there is none.
+ *
+ * @var WP_HTML_Element_Operation|null
+ */
+ private $current_element = null;
+
/*
* Public Interface Functions
*/
@@ -299,6 +314,14 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul
$this->state = new WP_HTML_Processor_State();
+ $this->state->stack_of_open_elements->add_push_handler( function ( WP_HTML_Token $token ) {
+ $this->element_queue[] = new WP_HTML_Element_Operation( $token, 'open' );
+ } );
+
+ $this->state->stack_of_open_elements->add_pop_handler( function ( WP_HTML_Token $token ) {
+ $this->element_queue[] = new WP_HTML_Element_Operation( $token, 'close' );
+ } );
+
/*
* Create this wrapper so that it's possible to pass
* a private method into WP_HTML_Token classes without
@@ -365,7 +388,7 @@ public function next_tag( $query = null ) {
continue;
}
- if ( ! $this->is_tag_closer() ) {
+ if ( ! parent::is_tag_closer() ) {
return true;
}
}
@@ -392,7 +415,7 @@ public function next_tag( $query = null ) {
continue;
}
- if ( ! $this->is_tag_closer() ) {
+ if ( ! parent::is_tag_closer() ) {
return true;
}
}
@@ -440,7 +463,22 @@ public function next_tag( $query = null ) {
* @return bool
*/
public function next_token() {
- return $this->step();
+ /*
+ * Reset before stepping: while current_element is set, get_token_name(),
+ * get_tag() and is_tag_closer() report the queued element instead of the
+ * underlying token, and parsing code such as step_in_body() calls
+ * get_token_name().
+ */
+ $this->current_element = null;
+
+ /*
+ * Only advance the parser once every queued operation has been handed out.
+ * If it cannot advance any further, pop whatever is still open; each pop is
+ * expected to enqueue a 'close' operation through the registered pop handler.
+ */
+ if ( 0 === count( $this->element_queue ) && ! $this->step() ) {
+ while ( $this->state->stack_of_open_elements->pop() ) {
+ continue;
+ }
+ }
+
+ // array_shift() returns null on an empty queue, which signals the end of the document.
+ $this->current_element = array_shift( $this->element_queue );
+ return null !== $this->current_element;
+ }
+
+	/**
+	 * Reports whether the current position represents a closing operation.
+	 *
+	 * While a queued element operation is current, the answer comes from
+	 * that operation; otherwise the parent implementation decides.
+	 *
+	 * @return bool Whether the current token or queued operation is a closer.
+	 */
+	public function is_tag_closer() {
+		// No queued operation: fall back to the underlying processor.
+		if ( ! isset( $this->current_element ) ) {
+			return parent::is_tag_closer();
+		}
+
+		return 'close' === $this->current_element->operation;
+	}
/**
@@ -629,7 +667,7 @@ public function get_breadcrumbs() {
private function step_in_body() {
$token_name = $this->get_token_name();
$token_type = $this->get_token_type();
- $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : '';
+ $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
$op = "{$op_sigil}{$token_name}";
switch ( $op ) {
@@ -1152,7 +1190,7 @@ private function step_in_body() {
throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." );
}
- if ( ! $this->is_tag_closer() ) {
+ if ( ! parent::is_tag_closer() ) {
/*
* > Any other start tag
*/
@@ -1248,6 +1286,10 @@ public function get_tag() {
return null;
}
+ if ( isset( $this->current_element ) ) {
+ return $this->current_element->token->node_name;
+ }
+
$tag_name = parent::get_tag();
switch ( $tag_name ) {
@@ -1263,6 +1305,14 @@ public function get_tag() {
}
}
+	/**
+	 * Returns the name of the current token.
+	 *
+	 * While a queued element operation is current, this is the node name of
+	 * that operation's token; otherwise the parent implementation is used.
+	 *
+	 * @return mixed Node name of the queued token, or the parent's result.
+	 */
+	public function get_token_name() {
+		return isset( $this->current_element )
+			? $this->current_element->token->node_name
+			: parent::get_token_name();
+	}
+
+
/**
* Removes a bookmark that is no longer needed.
*
diff --git a/src/wp-includes/html-api/class-wp-html-to-markdown-converter.php b/src/wp-includes/html-api/class-wp-html-to-markdown-converter.php
new file mode 100644
index 0000000000000..961f35540e7ac
--- /dev/null
+++ b/src/wp-includes/html-api/class-wp-html-to-markdown-converter.php
@@ -0,0 +1,155 @@
+next_token() ) {
+ $indent = str_pad( '', $depth * 2, ' ' );
+ $token_name = $processor->get_token_name();
+ $breadcrumbs = $processor->get_breadcrumbs();
+
+ $closer = $processor->is_tag_closer() ? '/' : '';
+ if ( 0 === --$node_count ) {
+ $node_count = 8;
+ echo "\n";
+ }
+ echo "\e[36m{$closer}\e[32m{$token_name}\e[m ";
+
+ if ( $processor->is_tag_closer() ) {
+ switch ( $token_name ) {
+ case 'H1':
+ case 'H2':
+ case 'H3':
+ case 'H4':
+ case 'H5':
+ case 'H6':
+ $md .= "\n";
+ break;
+
+ case 'B':
+ case 'STRONG':
+ $md .= '*';
+ break;
+
+ case 'I':
+ case 'EM':
+ $md .= '_';
+ break;
+
+ case 'OL':
+ case 'UL':
+ --$depth;
+ array_pop( $list_items );
+ break;
+ }
+
+ // Proceed to the next token.
+ continue;
+ }
+
+ switch ( $token_name ) {
+ case '#text':
+ $md .= $processor->get_modifiable_text();
+ break;
+
+ case 'P':
+ $md .= "\n";
+ break;
+
+ case 'H1':
+ case 'H2':
+ case 'H3':
+ case 'H4':
+ case 'H5':
+ case 'H6':
+ $hash_count = intval( $token_name[1] );
+ $hashes = str_pad( '', $hash_count, '#' );
+ $md .= "\n\n{$hashes} ";
+ break;
+
+ case 'B':
+ case 'STRONG':
+ $md .= '*';
+ break;
+
+ case 'I':
+ case 'EM':
+ $md .= '_';
+ break;
+
+ case 'LI':
+ $list_item = end( $list_items );
+ $md .= "\n{$indent}{$list_item} ";
+ break;
+
+ case 'OL':
+ ++$depth;
+ $list_items[] = '*';
+ break;
+
+ case 'UL':
+ ++$depth;
+ $list_items[] = '-';
+ break;
+ }
+
+ $last_breadcrumbs = $breadcrumbs;
+ }
+
+ if ( null !== $processor->get_last_error() ) {
+ die( "Encountered unsupported HTML: failed to convert.\n" );
+ }
+
+ $closed_elements = array();
+ for ( $i = 0; $i < count( $last_breadcrumbs ); $i++ ) {
+ if (
+ isset( $last_breadcrumbs[ $i ], $breadcrumbs[ $i ] ) &&
+ $last_breadcrumbs[ $i ] === $breadcrumbs[ $i ]
+ ) {
+ continue;
+ }
+
+ $closed_elements = array_slice( $last_breadcrumbs, $i );
+ break;
+ }
+
+ $closed_elements = array_reverse( $closed_elements );
+ foreach ( $closed_elements as $element ) {
+ switch ( $element ) {
+ case 'H1':
+ case 'H2':
+ case 'H3':
+ case 'H4':
+ case 'H5':
+ case 'H6':
+ $md .= "\n";
+ break;
+
+ case 'B':
+ case 'STRONG':
+ $md .= '*';
+ break;
+
+ case 'I':
+ case 'EM':
+ $md .= '_';
+ break;
+
+ case 'OL':
+ case 'UL':
+ --$depth;
+ array_pop( $list_items );
+ break;
+ }
+ }
+
+ return $md;
+ }
+}
diff --git a/src/wp-settings.php b/src/wp-settings.php
index 9673479bfab76..e5168eaebc116 100644
--- a/src/wp-settings.php
+++ b/src/wp-settings.php
@@ -258,6 +258,7 @@
require ABSPATH . WPINC . '/html-api/class-wp-html-token.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php';
require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php';
+require ABSPATH . WPINC . '/html-api/class-wp-html-to-markdown-converter.php';
require ABSPATH . WPINC . '/class-wp-http.php';
require ABSPATH . WPINC . '/class-wp-http-streams.php';
require ABSPATH . WPINC . '/class-wp-http-curl.php';