diff --git a/src/wp-includes/html-api/class-wp-html-open-elements.php b/src/wp-includes/html-api/class-wp-html-open-elements.php index 1234abcb9dfe4..fb3d98912fbf2 100644 --- a/src/wp-includes/html-api/class-wp-html-open-elements.php +++ b/src/wp-includes/html-api/class-wp-html-open-elements.php @@ -51,6 +51,18 @@ class WP_HTML_Open_Elements { */ private $has_p_in_button_scope = false; + /** Closure invoked with the popped item at the end of after_element_pop(); null until add_pop_handler() stores one. */ private $pop_handler = null; + + /** Closure invoked with the pushed item at the end of after_element_push(); null until add_push_handler() stores one. */ private $push_handler = null; + + /** Stores $handler as the single pop handler; a later call replaces the earlier handler rather than adding to it. */ public function add_pop_handler( Closure $handler ) { + $this->pop_handler = $handler; + } + + /** Stores $handler as the single push handler; a later call replaces the earlier handler rather than adding to it. */ public function add_push_handler( Closure $handler ) { + $this->push_handler = $handler; + } + /** * Reports if a specific node is in the stack of open elements. * @@ -429,6 +441,10 @@ public function after_element_push( $item ) { $this->has_p_in_button_scope = true; break; } + + if ( null !== $this->push_handler ) { + ( $this->push_handler )( $item ); + } } /** @@ -458,5 +474,9 @@ public function after_element_pop( $item ) { $this->has_p_in_button_scope = $this->has_element_in_button_scope( 'P' ); break; } + + if ( null !== $this->pop_handler ) { + ( $this->pop_handler )( $item ); + } } } diff --git a/src/wp-includes/html-api/class-wp-html-processor.php b/src/wp-includes/html-api/class-wp-html-processor.php index c76cc192b12a4..4bf0a10343a17 100644 --- a/src/wp-includes/html-api/class-wp-html-processor.php +++ b/src/wp-includes/html-api/class-wp-html-processor.php @@ -7,6 +7,16 @@ * @since 6.4.0 */ +/** Plain holder pairing a token with the name of the operation applied to it. */ class WP_HTML_Element_Operation { + /** Token the operation applies to; stored as given, without validation. */ public $token; + /** Operation label; stored as given, without validation. */ public $operation; + + /** Assigns both properties from the arguments unchanged. */ public function __construct( $token, $operation ) { + $this->token = $token; + $this->operation = $operation; + } +} + /** * Core class used to safely parse and modify an HTML document. 
 * @@ -201,6 +211,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor { */ private $release_internal_bookmark_on_destruct = null; + /** Pending WP_HTML_Element_Operation entries: appended by the push and pop handlers set in the constructor, consumed from the front by next_token(). */ private $element_queue = array(); + + /** @var WP_HTML_Element_Operation|null Operation shifted off the queue by the latest next_token() call, or null when there is none. */ + private $current_element = null; + /* * Public Interface Functions */ @@ -299,6 +314,14 @@ public function __construct( $html, $use_the_static_create_methods_instead = nul $this->state = new WP_HTML_Processor_State(); + /* Queue an open operation for every token reported to the push handler. */ $this->state->stack_of_open_elements->add_push_handler( function ( WP_HTML_Token $token ) { + $this->element_queue[] = new WP_HTML_Element_Operation( $token, 'open' ); + } ); + + /* Queue a close operation for every token reported to the pop handler. */ $this->state->stack_of_open_elements->add_pop_handler( function ( WP_HTML_Token $token ) { + $this->element_queue[] = new WP_HTML_Element_Operation( $token, 'close' ); + } ); + /* * Create this wrapper so that it's possible to pass * a private method into WP_HTML_Token classes without @@ -365,7 +388,7 @@ public function next_tag( $query = null ) { continue; } - if ( ! $this->is_tag_closer() ) { + if ( ! parent::is_tag_closer() ) { return true; } } @@ -392,7 +415,7 @@ public function next_tag( $query = null ) { continue; } - if ( ! $this->is_tag_closer() ) { + if ( ! parent::is_tag_closer() ) { return true; } } @@ -440,7 +463,22 @@ public function next_tag( $query = null ) { * @return bool */ public function next_token() { - return $this->step(); + /* Clear the operation left over from the previous call before advancing. */ $this->current_element = null; + + /* step() runs only when the queue is empty. When it fails, keep popping until pop() returns a falsy value. NOTE(review): presumably each pop() reaches the pop handler and queues a close operation - confirm. */ if ( 0 === count( $this->element_queue ) && ! $this->step() ) { + while ( $this->state->stack_of_open_elements->pop() ) { + continue; + } + } + + /* array_shift() returns null for an empty array, so an empty queue makes this method return false. */ $this->current_element = array_shift( $this->element_queue ); + return null !== $this->current_element; + } + + /** Override: while a queued operation is current, reports whether it is a close operation; otherwise defers to parent::is_tag_closer(). */ public function is_tag_closer() { + return isset( $this->current_element ) + ? 
( 'close' === $this->current_element->operation ) + : parent::is_tag_closer(); } /** @@ -629,7 +667,7 @@ public function get_breadcrumbs() { private function step_in_body() { $token_name = $this->get_token_name(); $token_type = $this->get_token_type(); - $op_sigil = '#tag' === $token_type ? ( $this->is_tag_closer() ? '-' : '+' ) : ''; + $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; $op = "{$op_sigil}{$token_name}"; switch ( $op ) { @@ -1152,7 +1190,7 @@ private function step_in_body() { throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." ); } - if ( ! $this->is_tag_closer() ) { + if ( ! parent::is_tag_closer() ) { /* * > Any other start tag */ @@ -1248,6 +1286,10 @@ public function get_tag() { return null; } + + /* While a queued operation is current, report the node_name of its token instead of asking the parent. */ if ( isset( $this->current_element ) ) { + return $this->current_element->token->node_name; + } + $tag_name = parent::get_tag(); switch ( $tag_name ) { @@ -1263,6 +1305,14 @@ public function get_tag() { } } + /** Override: returns the node_name of the current queued operation token when there is one; otherwise defers to parent::get_token_name(). */ public function get_token_name() { + if ( isset( $this->current_element ) ) { + return $this->current_element->token->node_name; + } + + return parent::get_token_name(); + } + /** * Removes a bookmark that is no longer needed. * diff --git a/src/wp-includes/html-api/class-wp-html-to-markdown-converter.php b/src/wp-includes/html-api/class-wp-html-to-markdown-converter.php new file mode 100644 index 0000000000000..961f35540e7ac --- /dev/null +++ b/src/wp-includes/html-api/class-wp-html-to-markdown-converter.php @@ -0,0 +1,155 @@ +next_token() ) { + $indent = str_pad( '', $depth * 2, ' ' ); + $token_name = $processor->get_token_name(); + $breadcrumbs = $processor->get_breadcrumbs(); + + $closer = $processor->is_tag_closer() ? 
'/' : ''; + if ( 0 === --$node_count ) { + $node_count = 8; + echo "\n"; + } + echo "\e[36m{$closer}\e[32m{$token_name}\e[m "; + + if ( $processor->is_tag_closer() ) { + switch ( $token_name ) { + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + $md .= "\n"; + break; + + case 'B': + case 'STRONG': + $md .= '*'; + break; + + case 'I': + case 'EM': + $md .= '_'; + break; + + case 'OL': + case 'UL': + --$depth; + array_pop( $list_items ); + break; + } + + // Proceed to the next token. + continue; + } + + switch ( $token_name ) { + case '#text': + $md .= $processor->get_modifiable_text(); + break; + + case 'P': + $md .= "\n"; + break; + + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + $hash_count = intval( $token_name[1] ); + $hashes = str_pad( '', $hash_count, '#' ); + $md .= "\n\n{$hashes} "; + break; + + case 'B': + case 'STRONG': + $md .= '*'; + break; + + case 'I': + case 'EM': + $md .= '_'; + break; + + case 'LI': + $list_item = end( $list_items ); + $md .= "\n{$indent}{$list_item} "; + break; + + case 'OL': + ++$depth; + $list_items[] = '*'; + break; + + case 'UL': + ++$depth; + $list_items[] = '-'; + break; + } + + $last_breadcrumbs = $breadcrumbs; + } + + if ( null !== $processor->get_last_error() ) { + die( "Encountered unsupported HTML: failed to convert.\n" ); + } + + $closed_elements = array(); + for ( $i = 0; $i < count( $last_breadcrumbs ); $i++ ) { + if ( + isset( $last_breadcrumbs[ $i ], $breadcrumbs[ $i ] ) && + $last_breadcrumbs[ $i ] === $breadcrumbs[ $i ] + ) { + continue; + } + + $closed_elements = array_slice( $last_breadcrumbs, $i ); + break; + } + + $closed_elements = array_reverse( $closed_elements ); + foreach ( $closed_elements as $element ) { + switch ( $element ) { + case 'H1': + case 'H2': + case 'H3': + case 'H4': + case 'H5': + case 'H6': + $md .= "\n"; + break; + + case 'B': + case 'STRONG': + $md .= '*'; + break; + + case 'I': + case 'EM': + $md .= '_'; + break; + + case 
'OL': + case 'UL': + --$depth; + array_pop( $list_items ); + break; + } + } + + return $md; + } +} diff --git a/src/wp-settings.php b/src/wp-settings.php index 9673479bfab76..e5168eaebc116 100644 --- a/src/wp-settings.php +++ b/src/wp-settings.php @@ -258,6 +258,7 @@ require ABSPATH . WPINC . '/html-api/class-wp-html-token.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor-state.php'; require ABSPATH . WPINC . '/html-api/class-wp-html-processor.php'; +require ABSPATH . WPINC . '/html-api/class-wp-html-to-markdown-converter.php'; require ABSPATH . WPINC . '/class-wp-http.php'; require ABSPATH . WPINC . '/class-wp-http-streams.php'; require ABSPATH . WPINC . '/class-wp-http-curl.php';