From d545d4f5dfd9c77b01055a7adaab05ad76656dce Mon Sep 17 00:00:00 2001 From: Jeremy Herve Date: Thu, 3 Jan 2019 17:51:18 +0100 Subject: [PATCH] Post Images: add new method to detect images from Gutenberg blocks (#11000) * Post Images: add new method to detect images from Gutenberg blocks Fixes #10501 - This new method parses all HTML content using WP's parse_blocks. - We only select Core image blocks. - We also remove any image blocks that do not include a post ID. This is because image blocks that do not have a post ID are currently inserted using the "from image URL" option in the image block picker. As such, the only info in that block is the image URL; we have no data about image size for those images. * Use new method to get data when extracting from attachments * Unit Tests: avoid failures for WP versions that do not support Gutenberg (GB). * Add support for core Gallery blocks * Add support for Tiled Gallery block * Return empty string when no content instead of using undefined var * Fix the behaviour of get_post_html - It should be able to return the post URL when possible. - When it does not return any post content (like an empty string), it should be handled properly. - We should fetch the post URL and not the post title. * Fixed block image retrieval to not return empty arrays. This resulted in warnings because an opengraph tag function tried to access the 'src' key of an empty array. Now the block parser doesn't return an empty array when the image data is not found or does not meet the minimum size. * Fixed one more case where false results needed to be filtered out. 
--- class.jetpack-post-images.php | 195 +++++++++++++++---- tests/php/test_class.jetpack-post-images.php | 191 ++++++++++++++++++ 2 files changed, 348 insertions(+), 38 deletions(-) diff --git a/class.jetpack-post-images.php b/class.jetpack-post-images.php index cccc4b8af20a2..02260616d6f89 100644 --- a/class.jetpack-post-images.php +++ b/class.jetpack-post-images.php @@ -56,17 +56,17 @@ static function from_slideshow( $post_id, $width = 200, $height = 200 ) { foreach ( $post_images as $post_image ) { if ( !$post_image_id = absint( $post_image->id ) ) continue; - + $meta = wp_get_attachment_metadata( $post_image_id ); - + // Must be larger than 200x200 (or user-specified) if ( !isset( $meta['width'] ) || $meta['width'] < $width ) continue; if ( !isset( $meta['height'] ) || $meta['height'] < $height ) continue; - + $url = wp_get_attachment_url( $post_image_id ); - + $images[] = array( 'type' => 'image', 'from' => 'slideshow', @@ -187,23 +187,10 @@ static function from_attachment( $post_id, $width = 200, $height = 200 ) { $permalink = get_permalink( $post_id ); foreach ( $post_images as $post_image ) { - $meta = wp_get_attachment_metadata( $post_image->ID ); - // Must be larger than 200x200 - if ( !isset( $meta['width'] ) || $meta['width'] < $width ) - continue; - if ( !isset( $meta['height'] ) || $meta['height'] < $height ) - continue; - - $url = wp_get_attachment_url( $post_image->ID ); - - $images[] = array( - 'type' => 'image', - 'from' => 'attachment', - 'src' => $url, - 'src_width' => $meta['width'], - 'src_height' => $meta['height'], - 'href' => $permalink, - ); + $current_image = self::get_attachment_data( $post_image->ID, $permalink, $width, $height ); + if ( false !== $current_image ) { + $images[] = $current_image; + } } /* @@ -319,6 +306,70 @@ static function from_thumbnail( $post_id, $width = 200, $height = 200 ) { return $images; } + /** + * Get images from core Image, core Gallery, and Jetpack Tiled Gallery blocks. 
+ * + * @since 6.9.0 + * + * @param mixed $html_or_id The HTML string to parse for images, or a post id. + * @param int $width Minimum Image width. + * @param int $height Minimum Image height. + */ + public static function from_blocks( $html_or_id, $width = 200, $height = 200 ) { + $images = array(); + + // Bail early if the site does not support the block editor. + if ( ! function_exists( 'parse_blocks' ) ) { + return $images; + } + + $html_info = self::get_post_html( $html_or_id ); + + if ( empty( $html_info['html'] ) ) { + return $images; + } + + // Look for block information in the HTML. + $blocks = parse_blocks( $html_info['html'] ); + if ( empty( $blocks ) ) { + return $images; + } + + foreach ( $blocks as $block ) { + /** + * Parse content from Core Image blocks. + * If it is an image block for an image hosted on our site, it will have an ID. + * If it does not have an ID, let `from_html` parse that content later, + * and extract an image if it has size parameters. + */ + if ( + 'core/image' === $block['blockName'] + && ! empty( $block['attrs']['id'] ) + ) { + $images[] = self::get_attachment_data( $block['attrs']['id'], $html_info['post_url'], $width, $height ); + } + + /** + * Parse content from Core Gallery blocks and Jetpack's Tiled Gallery blocks. + * Gallery blocks include the ID of each one of the images in the gallery. + */ + if ( + ( 'core/gallery' === $block['blockName'] || 'jetpack/tiled-gallery' === $block['blockName'] ) + && ! empty( $block['attrs']['ids'] ) + ) { + foreach ( $block['attrs']['ids'] as $img_id ) { + $images[] = self::get_attachment_data( $img_id, $html_info['post_url'], $width, $height ); + } + } + } + + /** + * Returning a filtered array because get_attachment_data returns false + * for unsuccessful attempts. array_filter preserves keys, so the result may not be zero-indexed. 
+ */ + return array_filter( $images ); + } + /** * Very raw -- just parse the HTML and pull out any/all img tags and return their src * @@ -333,19 +384,9 @@ static function from_thumbnail( $post_id, $width = 200, $height = 200 ) { static function from_html( $html_or_id, $width = 200, $height = 200 ) { $images = array(); - if ( is_numeric( $html_or_id ) ) { - $post = get_post( $html_or_id ); + $html_info = self::get_post_html( $html_or_id ); - if ( empty( $post ) || ! empty( $post->post_password ) ) { - return $images; - } - - $html = $post->post_content; // DO NOT apply the_content filters here, it will cause loops. - } else { - $html = $html_or_id; - } - - if ( ! $html ) { + if ( empty( $html_info['html'] ) ) { return $images; } @@ -360,7 +401,7 @@ static function from_html( $html_or_id, $width = 200, $height = 200 ) { // The @ is not enough to suppress errors when dealing with libxml, // we have to tell it directly how we want to handle errors. libxml_use_internal_errors( true ); - @$dom_doc->loadHTML( $html ); + @$dom_doc->loadHTML( $html_info['html'] ); libxml_use_internal_errors( false ); $image_tags = $dom_doc->getElementsByTagName( 'img' ); @@ -409,12 +450,12 @@ static function from_html( $html_or_id, $width = 200, $height = 200 ) { } $images[] = array( - 'type' => 'image', - 'from' => 'html', - 'src' => $img_src, + 'type' => 'image', + 'from' => 'html', + 'src' => $img_src, 'src_width' => $meta['width'], 'src_height' => $meta['height'], - 'href' => '', // No link to apply to these. Might potentially parse for that as well, but not for now. 
+ 'href' => $html_info['post_url'], ); } return $images; @@ -568,6 +609,7 @@ static function get_images( $post_id, $args = array() ) { 'from_slideshow' => true, 'from_gallery' => true, 'from_attachment' => true, + 'from_blocks' => true, 'from_html' => true, 'html_content' => '' // HTML string to pass to from_html() @@ -583,6 +625,12 @@ static function get_images( $post_id, $args = array() ) { $media = self::from_gallery( $post_id ); if ( !$media && $args['from_attachment'] ) $media = self::from_attachment( $post_id, $args['width'], $args['height'] ); + if ( ! $media && $args['from_blocks'] ) { + if ( empty( $args['html_content'] ) ) + $media = self::from_blocks( $post_id, $args['width'], $args['height'] ); // Use the post_id, which will load the content + else + $media = self::from_blocks( $args['html_content'], $args['width'], $args['height'] ); // If html_content is provided, use that + } if ( !$media && $args['from_html'] ) { if ( empty( $args['html_content'] ) ) $media = self::from_html( $post_id, $args['width'], $args['height'] ); // Use the post_id, which will load the content @@ -653,4 +701,75 @@ static function fit_image_url( $src, $width, $height ) { // Arg... no way to resize image using WordPress.com infrastructure! return $src; } + + /** + * Get the post content and permalink for a post ID, or wrap a raw HTML string. Returns an empty string instead of an array when the post is missing or password-protected. + * + * @since 6.9.0 + * + * @param mixed $html_or_id The HTML string to parse for images, or a post id. + * + * @return array $html_info { + * @type string $html Post content. + * @type string $post_url Post URL (empty string when a raw HTML string was given). + * } + */ + static function get_post_html( $html_or_id ) { + if ( is_numeric( $html_or_id ) ) { + $post = get_post( $html_or_id ); + + if ( empty( $post ) || ! empty( $post->post_password ) ) { + return ''; + } + + $html_info = array( + 'html' => $post->post_content, // DO NOT apply the_content filters here, it will cause loops. 
+ 'post_url' => get_permalink( $post->ID ), + ); + } else { + $html_info = array( + 'html' => $html_or_id, + 'post_url' => '', + ); + } + return $html_info; + } + + /** + * Get info about a WordPress attachment. + * + * @since 6.9.0 + * + * @param int $attachment_id Attachment ID. + * @param string $post_url URL of the post, if we have one. NOTE(review): required parameters follow this one, so its default cannot actually be relied on by callers. + * @param int $width Minimum Image width. + * @param int $height Minimum Image height. + * @return array|bool Image data or false if unavailable. + */ + public static function get_attachment_data( $attachment_id, $post_url = '', $width, $height ) { + if ( empty( $attachment_id ) ) { + return false; + } + + $meta = wp_get_attachment_metadata( $attachment_id ); + + // The image must be at least $width x $height. + if ( ! isset( $meta['width'] ) || $meta['width'] < $width ) { + return false; + } + if ( ! isset( $meta['height'] ) || $meta['height'] < $height ) { + return false; + } + + $url = wp_get_attachment_url( $attachment_id ); + + return array( + 'type' => 'image', + 'from' => 'attachment', + 'src' => $url, + 'src_width' => $meta['width'], + 'src_height' => $meta['height'], + 'href' => $post_url, + ); + } } diff --git a/tests/php/test_class.jetpack-post-images.php b/tests/php/test_class.jetpack-post-images.php index 10e4e2e7c1335..e9b93629174c9 100644 --- a/tests/php/test_class.jetpack-post-images.php +++ b/tests/php/test_class.jetpack-post-images.php @@ -91,4 +91,195 @@ public function test_from_attachment_is_correct_array() { $this->assertEquals( $images[ 0 ][ 'src' ], $img_url ); } + /** + * Create a post with an image block containing a large image attached to another post. + * + * @since 6.9.0 + * + * @return array $post_info { + * An array of information about our post. + * @type int $post_id Post ID. + * @type string $img_url Image URL we'll look to extract. + * } + */ + protected function get_post_with_image_block() { + $img_name = 'image.jpg'; + $img_url = 'http://' . WP_TESTS_DOMAIN . '/wp-content/uploads/' . 
$img_name; + $img_dimensions = array( 'width' => 250, 'height' => 250 ); + + $post_id = $this->factory->post->create(); + $attachment_id = $this->factory->attachment->create_object( $img_name, $post_id, array( + 'post_mime_type' => 'image/jpeg', + 'post_type' => 'attachment' + ) ); + wp_update_attachment_metadata( $attachment_id, $img_dimensions ); + + // Create another post with that picture. + $post_html = sprintf( + '
', + $img_url, + $attachment_id + ); + $second_post_id = $this->factory->post->create( array( + 'post_content' => $post_html, + ) ); + + return array( + 'post_id' => $second_post_id, + 'img_url' => $img_url, + ); + } + + /** + * Test if an array of images can be extracted from Image blocks in the new block editor. + * + * @covers Jetpack_PostImages::from_blocks + * @since 6.9.0 + */ + public function test_from_image_block_from_post_id_is_array() { + if ( ! function_exists( 'parse_blocks' ) ) { + $this->markTestSkipped( 'parse_blocks not available. Block editor not available' ); + return; + } + + $post_info = $this->get_post_with_image_block(); + + $images = Jetpack_PostImages::from_blocks( $post_info['post_id'] ); + + $this->assertEquals( count( $images ), 1 ); + } + + /** + * Test if the array extracted from Image blocks includes the image URL. + * + * @covers Jetpack_PostImages::from_blocks + * @since 6.9.0 + */ + public function test_from_image_block_from_post_id_is_correct_array() { + if ( ! function_exists( 'parse_blocks' ) ) { + $this->markTestSkipped( 'parse_blocks not available. Block editor not available' ); + return; + } + + $post_info = $this->get_post_with_image_block(); + + $images = Jetpack_PostImages::from_blocks( $post_info['post_id'] ); + + $this->assertEquals( $images[ 0 ][ 'src' ], $post_info['img_url'] ); + } + + /** + * Test if an image block with an externally hosted image is not extracted by Post Images. + * + * @covers Jetpack_PostImages::from_blocks + * @since 6.9.0 + */ + public function test_from_image_block_from_html_is_empty_array() { + if ( ! function_exists( 'parse_blocks' ) ) { + $this->markTestSkipped( 'parse_blocks not available. Block editor not available' ); + return; + } + + $html = '
'; + + $images = Jetpack_PostImages::from_blocks( $html ); + + $this->assertEmpty( $images ); + } + + /** + * Create a post with a gallery block containing a few images attached to another post. + * + * @since 6.9.0 + * + * @return array $post_info { + * An array of information about our post. + * @type int $post_id Post ID. + * @type array $img_urls Image URLs we'll look to extract. + * } + */ + protected function get_post_with_gallery_block() { + $img_urls = array( + 'image.jpg' => 'http://' . WP_TESTS_DOMAIN . '/wp-content/uploads/image.jpg', + 'image2.jpg' => 'http://' . WP_TESTS_DOMAIN . '/wp-content/uploads/image2.jpg', + ); + $img_dimensions = array( 'width' => 250, 'height' => 250 ); + + // Create post. + $post_id = $this->factory->post->create(); + // Attach images. + foreach( $img_urls as $img_name => $img_url ) { + $attachment_id = $this->factory->attachment->create_object( $img_name, $post_id, array( + 'post_mime_type' => 'image/jpeg', + 'post_type' => 'attachment' + ) ); + wp_update_attachment_metadata( $attachment_id, $img_dimensions ); + + // Update our array to store attachment IDs. We'll need them later. + $img_urls[ $attachment_id ] = $img_url; + unset( $img_urls[ $img_name ] ); + } + + // Gallery markup. + $gallery_html = sprintf( + ''; + + // Create another post with those pictures. + $second_post_id = $this->factory->post->create( array( + 'post_content' => $gallery_html, + ) ); + + return array( + 'post_id' => $second_post_id, + 'img_urls' => array_values( $img_urls ), + ); + } + + /** + * Test if the array extracted from Gallery blocks includes the image URL. + * + * @covers Jetpack_PostImages::from_blocks + * @since 6.9.0 + */ + public function test_from_gallery_block_from_post_id_is_correct_array() { + if ( ! function_exists( 'parse_blocks' ) ) { + $this->markTestSkipped( 'parse_blocks not available. 
Block editor not available' ); + return; + } + + $post_info = $this->get_post_with_gallery_block(); + + $images = Jetpack_PostImages::from_blocks( $post_info['post_id'] ); + + $this->assertEquals( $images[0]['src'], $post_info['img_urls'][0] ); + $this->assertEquals( $images[1]['src'], $post_info['img_urls'][1] ); + } + + /** + * Test that get_attachment_data returns false when attachment data is unavailable or the minimum dimensions are not met. + * + * @covers Jetpack_PostImages::get_attachment_data + * @since 6.9.0 + */ + public function test_get_attachment_data_returns_false_on_unavailable_data() { + $this->assertEquals( false, Jetpack_PostImages::get_attachment_data( PHP_INT_MAX, '', 200, 200 ) ); + + $post = $this->get_post_with_image_block(); + + // Testing the height condition. NOTE(review): this passes the post ID rather than the attachment ID; confirm the size check is actually reached. + $this->assertEquals( false, Jetpack_PostImages::get_attachment_data( $post['post_id'], '', 200, PHP_INT_MAX ) ); + + // Testing the width condition. NOTE(review): same post ID caveat as the height check. + $this->assertEquals( false, Jetpack_PostImages::get_attachment_data( $post['post_id'], '', PHP_INT_MAX, 200 ) ); + } } // end class