Skip to content

Commit

Permalink
Post Images: add new method to detect images from Gutenberg blocks (#…
Browse files Browse the repository at this point in the history
…11000)

* Post Images: add new method to detect images from Gutenberg blocks

Fixes #10501

- This new method parses all HTML content using WP's parse_blocks.
- We only select Core image blocks.
- We also remove any image blocks that do not include a post ID. This is because image
blocks that do not have a post ID are currently inserted using the "from image URL"
option in the image block picker. As such, all the info in that block is the image URL;
we have no data about image size for those images.


* Use new method to get data when extracting from attachments

* Unit Tests: avoid failures for WP versions that do not support GB.

* Add support for core Gallery blocks

* Add support for Tiled Gallery block

* Return empty string when no content instead of using undefined var

* Fix the behaviour of get_post_html

- It should be able to return the post URL when possible.
- When it does not return any post content (like an empty string), it should be handled
properly.
- We should fetch the post URL and not the post title.

* Fixed block image retrieval to not return empty arrays.
  This resulted in warnings because an opengraph tag function has tried to access the 'src' attribute of an empty array. Now the block parser doesn't return an empty array in case the image data is not found or not good enough.

* Fixed one more case where false results needed to be filtered out.
  • Loading branch information
jeherve authored and oskosk committed Jan 3, 2019
1 parent 6189a00 commit d545d4f
Show file tree
Hide file tree
Showing 2 changed files with 348 additions and 38 deletions.
195 changes: 157 additions & 38 deletions class.jetpack-post-images.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,17 @@ static function from_slideshow( $post_id, $width = 200, $height = 200 ) {
foreach ( $post_images as $post_image ) {
if ( !$post_image_id = absint( $post_image->id ) )
continue;

$meta = wp_get_attachment_metadata( $post_image_id );

// Must be larger than 200x200 (or user-specified)
if ( !isset( $meta['width'] ) || $meta['width'] < $width )
continue;
if ( !isset( $meta['height'] ) || $meta['height'] < $height )
continue;

$url = wp_get_attachment_url( $post_image_id );

$images[] = array(
'type' => 'image',
'from' => 'slideshow',
Expand Down Expand Up @@ -187,23 +187,10 @@ static function from_attachment( $post_id, $width = 200, $height = 200 ) {
$permalink = get_permalink( $post_id );

foreach ( $post_images as $post_image ) {
$meta = wp_get_attachment_metadata( $post_image->ID );
// Must be larger than 200x200
if ( !isset( $meta['width'] ) || $meta['width'] < $width )
continue;
if ( !isset( $meta['height'] ) || $meta['height'] < $height )
continue;

$url = wp_get_attachment_url( $post_image->ID );

$images[] = array(
'type' => 'image',
'from' => 'attachment',
'src' => $url,
'src_width' => $meta['width'],
'src_height' => $meta['height'],
'href' => $permalink,
);
$current_image = self::get_attachment_data( $post_image->ID, $permalink, $width, $height );
if ( false !== $current_image ) {
$images[] = $current_image;
}
}

/*
Expand Down Expand Up @@ -319,6 +306,70 @@ static function from_thumbnail( $post_id, $width = 200, $height = 200 ) {
return $images;
}

/**
* Get images from Gutenberg Image blocks.
*
* @since 6.9.0
*
* @param mixed $html_or_id The HTML string to parse for images, or a post id.
* @param int $width Minimum Image width.
* @param int $height Minimum Image height.
*/
public static function from_blocks( $html_or_id, $width = 200, $height = 200 ) {
$images = array();

// Bail early if the site does not support the block editor.
if ( ! function_exists( 'parse_blocks' ) ) {
return $images;
}

$html_info = self::get_post_html( $html_or_id );

if ( empty( $html_info['html'] ) ) {
return $images;
}

// Look for block information in the HTML.
$blocks = parse_blocks( $html_info['html'] );
if ( empty( $blocks ) ) {
return $images;
}

foreach ( $blocks as $block ) {
/**
* Parse content from Core Image blocks.
* If it is an image block for an image hosted on our site, it will have an ID.
* If it does not have an ID, let `from_html` parse that content later,
* and extract an image if it has size parameters.
*/
if (
'core/image' === $block['blockName']
&& ! empty( $block['attrs']['id'] )
) {
$images[] = self::get_attachment_data( $block['attrs']['id'], $html_info['post_url'], $width, $height );
}

/**
* Parse content from Core Gallery blocks and Jetpack's Tiled Gallery blocks.
* Gallery blocks include the ID of each one of the images in the gallery.
*/
if (
( 'core/gallery' === $block['blockName'] || 'jetpack/tiled-gallery' === $block['blockName'] )
&& ! empty( $block['attrs']['ids'] )
) {
foreach ( $block['attrs']['ids'] as $img_id ) {
$images[] = self::get_attachment_data( $img_id, $html_info['post_url'], $width, $height );
}
}
}

/**
* Returning a filtered array because get_attachment_data returns false
* for unsuccessful attempts.
*/
return array_filter( $images );
}

/**
* Very raw -- just parse the HTML and pull out any/all img tags and return their src
*
Expand All @@ -333,19 +384,9 @@ static function from_thumbnail( $post_id, $width = 200, $height = 200 ) {
static function from_html( $html_or_id, $width = 200, $height = 200 ) {
$images = array();

if ( is_numeric( $html_or_id ) ) {
$post = get_post( $html_or_id );
$html_info = self::get_post_html( $html_or_id );

if ( empty( $post ) || ! empty( $post->post_password ) ) {
return $images;
}

$html = $post->post_content; // DO NOT apply the_content filters here, it will cause loops.
} else {
$html = $html_or_id;
}

if ( ! $html ) {
if ( empty( $html_info['html'] ) ) {
return $images;
}

Expand All @@ -360,7 +401,7 @@ static function from_html( $html_or_id, $width = 200, $height = 200 ) {
// The @ is not enough to suppress errors when dealing with libxml,
// we have to tell it directly how we want to handle errors.
libxml_use_internal_errors( true );
@$dom_doc->loadHTML( $html );
@$dom_doc->loadHTML( $html_info['html'] );
libxml_use_internal_errors( false );

$image_tags = $dom_doc->getElementsByTagName( 'img' );
Expand Down Expand Up @@ -409,12 +450,12 @@ static function from_html( $html_or_id, $width = 200, $height = 200 ) {
}

$images[] = array(
'type' => 'image',
'from' => 'html',
'src' => $img_src,
'type' => 'image',
'from' => 'html',
'src' => $img_src,
'src_width' => $meta['width'],
'src_height' => $meta['height'],
'href' => '', // No link to apply to these. Might potentially parse for that as well, but not for now.
'href' => $html_info['post_url'],
);
}
return $images;
Expand Down Expand Up @@ -568,6 +609,7 @@ static function get_images( $post_id, $args = array() ) {
'from_slideshow' => true,
'from_gallery' => true,
'from_attachment' => true,
'from_blocks' => true,
'from_html' => true,

'html_content' => '' // HTML string to pass to from_html()
Expand All @@ -583,6 +625,12 @@ static function get_images( $post_id, $args = array() ) {
$media = self::from_gallery( $post_id );
if ( !$media && $args['from_attachment'] )
$media = self::from_attachment( $post_id, $args['width'], $args['height'] );
if ( ! $media && $args['from_blocks'] ) {
if ( empty( $args['html_content'] ) )
$media = self::from_blocks( $post_id, $args['width'], $args['height'] ); // Use the post_id, which will load the content
else
$media = self::from_blocks( $args['html_content'], $args['width'], $args['height'] ); // If html_content is provided, use that
}
if ( !$media && $args['from_html'] ) {
if ( empty( $args['html_content'] ) )
$media = self::from_html( $post_id, $args['width'], $args['height'] ); // Use the post_id, which will load the content
Expand Down Expand Up @@ -653,4 +701,75 @@ static function fit_image_url( $src, $width, $height ) {
// Arg... no way to resize image using WordPress.com infrastructure!
return $src;
}

/**
* Get HTML from given post content.
*
* @since 6.9.0
*
* @param mixed $html_or_id The HTML string to parse for images, or a post id.
*
* @return array $html_info {
* @type string $html Post content.
* @type string $post_url Post URL.
* }
*/
static function get_post_html( $html_or_id ) {
if ( is_numeric( $html_or_id ) ) {
$post = get_post( $html_or_id );

if ( empty( $post ) || ! empty( $post->post_password ) ) {
return '';
}

$html_info = array(
'html' => $post->post_content, // DO NOT apply the_content filters here, it will cause loops.
'post_url' => get_permalink( $post->ID ),
);
} else {
$html_info = array(
'html' => $html_or_id,
'post_url' => '',
);
}
return $html_info;
}

/**
* Get info about a WordPress attachment.
*
* @since 6.9.0
*
* @param int $attachment_id Attachment ID.
* @param string $post_url URL of the post, if we have one.
* @param int $width Minimum Image width.
* @param int $height Minimum Image height.
* @return array|bool Image data or false if unavailable.
*/
public static function get_attachment_data( $attachment_id, $post_url = '', $width, $height ) {
if ( empty( $attachment_id ) ) {
return false;
}

$meta = wp_get_attachment_metadata( $attachment_id );

// The image must be larger than 200x200.
if ( ! isset( $meta['width'] ) || $meta['width'] < $width ) {
return false;
}
if ( ! isset( $meta['height'] ) || $meta['height'] < $height ) {
return false;
}

$url = wp_get_attachment_url( $attachment_id );

return array(
'type' => 'image',
'from' => 'attachment',
'src' => $url,
'src_width' => $meta['width'],
'src_height' => $meta['height'],
'href' => $post_url,
);
}
}
Loading

0 comments on commit d545d4f

Please sign in to comment.