diff --git a/includes/amp-helper-functions.php b/includes/amp-helper-functions.php index a408add9e57..b06a2bbebbc 100644 --- a/includes/amp-helper-functions.php +++ b/includes/amp-helper-functions.php @@ -293,7 +293,7 @@ function amp_get_content_sanitizers( $post = null ) { * @param array $handlers Handlers. * @param WP_Post $post Post. Deprecated. */ - return apply_filters( 'amp_content_sanitizers', + $sanitizers = apply_filters( 'amp_content_sanitizers', array( 'AMP_Img_Sanitizer' => array(), 'AMP_Form_Sanitizer' => array(), @@ -309,6 +309,17 @@ function amp_get_content_sanitizers( $post = null ) { ), $post ); + + // Force style sanitizer and whitelist sanitizer to be at end. + foreach ( array( 'AMP_Style_Sanitizer', 'AMP_Tag_And_Attribute_Sanitizer' ) as $class_name ) { + if ( isset( $sanitizers[ $class_name ] ) ) { + $sanitizer = $sanitizers[ $class_name ]; + unset( $sanitizers[ $class_name ] ); + $sanitizers[ $class_name ] = $sanitizer; + } + } + + return $sanitizers; } /** diff --git a/includes/class-amp-autoloader.php b/includes/class-amp-autoloader.php index f9dfb3dc175..16880ed1325 100644 --- a/includes/class-amp-autoloader.php +++ b/includes/class-amp-autoloader.php @@ -31,7 +31,6 @@ class AMP_Autoloader { private static $_classmap = array( 'AMP_Theme_Support' => 'includes/class-amp-theme-support', 'AMP_Comment_Walker' => 'includes/class-amp-comment-walker', - 'AMP_WP_Styles' => 'includes/class-amp-wp-styles', 'AMP_Template_Customizer' => 'includes/admin/class-amp-customizer', 'AMP_Post_Meta_Box' => 'includes/admin/class-amp-post-meta-box', 'AMP_Post_Type_Support' => 'includes/class-amp-post-type-support', diff --git a/includes/class-amp-theme-support.php b/includes/class-amp-theme-support.php index ffae0b69ea0..9a71c112404 100644 --- a/includes/class-amp-theme-support.php +++ b/includes/class-amp-theme-support.php @@ -19,13 +19,6 @@ class AMP_Theme_Support { */ const SCRIPTS_PLACEHOLDER = ''; - /** - * Replaced with the necessary styles. 
- * - * @var string - */ - const STYLES_PLACEHOLDER = ''; - /** * Sanitizer classes. * @@ -152,16 +145,11 @@ public static function register_hooks() { // Remove core actions which are invalid AMP. remove_action( 'wp_head', 'wp_post_preview_js', 1 ); - remove_action( 'wp_head', 'locale_stylesheet' ); // Replaced below in add_amp_styles_placeholder() method. remove_action( 'wp_head', 'print_emoji_detection_script', 7 ); - remove_action( 'wp_head', 'wp_print_styles', 8 ); // Replaced below in add_amp_styles_placeholder() method. remove_action( 'wp_head', 'wp_print_head_scripts', 9 ); - remove_action( 'wp_head', 'wp_custom_css_cb', 101 ); // Replaced below in add_amp_styles_placeholder() method. remove_action( 'wp_footer', 'wp_print_footer_scripts', 20 ); remove_action( 'wp_print_styles', 'print_emoji_styles' ); - add_action( 'wp_enqueue_scripts', array( __CLASS__, 'override_wp_styles' ), -1 ); - /* * Add additional markup required by AMP . * Note that the meta[name=viewport] is not added here because a theme may want to define one with additional @@ -171,8 +159,8 @@ public static function register_hooks() { * in this case too we should defer to the theme as well to output the meta charset because it is possible the * install is not on utf-8 and we may need to do a encoding conversion. */ - add_action( 'wp_head', array( __CLASS__, 'add_amp_styles_placeholder' ), 8 ); // Because wp_print_styles() normally happens at 8. add_action( 'wp_head', array( __CLASS__, 'add_amp_component_scripts' ), 10 ); + add_action( 'wp_head', array( __CLASS__, 'print_amp_styles' ) ); add_action( 'wp_head', 'amp_add_generator_metadata', 20 ); add_action( 'wp_head', 'amp_print_schemaorg_metadata' ); @@ -313,21 +301,6 @@ public static function register_widgets() { } } - /** - * Override $wp_styles as AMP_WP_Styles, ideally before first instantiated as WP_Styles. - * - * @see wp_styles() - * @global AMP_WP_Styles $wp_styles - * @return AMP_WP_Styles Instance. 
- */ - public static function override_wp_styles() { - global $wp_styles; - if ( ! ( $wp_styles instanceof AMP_WP_Styles ) ) { - $wp_styles = new AMP_WP_Styles(); // WPCS: global override ok. - } - return $wp_styles; - } - /** * Register content embed handlers. * @@ -621,53 +594,11 @@ public static function filter_cancel_comment_reply_link( $formatted_link, $link, } /** - * Print placeholder for Custom AMP styles. - * - * The actual styles for the page injected into the placeholder when output buffering is completed. - * - * @see AMP_Theme_Support::finish_output_buffering() + * Print AMP boilerplate and custom styles. */ - public static function add_amp_styles_placeholder() { - echo self::STYLES_PLACEHOLDER; // WPCS: XSS OK. - - $wp_styles = wp_styles(); - if ( ! ( $wp_styles instanceof AMP_WP_Styles ) ) { - trigger_error( esc_html__( 'wp_styles() does not return an instance of AMP_WP_Styles as required.', 'amp' ), E_USER_WARNING ); // phpcs:ignore - return; - } - - $wp_styles->do_items(); // Normally done at wp_head priority 8. - $wp_styles->do_locale_stylesheet(); // Normally done at wp_head priority 10. - $wp_styles->do_custom_css(); // Normally done at wp_head priority 101. - } - - /** - * Get AMP boilerplate and custom styles. - * - * @param string[] $stylesheets Initial stylesheets. - * @see wp_custom_css_cb() - * @return string Concatenated stylesheets. - */ - public static function get_amp_styles( $stylesheets ) { - $css = wp_styles()->print_code; - - $css .= join( $stylesheets ); - - /** - * Filters AMP custom CSS before it is injected onto the output buffer for the response. - * - * Plugins may add their own styles, such as for rendered widgets, by amending them via this filter. - * - * @since 0.7 - * - * @param string $css AMP CSS. - */ - $css = apply_filters( 'amp_custom_styles', $css ); - - $css = wp_strip_all_tags( $css ); - - return amp_get_boilerplate_code() . "\n" - . 
- ''; + public static function print_amp_styles() { + echo amp_get_boilerplate_code() . "\n"; // WPCS: XSS OK. + echo "\n"; // This will be populated by AMP_Style_Sanitizer. } /** @@ -880,14 +811,6 @@ public static function prepare_response( $response ) { 1 ); - // Inject styles. - $response = preg_replace( - '#' . preg_quote( self::STYLES_PLACEHOLDER, '#' ) . '#', - self::get_amp_styles( $assets['stylesheets'] ), - $response, - 1 - ); - return $response; } } diff --git a/includes/class-amp-wp-styles.php b/includes/class-amp-wp-styles.php deleted file mode 100644 index c397ce4c28b..00000000000 --- a/includes/class-amp-wp-styles.php +++ /dev/null @@ -1,251 +0,0 @@ -allowed_font_src_regex = '@^(' . $spec_rule[ AMP_Rule_Spec::ATTR_SPEC_LIST ]['href']['value_regex'] . ')$@'; - break; - } - } - } - - /** - * Generates an enqueued style's fully-qualified file path. - * - * @since 0.7 - * @see WP_Styles::_css_href() - * - * @param string $src The source URL of the enqueued style. - * @param string $handle The style's registered handle. - * @return string|WP_Error Style's absolute validated filesystem path, or WP_Error when error. - */ - public function get_validated_css_file_path( $src, $handle ) { - $needs_base_url = ( - ! is_bool( $src ) - && - ! preg_match( '|^(https?:)?//|', $src ) - && - ! ( $this->content_url && 0 === strpos( $src, $this->content_url ) ) - ); - if ( $needs_base_url ) { - $src = $this->base_url . $src; - } - - /** This filter is documented in wp-includes/class.wp-styles.php */ - $src = apply_filters( 'style_loader_src', $src, $handle ); - $src = esc_url_raw( $src ); - - // Strip query and fragment from URL. - $src = preg_replace( ':[\?#].*$:', '', $src ); - - if ( ! 
preg_match( '/\.(css|less|scss|sass)$/i', $src ) ) { - /* translators: %1$s is stylesheet handle, %2$s is stylesheet URL */ - return new WP_Error( 'amp_css_bad_file_extension', sprintf( __( 'Skipped stylesheet %1$s which does not have recognized CSS file extension (%2$s).', 'amp' ), $handle, $src ) ); - } - - $includes_url = includes_url( '/' ); - $content_url = content_url( '/' ); - $admin_url = get_admin_url( null, '/' ); - $css_path = null; - if ( 0 === strpos( $src, $content_url ) ) { - $css_path = WP_CONTENT_DIR . substr( $src, strlen( $content_url ) - 1 ); - } elseif ( 0 === strpos( $src, $includes_url ) ) { - $css_path = ABSPATH . WPINC . substr( $src, strlen( $includes_url ) - 1 ); - } elseif ( 0 === strpos( $src, $admin_url ) ) { - $css_path = ABSPATH . 'wp-admin' . substr( $src, strlen( $admin_url ) - 1 ); - } - - if ( ! $css_path || false !== strpos( '../', $css_path ) || 0 !== validate_file( $css_path ) || ! file_exists( $css_path ) ) { - /* translators: %1$s is stylesheet handle, %2$s is stylesheet URL */ - return new WP_Error( 'amp_css_path_not_found', sprintf( __( 'Unable to locate filesystem path for stylesheet %1$s (%2$s).', 'amp' ), $handle, $src ) ); - } - - return $css_path; - } - - /** - * Processes a style dependency. - * - * @since 0.7 - * @see WP_Styles::do_item() - * - * @param string $handle The style's registered handle. - * @return bool True on success, false on failure. - */ - public function do_item( $handle ) { - if ( ! WP_Dependencies::do_item( $handle ) ) { - return false; - } - $obj = $this->registered[ $handle ]; - - // Conditional styles and alternate styles aren't supported in AMP. - if ( isset( $obj->extra['conditional'] ) || isset( $obj->extra['alt'] ) ) { - return false; - } - - if ( isset( $obj->args ) ) { - $media = esc_attr( $obj->args ); - } else { - $media = 'all'; - } - - // A single item may alias a set of items, by having dependencies, but no source. - if ( ! 
$obj->src ) { - $inline_style = $this->print_inline_style( $handle, false ); - if ( $inline_style ) { - $this->print_code .= $inline_style; - } - return true; - } - - // Allow font URLs. - if ( $this->allowed_font_src_regex && preg_match( $this->allowed_font_src_regex, $obj->src ) ) { - $this->do_concat = false; - $result = parent::do_item( $handle ); - $this->do_concat = true; - return $result; - } - - $css_file_path = $this->get_validated_css_file_path( $obj->src, $handle ); - if ( is_wp_error( $css_file_path ) ) { - trigger_error( esc_html( $css_file_path->get_error_message() ), E_USER_WARNING ); // phpcs:ignore - return false; - } - $css_rtl_file_path = ''; - - // Handle RTL styles. - if ( 'rtl' === $this->text_direction && isset( $obj->extra['rtl'] ) && $obj->extra['rtl'] ) { - if ( is_bool( $obj->extra['rtl'] ) || 'replace' === $obj->extra['rtl'] ) { - $suffix = isset( $obj->extra['suffix'] ) ? $obj->extra['suffix'] : ''; - $css_rtl_file_path = $this->get_validated_css_file_path( - str_replace( "{$suffix}.css", "-rtl{$suffix}.css", $obj->src ), - "$handle-rtl" - ); - } else { - $css_rtl_file_path = $this->get_validated_css_file_path( $obj->extra['rtl'], "$handle-rtl" ); - } - - if ( is_wp_error( $css_rtl_file_path ) ) { - trigger_error( esc_html( $css_rtl_file_path->get_error_message() ), E_USER_WARNING ); // phpcs:ignore - $css_rtl_file_path = null; - } elseif ( 'replace' === $obj->extra['rtl'] ) { - $css_file_path = null; - } - } - - // Load the CSS from the filesystem. - foreach ( array_filter( array( $css_file_path, $css_rtl_file_path ) ) as $css_path ) { - $css = file_get_contents( $css_path ); // phpcs:ignore -- It's a local filesystem path not a remote request. - if ( 'all' !== $media ) { - $css = sprintf( '@media %s { %s }', $media, $css ); - } - $this->print_code .= $css; - } - - // Add inline styles. 
- $inline_style = $this->print_inline_style( $handle, false ); - if ( $inline_style ) { - $this->print_code .= $inline_style; - } - - return true; - } - - /** - * Get the locale stylesheet if it exists. - * - * @since 0.7 - * @see locale_stylesheet() - * @return bool Whether locale stylesheet was done. - */ - public function do_locale_stylesheet() { - if ( $this->did_locale_stylesheet ) { - return true; - } - - $src = get_locale_stylesheet_uri(); - if ( ! $src ) { - return false; - } - $path = $this->get_validated_css_file_path( $src, get_stylesheet() . '-' . get_locale() ); - if ( is_wp_error( $path ) ) { - return false; - } - $this->print_code .= file_get_contents( $path ); // phpcs:ignore -- The path has been validated, and it is not a remote path. - $this->did_locale_stylesheet = true; - return true; - } - - /** - * Append Customizer Custom CSS. - * - * @since 0.7 - * @see wp_custom_css() - * @see wp_custom_css_cb() - * @return bool Whether locale Custom CSS was done. - */ - public function do_custom_css() { - if ( $this->did_custom_css ) { - return true; - } - - $css = trim( wp_get_custom_css() ); - if ( ! $css ) { - return false; - } - - $this->print_code .= $css; - - $this->did_custom_css = true; - return true; - } -} diff --git a/includes/sanitizers/class-amp-style-sanitizer.php b/includes/sanitizers/class-amp-style-sanitizer.php index 6e926518b1e..f643a1d7f0b 100644 --- a/includes/sanitizers/class-amp-style-sanitizer.php +++ b/includes/sanitizers/class-amp-style-sanitizer.php @@ -8,7 +8,6 @@ /** * Class AMP_Style_Sanitizer * - * @todo This needs to also run on the CSS that is gathered for amp-custom. * Collects inline styles and outputs them in the amp-custom stylesheet. */ class AMP_Style_Sanitizer extends AMP_Base_Sanitizer { @@ -40,6 +39,44 @@ class AMP_Style_Sanitizer extends AMP_Base_Sanitizer { */ private $keyframes_max_size; + /** + * Maximum number of bytes allowed for a AMP Custom style. 
+ * + * @since 0.7 + * @var int + */ + private $custom_max_size; + + /** + * The style[amp-custom] element. + * + * @var DOMElement + */ + private $amp_custom_style_element; + + /** + * Regex for allowed font stylesheet URL. + * + * @var string + */ + private $allowed_font_src_regex; + + /** + * Base URL for styles. + * + * Full URL with trailing slash. + * + * @var string + */ + private $base_url; + + /** + * URL of the content directory. + * + * @var string + */ + private $content_url; + /** * AMP_Base_Sanitizer constructor. * @@ -58,6 +95,29 @@ public function __construct( DOMDocument $dom, array $args = array() ) { break; } } + + $spec_name = 'style amp-custom'; + foreach ( AMP_Allowed_Tags_Generated::get_allowed_tag( 'style' ) as $spec_rule ) { + if ( isset( $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) && $spec_name === $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) { + $this->custom_max_size = $spec_rule[ AMP_Rule_Spec::CDATA ]['max_bytes']; + break; + } + } + + $spec_name = 'link rel=stylesheet for fonts'; // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet + foreach ( AMP_Allowed_Tags_Generated::get_allowed_tag( 'link' ) as $spec_rule ) { + if ( isset( $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) && $spec_name === $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) { + $this->allowed_font_src_regex = '@^(' . $spec_rule[ AMP_Rule_Spec::ATTR_SPEC_LIST ]['href']['value_regex'] . ')$@'; + break; + } + } + + $guessurl = site_url(); + if ( ! $guessurl ) { + $guessurl = wp_guess_url(); + } + $this->base_url = $guessurl; + $this->content_url = WP_CONTENT_URL; } /** @@ -81,7 +141,7 @@ public function get_styles() { * @returns array Values are the CSS stylesheets. Keys are MD5 hashes of the stylesheets. 
*/ public function get_stylesheets() { - return array_merge( parent::get_stylesheets(), $this->stylesheets ); + return array_merge( $this->stylesheets, parent::get_stylesheets() ); } /** @@ -90,50 +150,225 @@ public function get_stylesheets() { * @since 0.4 */ public function sanitize() { - $body = $this->root_element; + $elements = array(); - $this->collect_style_elements(); + /* + * Note that xpath is used to query the DOM so that the link and style elements will be + * in document order. DOMNode::compareDocumentPosition() is not yet implemented. + */ + $xpath = new DOMXPath( $this->dom ); + + $lower_case = 'translate( %s, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz" )'; // In XPath 2.0 this is lower-case(). + $predicates = array( + sprintf( '( self::style and not( @amp-boilerplate ) and ( not( @type ) or %s = "text/css" ) )', sprintf( $lower_case, '@type' ) ), + sprintf( '( self::link and @href and %s = "stylesheet" )', sprintf( $lower_case, '@rel' ) ), + ); + + foreach ( $xpath->query( '//*[ ' . implode( ' or ', $predicates ) . ' ]' ) as $element ) { + $elements[] = $element; + } - $this->collect_styles_recursive( $body ); + /** + * Element. + * + * @var DOMElement $element + */ + foreach ( $elements as $element ) { + $node_name = strtolower( $element->nodeName ); + if ( 'style' === $node_name ) { + $this->process_style_element( $element ); + } elseif ( 'link' === $node_name ) { + $this->process_link_element( $element ); + } + } + + $elements = array(); + foreach ( $xpath->query( '//*[ @style ]' ) as $element ) { + $elements[] = $element; + } + foreach ( $elements as $element ) { + $this->collect_inline_styles( $element ); + } $this->did_convert_elements = true; + + // Now make sure the amp-custom style is in the DOM and populated, if we're working with the document element. + if ( ! empty( $this->args['use_document_element'] ) ) { + if ( ! 
$this->amp_custom_style_element ) { + $this->amp_custom_style_element = $this->dom->createElement( 'style' ); + $this->amp_custom_style_element->setAttribute( 'amp-custom', '' ); + $this->dom->getElementsByTagName( 'head' )->item( 0 )->appendChild( $this->amp_custom_style_element ); + } + + // Gather stylesheets to print as long as they don't surpass the limit. + $skipped = array(); + $css = ''; + $total_size = 0; + foreach ( $this->get_stylesheets() as $key => $stylesheet ) { + $sheet_size = strlen( $stylesheet ); + if ( $total_size + $sheet_size > $this->custom_max_size ) { + $skipped[] = $key; + } else { + if ( $total_size ) { + $css .= ' '; + } + $css .= $stylesheet; + $total_size += $sheet_size; + } + } + + /* + * Let the style[amp-custom] be populated with the concatenated CSS. + * !important: Updating the contents of this style element by setting textContent is not + * reliable across PHP/libxml versions, so this is why the children are removed and the + * text node is then explicitly added containing the CSS. + */ + while ( $this->amp_custom_style_element->firstChild ) { + $this->amp_custom_style_element->removeChild( $this->amp_custom_style_element->firstChild ); + } + $this->amp_custom_style_element->appendChild( $this->dom->createTextNode( $css ) ); + + // @todo This would be a candidate for sanitization reporting. + // Add comments to indicate which sheets were not included. + foreach ( array_reverse( $skipped ) as $skip ) { + $this->amp_custom_style_element->parentNode->insertBefore( + $this->dom->createComment( sprintf( 'Skipped including %s stylesheet since too large.', $skip ) ), + $this->amp_custom_style_element->nextSibling + ); + } + } } /** - * Collect and sanitize all style elements. + * Generates an enqueued style's fully-qualified file path. + * + * @since 0.7 + * @see WP_Styles::_css_href() + * + * @param string $src The source URL of the enqueued style. 
+ * @return string|WP_Error Style's absolute validated filesystem path, or WP_Error when error. */ - public function collect_style_elements() { - $style_elements = $this->dom->getElementsByTagName( 'style' ); - $nodes_to_remove = array(); - - foreach ( $style_elements as $style_element ) { - /** - * Style element. - * - * @var DOMElement $style_element - */ + public function get_validated_css_file_path( $src ) { + $needs_base_url = ( + ! is_bool( $src ) + && + ! preg_match( '|^(https?:)?//|', $src ) + && + ! ( $this->content_url && 0 === strpos( $src, $this->content_url ) ) + ); + if ( $needs_base_url ) { + $src = $this->base_url . $src; + } - if ( 'body' === $style_element->parentNode->nodeName && $style_element->hasAttribute( 'amp-keyframes' ) ) { - $validity = $this->validate_amp_keyframe( $style_element ); - if ( true === $validity ) { - continue; - } + // Strip query and fragment from URL. + $src = preg_replace( ':[\?#].*$:', '', $src ); + + if ( ! preg_match( '/\.(css|less|scss|sass)$/i', $src ) ) { + /* translators: %s is stylesheet URL */ + return new WP_Error( 'amp_css_bad_file_extension', sprintf( __( 'Skipped stylesheet which does not have recognized CSS file extension (%s).', 'amp' ), $src ) ); + } + + $includes_url = includes_url( '/' ); + $content_url = content_url( '/' ); + $admin_url = get_admin_url( null, '/' ); + $css_path = null; + if ( 0 === strpos( $src, $content_url ) ) { + $css_path = WP_CONTENT_DIR . substr( $src, strlen( $content_url ) - 1 ); + } elseif ( 0 === strpos( $src, $includes_url ) ) { + $css_path = ABSPATH . WPINC . substr( $src, strlen( $includes_url ) - 1 ); + } elseif ( 0 === strpos( $src, $admin_url ) ) { + $css_path = ABSPATH . 'wp-admin' . substr( $src, strlen( $admin_url ) - 1 ); + } + + if ( ! $css_path || false !== strpos( $css_path, '../' ) || 0 !== validate_file( $css_path ) || ! 
file_exists( $css_path ) ) { + /* translators: %s is stylesheet URL */ + return new WP_Error( 'amp_css_path_not_found', sprintf( __( 'Unable to locate filesystem path for stylesheet %s.', 'amp' ), $src ) ); + } + + return $css_path; + } + + /** + * Process style element. + * + * @param DOMElement $element Style element. + */ + private function process_style_element( DOMElement $element ) { + if ( 'body' === $element->parentNode->nodeName && $element->hasAttribute( 'amp-keyframes' ) ) { + $validity = $this->validate_amp_keyframe( $element ); + if ( true !== $validity ) { + $element->parentNode->removeChild( $element ); // @todo Add reporting. } + return; + } - $nodes_to_remove[] = $style_element; + $rules = trim( $element->textContent ); + $rules = $this->remove_illegal_css( $rules ); - // @todo This should perhaps be done in document order to ensure proper cascade. - $rules = trim( $style_element->textContent ); + $this->stylesheets[ md5( $rules ) ] = $rules; - // @todo This needs proper CSS parser, and de-duplication with \AMP_Style_Sanitizer::filter_style(). - $rules = preg_replace( '/\s*!important\s*(?=\s*;|})/', '', $rules ); - $rules = preg_replace( '/overflow\s*:\s*(auto|scroll)\s*;?\s*/', '', $rules ); + if ( $element->hasAttribute( 'amp-custom' ) ) { + if ( ! $this->amp_custom_style_element ) { + $this->amp_custom_style_element = $element; + } else { + $element->parentNode->removeChild( $element ); // There can only be one. #highlander. + } + } else { - $this->stylesheets[ md5( $rules ) ] = $rules; + // Remove from DOM since we'll be adding it to amp-custom. + $element->parentNode->removeChild( $element ); } + } - foreach ( $nodes_to_remove as $node_to_remove ) { - $node_to_remove->parentNode->removeChild( $node_to_remove ); + /** + * Process link element. + * + * @param DOMElement $element Link element. + */ + private function process_link_element( DOMElement $element ) { + $href = $element->getAttribute( 'href' ); + + // Allow font URLs. 
+ if ( $this->allowed_font_src_regex && preg_match( $this->allowed_font_src_regex, $href ) ) { + return; } + + $css_file_path = $this->get_validated_css_file_path( $href ); + if ( is_wp_error( $css_file_path ) ) { + $element->parentNode->removeChild( $element ); // @todo Report removal. Show HTML comment? + return; + } + + // Load the CSS from the filesystem. + $css = "\n/* $href */\n"; + $css .= file_get_contents( $css_file_path ); // phpcs:ignore -- It's a local filesystem path not a remote request. + + $css = $this->remove_illegal_css( $css ); + + $media = $element->getAttribute( 'media' ); + if ( $media && 'all' !== $media ) { + $css = sprintf( '@media %s { %s }', $media, $css ); + } + + $this->stylesheets[ $href ] = $css; + + // Remove now that styles have been processed. + $element->parentNode->removeChild( $element ); + } + + /** + * Remove illegal CSS from the stylesheet. + * + * @since 0.7 + * + * @todo This needs proper CSS parser and to take an alternative approach to removing !important by extracting + * the rule into a separate style rule with a very specific selector. + * @param string $stylesheet Stylesheet. + * @return string Scrubbed stylesheet. + */ + private function remove_illegal_css( $stylesheet ) { + $stylesheet = preg_replace( '/\s*!important/', '', $stylesheet ); // Note this has to also replace inside comments to be valid. + $stylesheet = preg_replace( '/overflow\s*:\s*(auto|scroll)\s*;?\s*/', '', $stylesheet ); + return $stylesheet; } /** @@ -171,39 +406,28 @@ private function validate_amp_keyframe( $style ) { * @see Retrieve array of styles using $this->get_styles() after calling this method. * * @since 0.4 + * @since 0.7 Modified to use element passed by XPath query. * * @note Uses recursion to traverse down the tree of DOMDocument nodes. - * @todo This could use XPath to more efficiently find all elements with style attributes. * - * @param DOMNode $node Node. + * @param DOMElement $element Node. 
*/ - private function collect_styles_recursive( $node ) { - if ( XML_ELEMENT_NODE !== $node->nodeType ) { + private function collect_inline_styles( $element ) { + $style = $element->getAttribute( 'style' ); + if ( ! $style ) { return; } + $class = $element->getAttribute( 'class' ); - if ( $node->hasAttributes() && $node instanceof DOMElement ) { - $style = $node->getAttribute( 'style' ); - $class = $node->getAttribute( 'class' ); - - if ( $style ) { - $style = $this->process_style( $style ); - if ( ! empty( $style ) ) { - $class_name = $this->generate_class_name( $style ); - $new_class = trim( $class . ' ' . $class_name ); - - $node->setAttribute( 'class', $new_class ); - $this->styles[ '.' . $class_name ] = $style; - } - $node->removeAttribute( 'style' ); - } - } + $style = $this->process_style( $style ); + if ( ! empty( $style ) ) { + $class_name = $this->generate_class_name( $style ); + $new_class = trim( $class . ' ' . $class_name ); - $length = $node->childNodes->length; - for ( $i = $length - 1; $i >= 0; $i -- ) { - $child_node = $node->childNodes->item( $i ); - $this->collect_styles_recursive( $child_node ); + $element->setAttribute( 'class', $new_class ); + $this->styles[ '.' . $class_name ] = $style; } + $element->removeAttribute( 'style' ); } /** diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php index d60aa6da72a..5288c044f2f 100644 --- a/includes/utils/class-amp-dom-utils.php +++ b/includes/utils/class-amp-dom-utils.php @@ -42,13 +42,21 @@ class AMP_DOM_Utils { 'wbr', ); + /** + * Stored noscript/comment replacements for libxml<2.8. + * + * @since 0.7 + * @var array + */ + public static $noscript_placeholder_comments = array(); + /** * Return a valid DOMDocument representing HTML document passed as a parameter. * * @since 0.7 + * @see AMP_DOM_Utils::get_content_from_dom_node() * * @param string $document Valid HTML document to be represented by a DOMDocument. 
- * * @return DOMDocument|false Returns DOMDocument, or false if conversion failed. */ public static function get_dom( $document ) { @@ -82,6 +90,24 @@ public static function get_dom( $document ) { $document ); + /* + * Replace noscript elements with placeholders since libxml<2.8 can parse them incorrectly. + * When appearing in the head element, a noscript can cause the head to close prematurely + * and the noscript gets moved to the body and anything after it which was in the head. + * See . + */ + if ( version_compare( LIBXML_DOTTED_VERSION, '2.8', '<' ) ) { + $document = preg_replace_callback( + '#]*>.*?#si', + function( $matches ) { + $placeholder = sprintf( '', (string) wp_rand() ); + AMP_DOM_Utils::$noscript_placeholder_comments[ $placeholder ] = $matches[0]; + return $placeholder; + }, + $document + ); + } + /* * Wrap in dummy tags, since XML needs one parent node. * It also makes it easier to loop through nodes. @@ -266,15 +292,14 @@ public static function get_dom_from_content( $content ) { } /** - * Return valid HTML content extracted from the DOMDocument passed as a parameter. - * - * @see Reciprocal function get_dom_from_content() + * Return valid HTML *body* content extracted from the DOMDocument passed as a parameter. * * @since 0.2 + * @see AMP_DOM_Utils::get_content_from_dom_node() Reciprocal function. * * @param DOMDocument $dom Represents an HTML document from which to extract HTML content. * - * @return string Returns the HTML content represented in the DOMDocument + * @return string Returns the HTML content of the body element represented in the DOMDocument. */ public static function get_content_from_dom( $dom ) { @@ -305,9 +330,9 @@ public static function get_content_from_dom( $dom ) { /** * Return valid HTML content extracted from the DOMNode passed as a parameter. * - * @see Called by function get_content_from_dom() - * * @since 0.6 + * @see AMP_DOM_Utils::get_dom() Where the operations in this method are mirrored. 
+ * @see AMP_DOM_Utils::get_content_from_dom() Reciprocal function. * @todo In the future consider an AMP_DOMDocument subclass that does this automatically at saveHTML(). See . * * @param DOMDocument $dom Represents an HTML document. @@ -338,6 +363,15 @@ public static function get_content_from_dom_node( $dom, $node ) { return ''; } + // Restore noscript elements which were temporarily removed to prevent libxml<2.8 parsing problems. + if ( version_compare( LIBXML_DOTTED_VERSION, '2.8', '<' ) ) { + $html = str_replace( + array_keys( self::$noscript_placeholder_comments ), + array_values( self::$noscript_placeholder_comments ), + $html + ); + } + $html = self::restore_amp_bind_attributes( $html ); // Restore amp-mustache placeholders which were replaced to prevent URL-encoded corruption by saveHTML. diff --git a/tests/test-amp-helper-functions.php b/tests/test-amp-helper-functions.php index f7cde3d0e7a..d6846c33708 100644 --- a/tests/test-amp-helper-functions.php +++ b/tests/test-amp-helper-functions.php @@ -196,6 +196,16 @@ public function test_amp_get_content_sanitizers() { $this->assertEquals( 'amp_content_sanitizers', $this->last_filter_call['current_filter'] ); $this->assertEquals( $handlers, $this->last_filter_call['args'][0] ); $this->assertEquals( $post, $this->last_filter_call['args'][1] ); + + // Make sure the style and whitelist sanitizers are always at the end, even after filtering. 
+ add_filter( 'amp_content_sanitizers', function( $classes ) { + $classes['Even_After_Whitelist_Sanitizer'] = array(); + return $classes; + } ); + $orderd_sanitizers = array_keys( amp_get_content_sanitizers() ); + $this->assertEquals( 'Even_After_Whitelist_Sanitizer', $orderd_sanitizers[ count( $orderd_sanitizers ) - 3 ] ); + $this->assertEquals( 'AMP_Style_Sanitizer', $orderd_sanitizers[ count( $orderd_sanitizers ) - 2 ] ); + $this->assertEquals( 'AMP_Tag_And_Attribute_Sanitizer', $orderd_sanitizers[ count( $orderd_sanitizers ) - 1 ] ); } /** diff --git a/tests/test-amp-style-sanitizer.php b/tests/test-amp-style-sanitizer.php index 98d8e868f53..9280a2117b4 100644 --- a/tests/test-amp-style-sanitizer.php +++ b/tests/test-amp-style-sanitizer.php @@ -5,6 +5,8 @@ * @package AMP */ +// phpcs:disable WordPress.Arrays.MultipleStatementAlignment.DoubleArrowNotAligned + /** * Test AMP_Style_Sanitizer. */ @@ -15,7 +17,7 @@ class AMP_Style_Sanitizer_Test extends WP_UnitTestCase { * * @return array */ - public function get_data() { + public function get_body_style_attribute_data() { return array( 'empty' => array( '', @@ -116,10 +118,41 @@ public function get_data() { 'div > span { font-weight:bold; font-style: italic; }', ), ), + ); + } + + /** + * Test sanitizer for style attributes that appear in the body. + * + * @dataProvider get_body_style_attribute_data + * @param string $source Source. + * @param string $expected_content Expected content. + * @param string $expected_stylesheets Expected stylesheets. + */ + public function test_body_style_attribute_sanitizer( $source, $expected_content, $expected_stylesheets ) { + $dom = AMP_DOM_Utils::get_dom_from_content( $source ); - 'styles_in_head_and_body_both_handled' => array( - '', - '', + $sanitizer = new AMP_Style_Sanitizer( $dom ); + $sanitizer->sanitize(); + + // Test content. 
+ $content = AMP_DOM_Utils::get_content_from_dom( $dom ); + $content = preg_replace( '/(?<=>)\s+(?=<)/', '', $content ); + $this->assertEquals( $expected_content, $content ); + + // Test stylesheet. + $this->assertEquals( $expected_stylesheets, array_values( $sanitizer->get_stylesheets() ) ); + } + + /** + * Get link and style test data. + * + * @return array + */ + public function get_link_and_style_test_data() { + return array( + 'multiple_amp_custom_and_other_styles' => array( + '', array( 'b {color:red}', 'i {color:blue}', @@ -127,24 +160,29 @@ public function get_data() { 's {color:yellow}', ), ), + 'style_eleemnts_with_link_elements' => array( + sprintf( + '', // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet + includes_url( 'css/dashicons.css' ) + ), + array( + 'strong.before-dashicon', + '.dashicons-dashboard:before', + 'strong.after-dashicon', + 's {color:yellow}', + ), + ), ); } /** - * Test sanitizer. + * Test style elements and link elements. * - * @dataProvider get_data + * @dataProvider get_link_and_style_test_data * @param string $source Source. - * @param string $expected_content Expected content. - * @param string $expected_stylesheets Expected stylesheets. + * @param array $expected_stylesheets Expected stylesheets. */ - public function test_sanitizer( $source, $expected_content, $expected_stylesheets ) { - $html_doc_format = '%s'; - if ( false === strpos( $source, 'sanitize(); - $whitelist_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom ); + $whitelist_sanitizer = new AMP_Tag_And_Attribute_Sanitizer( $dom, array( + 'use_document_element' => true, + ) ); $whitelist_sanitizer->sanitize(); - // Test content. - $content = AMP_DOM_Utils::get_content_from_dom_node( $dom, $dom->documentElement ); - $content = preg_replace( '/(?<=>)\s+(?=<)/', '', $content ); - $this->assertEquals( $expected_content, $content ); - - // Test stylesheet. 
- $this->assertEquals( $expected_stylesheets, array_values( $sanitizer->get_stylesheets() ) ); + $sanitized_html = AMP_DOM_Utils::get_content_from_dom_node( $dom, $dom->documentElement ); + $actual_stylesheets = array_values( $sanitizer->get_stylesheets() ); + $this->assertCount( count( $expected_stylesheets ), $actual_stylesheets ); + foreach ( $expected_stylesheets as $i => $expected_stylesheet ) { + $this->assertContains( $expected_stylesheet, $actual_stylesheets[ $i ] ); + $this->assertContains( $expected_stylesheet, $sanitized_html ); + } } /** @@ -212,4 +252,124 @@ public function test_keyframe_sanitizer( $source, $expected = null ) { $content = preg_replace( '/(?<=>)\s+(?=<)/', '', $content ); $this->assertEquals( $expected, $content ); } + + /** + * Get stylesheet URLs. + * + * @returns array Stylesheet URL data. + */ + public function get_stylesheet_urls() { + return array( + 'theme_stylesheet_without_host' => array( + '/wp-content/themes/twentyseventeen/style.css', + WP_CONTENT_DIR . '/themes/twentyseventeen/style.css', + ), + 'theme_stylesheet_with_host' => array( + WP_CONTENT_URL . '/themes/twentyseventeen/style.css', + WP_CONTENT_DIR . '/themes/twentyseventeen/style.css', + ), + 'dashicons_without_host' => array( + '/wp-includes/css/dashicons.css', + ABSPATH . WPINC . '/css/dashicons.css', + ), + 'dashicons_with_host' => array( + includes_url( 'css/dashicons.css' ), + ABSPATH . WPINC . '/css/dashicons.css', + ), + 'admin_without_host' => array( + '/wp-admin/css/common.css', + ABSPATH . 'wp-admin/css/common.css', + ), + 'admin_with_host' => array( + admin_url( 'css/common.css' ), + ABSPATH . 'wp-admin/css/common.css', + ), + 'amp_css_bad_file_extension' => array( + content_url( 'themes/twentyseventeen/index.php' ), + null, + 'amp_css_bad_file_extension', + ), + 'amp_css_path_not_found' => array( + content_url( 'themes/twentyseventeen/404.css' ), + null, + 'amp_css_path_not_found', + ), + ); + } + + /** + * Tests get_validated_css_file_path. 
+ * + * @dataProvider get_stylesheet_urls + * @covers AMP_Style_Sanitizer::get_validated_css_file_path() + * @param string $source Source URL. + * @param string|null $expected Expected path or null if error. + * @param string $error_code Error code. Optional. + */ + public function test_get_validated_css_file_path( $source, $expected, $error_code = null ) { + $dom = AMP_DOM_Utils::get_dom( '' ); + + $sanitizer = new AMP_Style_Sanitizer( $dom ); + $actual = $sanitizer->get_validated_css_file_path( $source ); + if ( isset( $error_code ) ) { + $this->assertInstanceOf( 'WP_Error', $actual ); + $this->assertEquals( $error_code, $actual->get_error_code() ); + } else { + $this->assertEquals( $expected, $actual ); + } + } + + /** + * Get font url test data. + * + * @return array Data. + */ + public function get_font_urls() { + return array( + 'tangerine' => array( + 'https://fonts.googleapis.com/css?family=Tangerine', + true, + ), + 'typekit' => array( + 'https://use.typekit.net/abc.css', + true, + ), + 'fontscom' => array( + 'https://fast.fonts.net/abc.css', + true, + ), + 'fontawesome' => array( + 'https://maxcdn.bootstrapcdn.com/font-awesome/123/css/font-awesome.min.css', + true, + ), + 'fontbad' => array( + 'https://bad.example.com/font.css', + false, + ), + ); + } + + /** + * Tests that font URLs get validated. + * + * @dataProvider get_font_urls + * @param string $url Font URL. + * @param bool $pass Whether the font URL is ok. 
+ */ + public function test_font_urls( $url, $pass ) { + $dom = AMP_DOM_Utils::get_dom( sprintf( '', $url ) ); // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet + + $sanitizer = new AMP_Style_Sanitizer( $dom, array( + 'use_document_element' => true, + ) ); + $sanitizer->sanitize(); + + $link = $dom->getElementsByTagName( 'link' )->item( 0 ); + if ( $pass ) { + $this->assertInstanceOf( 'DOMElement', $link ); + $this->assertEquals( $url, $link->getAttribute( 'href' ) ); + } else { + $this->assertEmpty( $link ); + } + } } diff --git a/tests/test-class-amp-theme-support.php b/tests/test-class-amp-theme-support.php index 84dc66300be..3429ab836cb 100644 --- a/tests/test-class-amp-theme-support.php +++ b/tests/test-class-amp-theme-support.php @@ -113,6 +113,8 @@ public function test_prepare_response() { data-aax_pubname="test123" data-aax_src="302"> + + assertContains( '', $sanitized_html ); $this->assertContains( '', $sanitized_html ); $this->assertContains( '