Skip to content

Commit

Permalink
Update handling of blacklisted_cdata_regex
Browse files Browse the repository at this point in the history
* Add support for blacklisted_cdata_regex being plural.
* Remove obsolete INVALID_CDATA_CSS_IMPORTANT error code.
* Add new INVALID_CDATA_CSS_I_AMPHTML_NAME error code.

Fixes #4319
  • Loading branch information
westonruter committed Apr 8, 2020
1 parent fac2a76 commit 2c9c68e
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 68 deletions.
15 changes: 9 additions & 6 deletions bin/amphtml-update.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,10 +448,12 @@ def GetTagSpec(tag_spec, attr_lists):
if isinstance(field_value, (unicode, str, bool, int)):
cdata_dict[ field_descriptor.name ] = field_value
elif isinstance( field_value, google.protobuf.pyext._message.RepeatedCompositeContainer ):
cdata_dict[ field_descriptor.name ] = {}
cdata_dict[ field_descriptor.name ] = []
for value in field_value:
entry = {}
for (key,val) in value.ListFields():
cdata_dict[ field_descriptor.name ][ key.name ] = val
entry[ key.name ] = val
cdata_dict[ field_descriptor.name ].append( entry )
elif hasattr( field_value, '_values' ):
cdata_dict[ field_descriptor.name ] = {}
for _value in field_value._values:
Expand Down Expand Up @@ -485,10 +487,11 @@ def GetTagSpec(tag_spec, attr_lists):
cdata_dict['css_spec'] = css_spec
if len( cdata_dict ) > 0:
if 'blacklisted_cdata_regex' in cdata_dict:
if 'error_message' not in cdata_dict['blacklisted_cdata_regex']:
raise Exception( 'Missing error_message for blacklisted_cdata_regex.' );
if cdata_dict['blacklisted_cdata_regex']['error_message'] not in ( 'CSS !important', 'contents', 'html comments', 'CSS i-amphtml- name prefix' ):
raise Exception( 'Unexpected error_message "%s" for blacklisted_cdata_regex.' % cdata_dict['blacklisted_cdata_regex']['error_message'] );
for entry in cdata_dict['blacklisted_cdata_regex']:
if 'error_message' not in entry:
raise Exception( 'Missing error_message for blacklisted_cdata_regex.' );
if entry['error_message'] not in ( 'contents', 'html comments', 'CSS i-amphtml- name prefix' ):
raise Exception( 'Unexpected error_message "%s" for blacklisted_cdata_regex.' % entry['error_message'] );
tag_spec_dict['cdata'] = cdata_dict

if 'spec_name' not in tag_spec_dict['tag_spec']:
Expand Down
136 changes: 92 additions & 44 deletions includes/sanitizers/class-amp-allowed-tags-generated.php
Original file line number Diff line number Diff line change
Expand Up @@ -11960,8 +11960,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'contents',
'regex' => '.',
array(
'error_message' => 'contents',
'regex' => '.',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -12001,8 +12003,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'contents',
'regex' => '.',
array(
'error_message' => 'contents',
'regex' => '.',
),
),
),
'tag_spec' => array(
Expand All @@ -12026,8 +12030,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand All @@ -12053,8 +12059,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand All @@ -12075,8 +12083,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -12293,8 +12303,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -12501,8 +12513,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -12587,8 +12601,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -12762,8 +12778,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -12877,8 +12895,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
'max_bytes' => 100000,
'max_bytes_spec_url' => 'https://amp.dev/documentation/components/amp-bind#state',
Expand Down Expand Up @@ -13146,8 +13166,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -13413,8 +13435,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
'max_bytes' => 15000,
'max_bytes_spec_url' => 'https://amp.dev/documentation/components/amp-experiment#configuration',
Expand Down Expand Up @@ -13738,8 +13762,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -14334,8 +14360,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -14611,8 +14639,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -15183,8 +15213,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
'max_bytes' => 10000,
'max_bytes_spec_url' => 'https://amp.dev/documentation/components/amp-script#faq',
Expand Down Expand Up @@ -15481,8 +15513,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -15557,8 +15591,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand All @@ -15584,8 +15620,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
'max_bytes' => 15000,
'max_bytes_spec_url' => 'https://amp.dev/documentation/components/amp-experiment#configuration',
Expand Down Expand Up @@ -15645,8 +15683,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -15734,8 +15774,10 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'html comments',
'regex' => '<!--',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
),
),
'tag_spec' => array(
Expand Down Expand Up @@ -16626,8 +16668,14 @@ class AMP_Allowed_Tags_Generated {
),
'cdata' => array(
'blacklisted_cdata_regex' => array(
'error_message' => 'CSS i-amphtml- name prefix',
'regex' => '(^|\\W)i-amphtml-',
array(
'error_message' => 'html comments',
'regex' => '<!--',
),
array(
'error_message' => 'CSS i-amphtml- name prefix',
'regex' => '(^|\\W)i-amphtml-',
),
),
'css_spec' => array(
'allowed_at_rules' => array(
Expand Down
30 changes: 16 additions & 14 deletions includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class AMP_Tag_And_Attribute_Sanitizer extends AMP_Base_Sanitizer {
const DUPLICATE_UNIQUE_TAG = 'DUPLICATE_UNIQUE_TAG';
const MANDATORY_CDATA_MISSING_OR_INCORRECT = 'MANDATORY_CDATA_MISSING_OR_INCORRECT';
const CDATA_TOO_LONG = 'CDATA_TOO_LONG';
const INVALID_CDATA_CSS_IMPORTANT = 'INVALID_CDATA_CSS_IMPORTANT';
const INVALID_CDATA_CSS_I_AMPHTML_NAME = 'INVALID_CDATA_CSS_I_AMPHTML_NAME';
const INVALID_CDATA_CONTENTS = 'INVALID_CDATA_CONTENTS';
const INVALID_CDATA_HTML_COMMENTS = 'INVALID_CDATA_HTML_COMMENTS';
const JSON_ERROR_CTRL_CHAR = 'JSON_ERROR_CTRL_CHAR';
Expand Down Expand Up @@ -919,21 +919,23 @@ private function validate_cdata_for_node( DOMElement $element, $cdata_spec ) {
];
}
if ( isset( $cdata_spec['blacklisted_cdata_regex'] ) ) {
if ( preg_match( '@' . $cdata_spec['blacklisted_cdata_regex']['regex'] . '@u', $element->textContent ) ) {
if ( isset( $cdata_spec['blacklisted_cdata_regex']['error_message'] ) ) {
// There are only a few error messages, so map them to error codes.
switch ( $cdata_spec['blacklisted_cdata_regex']['error_message'] ) {
case 'CSS !important':
return [ 'code' => self::INVALID_CDATA_CSS_IMPORTANT ];
case 'contents':
return [ 'code' => self::INVALID_CDATA_CONTENTS ];
case 'html comments':
return [ 'code' => self::INVALID_CDATA_HTML_COMMENTS ];
foreach ( $cdata_spec['blacklisted_cdata_regex'] as $blacklisted_cdata_regex ) {
if ( preg_match( '@' . $blacklisted_cdata_regex['regex'] . '@u', $element->textContent ) ) {
if ( isset( $blacklisted_cdata_regex['error_message'] ) ) {
// There are only a few error messages, so map them to error codes.
switch ( $blacklisted_cdata_regex['error_message'] ) {
case 'CSS i-amphtml- name prefix':
return [ 'code' => self::INVALID_CDATA_CSS_I_AMPHTML_NAME ]; // @todo This really should be done as part of the CSS sanitizer.
case 'contents':
return [ 'code' => self::INVALID_CDATA_CONTENTS ];
case 'html comments':
return [ 'code' => self::INVALID_CDATA_HTML_COMMENTS ];
}
}
}

// Note: This fallback case is not currently reachable because all error messages are accounted for in the switch statement.
return [ 'code' => self::CDATA_VIOLATES_BLACKLIST ];
// Note: This fallback case is not currently reachable because all error messages are accounted for in the switch statement.
return [ 'code' => self::CDATA_VIOLATES_BLACKLIST ];
}
}
} elseif ( isset( $cdata_spec['cdata_regex'] ) ) {
$delimiter = false === strpos( $cdata_spec['cdata_regex'], '@' ) ? '@' : '#';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3042,7 +3042,7 @@ public static function get_error_title_from_code( $validation_error ) {
case AMP_Tag_And_Attribute_Sanitizer::CDATA_TOO_LONG:
case AMP_Tag_And_Attribute_Sanitizer::MANDATORY_CDATA_MISSING_OR_INCORRECT:
case AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_HTML_COMMENTS:
case AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_CSS_IMPORTANT:
case AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_CSS_I_AMPHTML_NAME:
case AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_CONTENTS:
case AMP_Tag_And_Attribute_Sanitizer::CDATA_VIOLATES_BLACKLIST:
return esc_html__( 'Illegal text content', 'amp' );
Expand Down
12 changes: 9 additions & 3 deletions tests/php/test-tag-and-attribute-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -2907,11 +2907,17 @@ public function get_html_data() {
],
],
],
'cdata_css_important' => [
'<html amp><head><meta charset="utf-8"><style amp-custom>body { outline: solid 1px red !important; }</style></head><body></body></html>',
'cdata_css_i_amphtml_name' => [
'<html amp><head><meta charset="utf-8"><style amp-custom>i-amphtml-scroll-container { outline: solid 1px red; }</style></head><body></body></html>',
'<html amp><head><meta charset="utf-8"></head><body></body></html>',
[],
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_CSS_IMPORTANT ],
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_CSS_I_AMPHTML_NAME ],
],
'cdata_css_html_comment' => [
'<html amp><head><meta charset="utf-8"><style amp-custom>/* <!-- not good --> */</style></head><body></body></html>',
'<html amp><head><meta charset="utf-8"></head><body></body></html>',
[],
[ AMP_Tag_And_Attribute_Sanitizer::INVALID_CDATA_HTML_COMMENTS ],
],
'cdata_contents_bad_comment' => [
'<html><head><meta charset="utf-8"><script type="application/ld+json"><!--{"@context":"http:\/\/schema.org"}--></script></head><body></body></html>',
Expand Down

0 comments on commit 2c9c68e

Please sign in to comment.