Skip to content

Commit

Permalink
Merge pull request #7078 from ampproject/update/amphtml-spec-22032814…
Browse files Browse the repository at this point in the history
…22000

Update amphtml spec to 2204221712000
  • Loading branch information
westonruter authored Apr 26, 2022
2 parents b13356a + 024eaf0 commit f751041
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 82 deletions.
113 changes: 71 additions & 42 deletions bin/amphtml-update.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import collections
import json
import google
import imp
from collections.abc import Sequence

seen_spec_names = set()

Expand All @@ -38,49 +38,57 @@ def Die(msg):

def SetupOutDir(out_dir):
    """Sets up a clean output directory.

    Anything already present at out_dir is removed, then an empty directory
    is created in its place.

    Args:
      out_dir: directory name of the output directory. Must not have slashes,
        dots, etc.

    Raises:
      AssertionError: if out_dir contains characters other than letters,
        digits, underscores and hyphens.
    """
    # Local import so this function does not depend on the file's import block.
    import shutil

    logging.info('entering ...')
    assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir

    if os.path.isdir(out_dir) and not os.path.islink(out_dir):
        # Real directory: remove it together with its contents.
        shutil.rmtree(out_dir)
    elif os.path.lexists(out_dir):
        # Regular file or symlink (including a dangling one): remove the entry
        # itself so that the mkdir below cannot collide with it.
        os.remove(out_dir)
    os.mkdir(out_dir)
    logging.info('... done')


def GenValidatorPb2Py(validator_directory, out_dir):
    """Calls the proto compiler to generate validator_pb2.py.

    Args:
      validator_directory: directory name of the validator.
      out_dir: directory name of the output directory. Must not have slashes,
        dots, etc.

    Raises:
      AssertionError: if out_dir contains characters other than letters,
        digits, underscores and hyphens.
      subprocess.CalledProcessError: if protoc exits with a non-zero status.
    """
    logging.info('entering ...')
    assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir

    # protoc is given explicit paths instead of being run after a chdir, so the
    # working directory of the process is left untouched.
    subprocess.check_call([
        'protoc',
        os.path.join(validator_directory, 'validator.proto'),
        '--proto_path=%s' % validator_directory,
        '--python_out=%s' % out_dir,
    ])
    # Create an empty __init__.py next to the generated module.
    with open('%s/__init__.py' % out_dir, 'w'):
        pass
    logging.info('... done')

def GenValidatorProtoascii(validator_directory, out_dir):
    """Assembles the validator protoascii file from the main and extensions.

    The main, CSS and SVG protoascii files are concatenated in that order,
    followed by every extension protoascii file in sorted path order. The
    result is written to <out_dir>/validator.protoascii.

    Args:
      validator_directory: directory for where the validator is located,
        inside the amphtml repo.
      out_dir: directory name of the output directory. Must not have slashes,
        dots, etc.

    Raises:
      AssertionError: if out_dir contains characters other than letters,
        digits, underscores and hyphens.
    """
    logging.info('entering ...')
    assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir

    def read_text(path):
        # Context manager so every input handle is closed promptly.
        with open(path) as handle:
            return handle.read()

    segment_paths = [
        os.path.join(validator_directory, 'validator-main.protoascii'),
        os.path.join(validator_directory, 'validator-css.protoascii'),
        os.path.join(validator_directory, 'validator-svg.protoascii'),
    ]

    extensions = glob.glob(os.path.join(validator_directory, 'extensions/*/validator-*.protoascii'))
    # In the Github project, the extensions are located in a sibling directory
    # to the validator rather than a child directory.
    if not extensions:
        extensions = glob.glob(os.path.join(validator_directory, '../extensions/*/validator-*.protoascii'))
    segment_paths.extend(sorted(extensions))

    # Read everything before opening the output so that a failed read does not
    # leave a truncated validator.protoascii behind.
    combined = ''.join(read_text(path) for path in segment_paths)
    with open('%s/validator.protoascii' % out_dir, 'w') as f:
        f.write(combined)
    logging.info('... done')


def GeneratePHP(repo_directory, out_dir):
Expand All @@ -94,8 +102,8 @@ def GeneratePHP(repo_directory, out_dir):
allowed_tags, attr_lists, descendant_lists, reference_points, versions = ParseRules(repo_directory, out_dir)

expected_spec_names = (
'style amp-custom',
'style[amp-keyframes]',
b'style amp-custom',
b'style[amp-keyframes]',
)
for expected_spec_name in expected_spec_names:
if expected_spec_name not in seen_spec_names:
Expand All @@ -120,7 +128,7 @@ def GeneratePHP(repo_directory, out_dir):
output = re.sub("'False'", "false", output)

# Write the php file to STDOUT.
print output
print( output )

def GenerateHeaderPHP(out):
# Output the file's header
Expand Down Expand Up @@ -163,14 +171,14 @@ def GenerateAllowedTagsPHP(out, allowed_tags):
def GenerateLayoutAttributesPHP(out, attr_lists):
    """Appends the PHP declaration of the attribute list allowed for layouts.

    Args:
      out: collection of output lines; extended in place via append().
      attr_lists: mapping from attribute-list name to attribute list. The
        entry under the bytes key b'$AMP_LAYOUT_ATTRS' is emitted.
    """
    out.append('')
    # Only the bytes-key lookup is kept; the str-key line shown beside it is
    # the superseded pre-Python-3 form of the same statement.
    layout_attrs_php = Phpize(attr_lists[b'$AMP_LAYOUT_ATTRS'], 1).lstrip()
    out.append('\tprivate static $layout_allowed_attrs = %s;' % layout_attrs_php)
    out.append('')


def GenerateGlobalAttributesPHP(out, attr_lists):
    """Appends the PHP declaration of the globally allowed attribute list.

    Args:
      out: collection of output lines; extended in place via append().
      attr_lists: mapping from attribute-list name to attribute list. The
        entry under the bytes key b'$GLOBAL_ATTRS' is emitted.
    """
    out.append('')
    # Only the bytes-key lookup is kept; the str-key line shown beside it is
    # the superseded pre-Python-3 form of the same statement.
    global_attrs_php = Phpize(attr_lists[b'$GLOBAL_ATTRS'], 1).lstrip()
    out.append('\tprivate static $globally_allowed_attrs = %s;' % global_attrs_php)
    out.append('')

def GenerateReferencePointsPHP(out, reference_points):
Expand Down Expand Up @@ -301,9 +309,14 @@ def ParseRules(repo_directory, out_dir):
# These imports happen late, within this method because they don't necessarily
# exist when the module starts running, and the ones that probably do
# are checked by CheckPrereqs.
# pylint: disable=g-import-not-at-top

from google.protobuf import text_format
validator_pb2 = imp.load_source('validator_pb2', os.path.join( out_dir, 'validator_pb2.py' ))
from google.protobuf import json_format
from google.protobuf import descriptor

sys.path.append(os.getcwd())
from amp_validator_dist import validator_pb2

allowed_tags = {}
attr_lists = {}
Expand Down Expand Up @@ -361,15 +374,16 @@ def ParseRules(repo_directory, out_dir):
continue

# If we made it here, then start adding the tag_spec
if tag_spec.tag_name.lower() not in allowed_tags:
tag_name = tag_spec.tag_name.lower()
if tag_name not in allowed_tags:
tag_list = []
else:
tag_list = allowed_tags[UnicodeEscape(tag_spec.tag_name).lower()]
tag_list = allowed_tags[tag_name]

gotten_tag_spec = GetTagSpec(tag_spec, attr_lists)
if gotten_tag_spec is not None:
tag_list.append(gotten_tag_spec)
allowed_tags[UnicodeEscape(tag_spec.tag_name).lower()] = tag_list
allowed_tags[tag_name] = tag_list
elif 'descendant_tag_list' == field_desc.name:
for _list in field_val:
descendant_lists[_list.name] = []
Expand Down Expand Up @@ -517,7 +531,7 @@ def ParseRules(repo_directory, out_dir):
# Remove redundant information.
del extension_script_list[0]['tag_spec']['extension_spec']['bento_supported_version']

validator_versions = sorted( validator_versions, key=lambda version: map(int, version.split('.') ) )
validator_versions = sorted( validator_versions, key=lambda version: list(map(int, version.split('.') )) )
extension_script_list[0]['tag_spec']['extension_spec']['version'] = validator_versions

if 'bento' in extensions_versions[extension_name] and extensions_versions[extension_name]['bento']['version'] in validator_versions:
Expand Down Expand Up @@ -593,7 +607,7 @@ def GetTagSpec(tag_spec, attr_lists):
if tag_spec.HasField('cdata'):
cdata_dict = {}
for (field_descriptor, field_value) in tag_spec.cdata.ListFields():
if isinstance(field_value, (unicode, str, bool, int)):
if isinstance(field_value, (str, bytes, bool, int)):
cdata_dict[ field_descriptor.name ] = field_value
elif isinstance( field_value, google.protobuf.pyext._message.RepeatedCompositeContainer ):
cdata_dict[ field_descriptor.name ] = []
Expand All @@ -620,12 +634,12 @@ def GetTagSpec(tag_spec, attr_lists):
if not hasattr( field_value, css_spec_field_name ):
continue
css_spec_field_value = getattr( field_value, css_spec_field_name )
if isinstance(css_spec_field_value, (list, collections.Sequence, google.protobuf.internal.containers.RepeatedScalarFieldContainer, google.protobuf.pyext._message.RepeatedScalarContainer)):
if isinstance(css_spec_field_value, (list, Sequence, google.protobuf.internal.containers.RepeatedScalarFieldContainer, google.protobuf.pyext._message.RepeatedScalarContainer)):
css_spec[ css_spec_field_name ] = [ val for val in css_spec_field_value ]
elif hasattr( css_spec_field_value, 'ListFields' ):
css_spec[ css_spec_field_name ] = {}
for (css_spec_field_item_descriptor, css_spec_field_item_value) in getattr( field_value, css_spec_field_name ).ListFields():
if isinstance(css_spec_field_item_value, (list, collections.Sequence, google.protobuf.internal.containers.RepeatedScalarFieldContainer, google.protobuf.pyext._message.RepeatedScalarContainer)):
if isinstance(css_spec_field_item_value, (list, Sequence, google.protobuf.internal.containers.RepeatedScalarFieldContainer, google.protobuf.pyext._message.RepeatedScalarContainer)):
css_spec[ css_spec_field_name ][ css_spec_field_item_descriptor.name ] = [ val for val in css_spec_field_item_value ]
else:
css_spec[ css_spec_field_name ][ css_spec_field_item_descriptor.name ] = css_spec_field_item_value
Expand Down Expand Up @@ -715,7 +729,7 @@ def GetTagRules(tag_spec):

# Ignore amp-custom-length-check because the AMP plugin will indicate how close they are to the limit.
# TODO: Remove the AMP4EMAIL check once this change is released: <https://github.com/ampproject/amphtml/pull/25246>.
if tag_spec.HasField('spec_name') and ( str(tag_spec.spec_name) == 'style amp-custom-length-check' or 'AMP4EMAIL' in str(tag_spec.spec_name) ):
if tag_spec.HasField('spec_name') and ( tag_spec.spec_name == 'style amp-custom-length-check' or 'AMP4EMAIL' in tag_spec.spec_name ):
return None

if tag_spec.HasField('extension_spec'):
Expand Down Expand Up @@ -892,7 +906,7 @@ def GetValues(attr_spec):
# print 'value_property.name: %s' % value_property.name
for (key,val) in value_property.ListFields():
if val != value_property.name:
if isinstance(val, unicode):
if isinstance(val, str):
val = UnicodeEscape(val)
property_dict[UnicodeEscape(key.name)] = val
value_properties_dict[UnicodeEscape(value_property.name)] = property_dict
Expand All @@ -902,7 +916,7 @@ def GetValues(attr_spec):
if attr_spec.HasField('value_url'):
value_url_dict = {}
for (value_url_key, value_url_val) in attr_spec.value_url.ListFields():
if isinstance(value_url_val, (list, collections.Sequence, google.protobuf.internal.containers.RepeatedScalarFieldContainer, google.protobuf.pyext._message.RepeatedScalarContainer)):
if isinstance(value_url_val, (list, Sequence, google.protobuf.internal.containers.RepeatedScalarFieldContainer, google.protobuf.pyext._message.RepeatedScalarContainer)):
value_url_val_val = []
for val in value_url_val:
value_url_val_val.append(UnicodeEscape(val))
Expand Down Expand Up @@ -956,6 +970,23 @@ def GetMandatoryOf( attr, constraint ):

return sorted(attributes)

def to_json(data):
    """Make data JSON-serializable.

    Hat tip to AYHarano via https://stackoverflow.com/a/44060541/93579

    Args:
      data: None, bool, int, float, str, bytes, or an arbitrarily nested
        tuple/range/list/set/frozenset/dict of those.

    Returns:
      An equivalent structure in which bytes are decoded as UTF-8 strings,
      tuples and ranges become lists, sets become sorted lists, and dict keys
      and values are converted recursively.

    Raises:
      TypeError: if data (or a nested member) is of an unsupported type.
    """
    if data is None or isinstance(data, (bool, int, float, str)):
        return data
    if isinstance(data, bytes):
        return data.decode('utf8')
    if isinstance(data, (tuple, range, list)):
        return [to_json(item) for item in data]
    if isinstance(data, (set, frozenset)):
        # Convert members before sorting so that e.g. a set of bytes ends up
        # as a sorted list of str rather than a list of raw bytes.
        return sorted(to_json(item) for item in data)
    if isinstance(data, dict):
        # Covers dict subclasses such as collections.defaultdict as well.
        return {to_json(key): to_json(value) for key, value in data.items()}
    raise TypeError('Cannot make %s JSON-serializable' % type(data).__name__)

def Phpize(data, indent=0):
"""Helper function to convert JSON-serializable data into PHP literals.
Expand All @@ -964,10 +995,10 @@ def Phpize(data, indent=0):
Returns:
String formatted as PHP literal.
"""
json_string = json.dumps(data, sort_keys=True, ensure_ascii=False)
json_string = json.dumps(to_json(data), sort_keys=True, ensure_ascii=False)

pipe = subprocess.Popen(['php', '-r', 'var_export( json_decode( file_get_contents( "php://stdin" ), true ) );'], stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.STDOUT)
php_stdout = pipe.communicate(input=json_string)[0]
php_stdout = pipe.communicate(input=bytes(json_string, 'utf8'))[0]
php_exported = php_stdout.decode()

# Clean up formatting.
Expand All @@ -987,12 +1018,10 @@ def Main( repo_directory, out_dir ):
logging.basicConfig(format='[[%(filename)s %(funcName)s]] - %(message)s', level=logging.INFO)

validator_directory = os.path.realpath( os.path.join( repo_directory, 'validator' ) )
out_dir = os.path.realpath(out_dir)

SetupOutDir(out_dir)
GenValidatorProtoascii(validator_directory, out_dir)
GenValidatorPb2Py(validator_directory, out_dir)
GenValidatorProtoascii(validator_directory,out_dir)
GeneratePHP(repo_directory, out_dir)

if __name__ == '__main__':
Expand All @@ -1002,5 +1031,5 @@ def Main( repo_directory, out_dir ):
if not os.path.exists( repo_directory ):
Die( "Error: The amphtml directory does not exist: %s" % validator_directory )
repo_directory = os.path.realpath( repo_directory )
out_dir = os.path.join( tempfile.gettempdir(), 'amp_wp' )
Main( repo_directory, out_dir )
os.chdir( tempfile.gettempdir() )
Main( repo_directory, 'amp_validator_dist' )
6 changes: 3 additions & 3 deletions bin/amphtml-update.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ BIN_PATH="$(dirname "$0")"
PROJECT_PATH=$(dirname $BIN_PATH)
AMPHTML_LOCATION="$1"

if ! command -v python >/dev/null 2>&1 || ! python -c "import google.protobuf" 2>/dev/null; then
if ! command -v python3 >/dev/null 2>&1 || ! python3 -c "import google.protobuf" 2>/dev/null; then
echo "Error: The google.protobuf Python module is not installed."
echo
echo "On Linux, you can install the required dependencies via:"
echo "# apt-get install python protobuf-compiler python-protobuf"
echo "# apt-get install python3 protobuf-compiler python3-protobuf"
echo
echo "On MacOS, Python is already installed but you may install via:"
echo "$ pip install --upgrade protobuf"
Expand Down Expand Up @@ -52,7 +52,7 @@ else
fi

# Run script.
python "$BIN_PATH/amphtml-update.py" "$AMPHTML_LOCATION" > "$PROJECT_PATH/includes/sanitizers/class-amp-allowed-tags-generated.php"
python3 "$BIN_PATH/amphtml-update.py" "$AMPHTML_LOCATION" > "$PROJECT_PATH/includes/sanitizers/class-amp-allowed-tags-generated.php"

if [[ $CLEANUP == 1 ]]; then
rm -r "$AMPHTML_LOCATION"
Expand Down
Loading

0 comments on commit f751041

Please sign in to comment.