Skip to content

Commit

Permalink
Merge pull request #309 from jgraham/html5lib_update
Browse files Browse the repository at this point in the history
Update html5lib tests
  • Loading branch information
jgraham committed Nov 5, 2013
2 parents 082d265 + aa045d2 commit 4f2e4e5
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 0 deletions.
25 changes: 25 additions & 0 deletions scripts/html5lib_test.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<html xmlns:py="http://genshi.edgewall.org/">
<!--! Genshi template for one generated html5lib parser test page.
      Variables read below: file_name, tests (each item supplies id,
      string_uri_encoded_input and string_escaped_expected), test_timeout
      and file_timeout.
      The page builds an ordered list of test ids plus a map from id to
      [async_test object, input, expected], then calls init_tests().
      NOTE(review): comments opening with "!" are expected to be dropped by
      Genshi and not reach the generated page; confirm against the Genshi docs. -->
<head>
<meta charset="utf8"/>
<title>HTML 5 Parser tests ${file_name}</title>
</head>
<body>
<h1>html5lib Parser Test</h1>
<div id="log"></div>
<script src="common.js"></script>
<script src="test.js"></script>
<script src="template.js"></script>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script>
var num_iframes = 8;

var order = [<py:for each="test in tests">'${test.id}',</py:for>];
var tests = {
<py:for each="test in tests">"${test.id}":[async_test('${file_name} ${test.id}', {'timeout':${test_timeout}}), ${test.string_uri_encoded_input}, ${test.string_escaped_expected}],</py:for>
}
init_tests(get_type(), ${file_timeout});
</script>

</body>
</html>
26 changes: 26 additions & 0 deletions scripts/html5lib_test_fragment.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
<html xmlns:py="http://genshi.edgewall.org/">
<!--! Genshi template for one generated html5lib fragment (innerHTML) test page.
      Variables read below: file_name, tests (each item supplies id,
      string_uri_encoded_input, string_escaped_expected and container),
      test_timeout and file_timeout.
      Each entry of the tests map carries the container as a fourth element,
      and init_tests() is always called with "innerHTML".
      NOTE(review): comments opening with "!" are expected to be dropped by
      Genshi and not reach the generated page; confirm against the Genshi docs. -->
<head>
<meta charset="utf8"/>
<title>HTML 5 Parser tests ${file_name}</title>
</head>
<body>
<h1>html5lib Parser Test</h1>
<div id="log"></div>
<script src="common.js"></script>
<script src="test.js"></script>
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
<script>

var num_iframes = 8;

var order = [<py:for each="test in tests">'${test.id}',</py:for>];
var tests = {
<py:for each="test in tests">"${test.id}":[async_test('${file_name} ${test.id}', {'timeout':${test_timeout}}), ${test.string_uri_encoded_input}, ${test.string_escaped_expected}, '${test.container}'],</py:for>
}

init_tests("innerHTML", ${file_timeout});

</script>
</body>
</html>
152 changes: 152 additions & 0 deletions scripts/update_html5lib_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import sys
import os
import hashlib
import urllib
import itertools
import re
import json
import glob
import shutil

# Third-party dependencies. genshi renders the test-page templates and
# html5lib.tests.support locates and parses the .dat test files.
try:
    import genshi
    from genshi.template import MarkupTemplate

    from html5lib.tests import support
except ImportError:
    # Nothing below can work without these packages: print the setup
    # instructions and stop here rather than failing later with a NameError
    # on MarkupTemplate or support.
    print("""This script requires the Genshi templating library and html5lib source
It is recommended that these are installed in a virtualenv:
virtualenv venv
source venv/bin/activate
pip install genshi
cd venv
git clone git@github.com:html5lib/html5lib-python.git html5lib
cd html5lib
git submodule init
git submodule update
pip install -e ./
Then run this script again, with the virtual environment still active.
When you are done, type "deactivate" to deactivate the virtual environment.
""")
    sys.exit(1)

# Output directory for generated test pages, relative to the repository root.
TESTS_PATH = "html/syntax/parsing/"

def get_paths():
    """Locate the directories this script works with.

    Returns a ``(script_path, tests_path)`` tuple: the directory containing
    this file, and ``TESTS_PATH`` joined onto the repository root reported by
    ``get_repo_base``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    tests_dir = os.path.join(get_repo_base(here), TESTS_PATH)
    return here, tests_dir

def get_repo_base(path):
    """Return the closest directory at or above *path* that contains ``.git``.

    Walks from *path* towards the filesystem root, checking each directory
    for a ``.git`` entry. Returns ``None`` when no directory on the way up
    has one.
    """
    while path:
        if os.path.exists(os.path.join(path, ".git")):
            return path
        parent = os.path.dirname(path)
        if parent == path:
            # The parent of the filesystem root is the root itself, so
            # without this check the loop would spin forever when no
            # ancestor contains ".git".
            return None
        path = parent
    return None

def get_expected(data):
    """Return *data* with a ``#document`` line placed in front of it."""
    return "#document\n" + data

def get_hash(data, container=None):
    """Return a SHA-1 hex digest identifying a test by its input and container.

    The digest is taken over the UTF-8 encoding of
    ``"#container<container>#data<data>"``; a *container* of ``None`` is
    treated as the empty string.
    """
    if container is None:
        container = ""
    # Format first and encode once. For text input this hashes the same
    # bytes as encoding each piece separately, and it avoids formatting
    # encoded byte strings into a str, which breaks where bytes and text
    # are distinct types.
    key = "#container%s#data%s" % (container, data)
    return hashlib.sha1(key.encode("utf8")).hexdigest()

def make_tests(script_dir, out_dir, input_file_name, test_data):
    """Generate the test pages for one html5lib data file.

    Each item of *test_data* is read through its ``"data"`` and
    ``"document"`` keys and, when present, ``"document-fragment"``. Items
    with a ``"document-fragment"`` key go to the innerHTML page, the others
    to the normal page. An item whose id (see ``get_hash``) was already seen
    in this file is reported on stdout and skipped.

    Returns ``(path_normal, path_innerHTML)``; either is ``None`` when there
    were no tests of that kind, so no file was written.
    """
    document_tests = []
    fragment_tests = []
    seen = {}
    print(input_file_name)
    for test in test_data:
        is_fragment = "document-fragment" in test
        source = test["data"]
        if is_fragment:
            container = test["document-fragment"]
            bucket = fragment_tests
        else:
            container = None
            bucket = document_tests
        assert test["document"], test
        expected = get_expected(test["document"])
        test_id = get_hash(source, container)
        if test_id in seen:
            print("WARNING: id %s seen multiple times in file %s this time for test (%s, %s) before for test %s, skipping"%(test_id, input_file_name, container, source, seen[test_id]))
            continue
        seen[test_id] = (container, source)
        # Both values are percent-encoded and wrapped as string literals
        # before being placed into the template's inline script.
        encoded_input = "\"%s\"" % urllib.quote(source.encode("utf8"))
        encoded_expected = json.dumps(urllib.quote(expected.encode("utf8")))
        bucket.append({
            'string_uri_encoded_input': encoded_input,
            'input': source,
            'expected': expected,
            'string_escaped_expected': encoded_expected,
            'id': test_id,
            'container': container,
        })

    path_normal = (
        write_test_file(script_dir, out_dir, document_tests,
                        "html5lib_%s" % input_file_name,
                        "html5lib_test.xml")
        if document_tests else None
    )
    path_innerHTML = (
        write_test_file(script_dir, out_dir, fragment_tests,
                        "html5lib_innerHTML_%s" % input_file_name,
                        "html5lib_test_fragment.xml")
        if fragment_tests else None
    )
    return path_normal, path_innerHTML

def write_test_file(script_dir, out_dir, tests, file_name, template_file_name):
    """Render *tests* through a template and write the resulting HTML page.

    The template is read from *script_dir*/*template_file_name*; the page is
    written to *out_dir*/*file_name* with ``.html`` appended. The template
    receives the page's base name, the tests, a per-test timeout of 1000 and
    a per-file timeout of 1000 per test capped at 60000.

    Returns the path of the written file.
    """
    output_path = os.path.join(out_dir, file_name + ".html")
    template_path = os.path.join(script_dir, template_file_name)

    with open(template_path) as template_file:
        template = MarkupTemplate(template_file)

    whole_file_timeout = min(1000 * len(tests), 60 * 1000)
    stream = template.generate(file_name=os.path.basename(output_path),
                               tests=tests,
                               file_timeout=whole_file_timeout,
                               test_timeout=1000)

    with open(output_path, "w") as out:
        out.write(stream.render('html', doctype='html5', encoding="utf8"))
    return output_path

def escape_js_string(in_data):
return in_data.encode("utf8").encode("string-escape")

def serialize_filenames(test_filenames):
    """Format *test_filenames* as a bracketed list of double-quoted names.

    Names are separated by a comma followed by a newline; no escaping is
    applied to the names themselves.
    """
    quoted = ['"%s"' % name for name in test_filenames]
    return "[%s]" % ",\n".join(quoted)

def main():
    """Generate test pages for every html5lib tree-construction data file.

    With more than two entries in ``sys.argv``, the ``*.dat`` files in the
    directory ``sys.argv[2]`` are used. Otherwise the files come from
    html5lib's ``tree-construction`` data and its ``scripted`` subdirectory;
    output names for the latter get a ``scripted_`` prefix.

    The generated paths are collected in two local lists; nothing further is
    done with them in this function.
    """
    script_dir, out_dir = get_paths()

    test_files = []
    inner_html_files = []

    # NOTE(review): the directory is taken from sys.argv[2], so sys.argv[1]
    # is ignored and a single argument has no effect -- confirm intended.
    if len(sys.argv) > 2:
        pattern = os.path.join(sys.argv[2], "*.dat")
        found = sorted(os.path.abspath(item) for item in glob.glob(pattern))
        jobs = [(False, path) for path in found]
    else:
        plain = sorted(support.get_data_files("tree-construction"))
        scripted_files = sorted(support.get_data_files(
            os.path.join("tree-construction", "scripted")))
        jobs = ([(False, path) for path in plain] +
                [(True, path) for path in scripted_files])

    for scripted, test_file in jobs:
        stem = os.path.splitext(os.path.basename(test_file))[0]
        input_file_name = "scripted_" + stem if scripted else stem
        test_data = support.TestData(test_file)
        test_filename, inner_html_file_name = make_tests(
            script_dir, out_dir, input_file_name, test_data)
        if test_filename is not None:
            test_files.append(test_filename)
        if inner_html_file_name is not None:
            inner_html_files.append(inner_html_file_name)


if __name__ == "__main__":
    main()

0 comments on commit 4f2e4e5

Please sign in to comment.