Skip to content

Commit

Permalink
Merge pull request #180 from gsnedders/css-dupes
Browse files Browse the repository at this point in the history
Lint for filename duplicates in CSS testsuite
  • Loading branch information
gsnedders authored Mar 15, 2017
2 parents 3a898ff + 9d0a6ea commit 369f52e
Show file tree
Hide file tree
Showing 19 changed files with 382 additions and 46 deletions.
203 changes: 186 additions & 17 deletions lint/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import abc
import argparse
import ast
import itertools
import json
import os
import re
Expand Down Expand Up @@ -46,6 +47,45 @@ def all_filesystem_paths(repo_root):
path_filter(os.path.relpath(os.path.join(dirpath, item) + "/",
repo_root))]


def _all_files_equal(paths):
"""
Checks all the paths are files that are byte-for-byte identical
:param paths: the list of paths to compare
:returns: True if they are all identical
"""
paths = list(paths)
if len(paths) < 2:
return True

first = paths.pop()
size = os.path.getsize(first)
if any(os.path.getsize(path) != size for path in paths):
return False

# Chunk this to avoid eating up memory and file descriptors
bufsize = 4096*4 # 16KB, a "reasonable" number of disk sectors
groupsize = 8 # Hypothesised to be large enough in the common case that everything fits in one group
with open(first, "rb") as first_f:
for start in range(0, len(paths), groupsize):
path_group = paths[start:start+groupsize]
first_f.seek(0)
try:
files = [open(x, "rb") for x in path_group]
for _ in range(0, size, bufsize):
a = first_f.read(bufsize)
for f in files:
b = f.read(bufsize)
if a != b:
return False
finally:
for f in files:
f.close()

return True


def check_path_length(repo_root, path, css_mode):
if len(path) + 1 > 150:
return [("PATH LENGTH", "/%s longer than maximum path length (%d > 150)" % (path, len(path) + 1), path, None)]
Expand All @@ -65,6 +105,114 @@ def check_worker_collision(repo_root, path, css_mode):
return []


# Patterns that pull the spec shortname (the first relevant path segment)
# out of the URL forms used by CSS spec links.  `:` and `/` need no
# escaping in a regex; `w3c?` matches both www.w3.org and www.w3c.org.
drafts_csswg_re = re.compile(r"https?://drafts\.csswg\.org/([^/?#]+)")
w3c_tr_re = re.compile(r"https?://www\.w3c?\.org/TR/([^/?#]+)")
w3c_dev_re = re.compile(r"https?://dev\.w3c?\.org/[^/?#]+/([^/?#]+)")


def check_css_globally_unique(repo_root, paths, css_mode):
    """
    Checks that CSS filenames are sufficiently unique

    This groups files by path classifying them as "test", "reference", or
    "support".

    "test" files must have a unique name across files that share links to the
    same spec.

    "reference" and "support" files, on the other hand, must have globally
    unique names.

    :param repo_root: the repository root
    :param paths: list of all paths
    :param css_mode: whether we're in CSS testsuite mode
    :returns: a list of errors found in ``paths``
    """
    test_files = defaultdict(set)
    ref_files = defaultdict(set)
    support_files = defaultdict(set)

    for path in paths:
        if os.name == "nt":
            path = path.replace("\\", "/")

        if not css_mode:
            if not path.startswith("css/"):
                continue

        # we're within css or in css_mode after all that
        source_file = SourceFile(repo_root, path, "/")
        if source_file.name_is_non_test:
            # If we're name_is_non_test for a reason apart from support, ignore it.
            # We care about support because of the requirement all support files in css/ to be in
            # a support directory; see the start of check_parsed.
            offset = path.find("/support/")
            if offset == -1:
                continue

            parts = source_file.dir_path.split(os.path.sep)
            if parts[0] in source_file.root_dir_non_test:
                continue
            elif any(item in source_file.dir_non_test - {"support"} for item in parts):
                continue
            elif any(parts[:len(non_test_path)] == list(non_test_path)
                     for non_test_path in source_file.dir_path_non_test):
                # Bug fix: the original compared parts[:len(path)] (the
                # *string* length of the whole path) and issued `continue`
                # on an inner loop, so this exclusion never took effect.
                continue

            # Keep the "support/..."-relative name as the uniqueness key.
            name = path[offset+1:]
            support_files[name].add(path)
        elif source_file.name_is_reference:
            ref_files[source_file.name].add(path)
        else:
            test_files[source_file.name].add(path)

    errors = []

    for name, colliding in iteritems(test_files):
        if len(colliding) > 1:
            if not _all_files_equal([os.path.join(repo_root, x) for x in colliding]):
                # Only compute by_spec if there are prima-facie collisions because of cost
                by_spec = defaultdict(set)
                for path in colliding:
                    source_file = SourceFile(repo_root, path, "/")
                    for link in source_file.spec_links:
                        for r in (drafts_csswg_re, w3c_tr_re, w3c_dev_re):
                            m = r.match(link)
                            if m:
                                spec = m.group(1)
                                break
                        else:
                            # Link matched none of the known spec URL forms.
                            continue
                        by_spec[spec].add(path)

                for spec, paths in iteritems(by_spec):
                    if not _all_files_equal([os.path.join(repo_root, x) for x in paths]):
                        for x in paths:
                            errors.append(("CSS-COLLIDING-TEST-NAME",
                                           "The filename %s in the %s testsuite is shared by: %s"
                                           % (name,
                                              spec,
                                              ", ".join(sorted(paths))),
                                           x,
                                           None))

    for error_name, d in [("CSS-COLLIDING-REF-NAME", ref_files),
                          ("CSS-COLLIDING-SUPPORT-NAME", support_files)]:
        for name, colliding in iteritems(d):
            if len(colliding) > 1:
                if not _all_files_equal([os.path.join(repo_root, x) for x in colliding]):
                    for x in colliding:
                        errors.append((error_name,
                                       "The filename %s is shared by: %s" % (name,
                                                                             ", ".join(sorted(colliding))),
                                       x,
                                       None))

    return errors


def parse_whitelist(f):
"""
Parse the whitelist file given by `f`, and return the parsed structure.
Expand All @@ -89,12 +237,12 @@ def parse_whitelist(f):
if error_type == "*":
ignored_files.add(file_match)
else:
data[file_match][error_type].add(line_number)
data[error_type][file_match].add(line_number)

return data, ignored_files


def filter_whitelist_errors(data, path, errors):
def filter_whitelist_errors(data, errors):
"""
Filter out those errors that are whitelisted in `data`.
"""
Expand All @@ -103,14 +251,14 @@ def filter_whitelist_errors(data, path, errors):
return []

whitelisted = [False for item in range(len(errors))]
normpath = os.path.normcase(path)

for file_match, whitelist_errors in iteritems(data):
if fnmatch.fnmatchcase(normpath, file_match):
for i, (error_type, msg, path, line) in enumerate(errors):
if error_type in whitelist_errors:
allowed_lines = whitelist_errors[error_type]
if None in allowed_lines or line in allowed_lines:

for i, (error_type, msg, path, line) in enumerate(errors):
normpath = os.path.normcase(path)
if error_type in data:
wl_files = data[error_type]
for file_match, allowed_lines in iteritems(wl_files):
if None in allowed_lines or line in allowed_lines:
if fnmatch.fnmatchcase(normpath, file_match):
whitelisted[i] = True

return [item for i, item in enumerate(errors) if not whitelisted[i]]
Expand Down Expand Up @@ -423,6 +571,22 @@ def check_path(repo_root, path, css_mode):
return errors


def check_all_paths(repo_root, paths, css_mode):
    """
    Runs lints that check all paths globally.

    :param repo_root: the repository root
    :param paths: a list of all the paths within the repository
    :param css_mode: whether we're in CSS testsuite mode
    :returns: a list of error tuples found across ``paths``
    """

    errors = []
    # Each registered lint sees the complete path list at once, enabling
    # cross-file checks such as duplicate-filename detection.
    for paths_fn in all_paths_lints:
        errors.extend(paths_fn(repo_root, paths, css_mode))
    return errors


def check_file_contents(repo_root, path, f, css_mode):
"""
Runs lints that check the file contents.
Expand Down Expand Up @@ -476,7 +640,7 @@ def parse_args():

def main(force_css_mode=False):
    """Parse CLI arguments and run the linter over the resolved paths.

    :param force_css_mode: force CSS testsuite mode regardless of the flag
    :returns: the total error count from ``lint``
    """
    args = parse_args()
    # Materialize as a list: lint() removes ignored/missing entries in place.
    paths = list(args.paths or all_filesystem_paths(repo_root))
    return lint(repo_root, paths, args.json, force_css_mode or args.css_mode)

def lint(repo_root, paths, output_json, css_mode):
Expand All @@ -491,17 +655,16 @@ def lint(repo_root, paths, output_json, css_mode):
else:
output_errors = output_errors_text

def process_errors(path, errors):
def process_errors(errors):
"""
Filters and prints the errors, and updates the ``error_count`` object.
:param path: the path of the file that contains the errors
:param errors: a list of error tuples (error type, message, path, line number)
:returns: ``None`` if there were no errors, or
a tuple of the error type and the path otherwise
"""

errors = filter_whitelist_errors(whitelist, path, errors)
errors = filter_whitelist_errors(whitelist, errors)

if not errors:
return None
Expand All @@ -512,21 +675,26 @@ def process_errors(path, errors):

return (errors[-1][0], path)

for path in paths:
for path in paths[:]:
abs_path = os.path.join(repo_root, path)
if not os.path.exists(abs_path):
paths.remove(path)
continue

if any(fnmatch.fnmatch(path, file_match) for file_match in ignored_files):
paths.remove(path)
continue

errors = check_path(repo_root, path, css_mode)
last = process_errors(path, errors) or last
last = process_errors(errors) or last

if not os.path.isdir(abs_path):
with open(abs_path, 'rb') as f:
errors = check_file_contents(repo_root, path, f, css_mode)
last = process_errors(path, errors) or last
last = process_errors(errors) or last

errors = check_all_paths(repo_root, paths, css_mode)
last = process_errors(errors) or last

if not output_json:
output_error_count(error_count)
Expand All @@ -535,6 +703,7 @@ def process_errors(path, errors):
return sum(itervalues(error_count))

# Lints that inspect a single path string (no file contents needed).
path_lints = [check_path_length, check_worker_collision]
# Lints that need the full path list at once, for cross-file checks.
all_paths_lints = [check_css_globally_unique]
# Lints that read and check the contents of each file.
file_lints = [check_regexp_line, check_parsed, check_python_ast, check_script_metadata]

if __name__ == "__main__":
Expand Down
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/a-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
4 changes: 4 additions & 0 deletions lint/tests/dummy/css-unique/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<link rel="help" href="https://www.w3.org/TR/CSS21/aural.html#propdef-stress">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
1
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/match/a-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
4 changes: 4 additions & 0 deletions lint/tests/dummy/css-unique/match/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<link rel="help" href="https://www.w3.org/TR/CSS21/aural.html#propdef-stress">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
1
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/match/support/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/match/support/tools/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/match/tools/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/not-match/a-ref.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
4 changes: 4 additions & 0 deletions lint/tests/dummy/css-unique/not-match/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<link rel="help" href="https://www.w3.org/TR/CSS21/aural.html#propdef-stress">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
2
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/not-match/support/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/not-match/tools/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
4 changes: 4 additions & 0 deletions lint/tests/dummy/css-unique/selectors/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<link rel="help" href="https://drafts.csswg.org/selectors-3/#type-selectors">
<script src="/resources/testharness.js"></script>
<script src="/resources/testharnessreport.js"></script>
1
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/support/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/support/tools/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2
1 change: 1 addition & 0 deletions lint/tests/dummy/css-unique/tools/a.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1
Loading

0 comments on commit 369f52e

Please sign in to comment.