Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read the existing MANIFEST.in file for files to ignore. #19

Merged
merged 16 commits into from
Oct 10, 2013
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ Changelog
0.17 (unreleased)
-----------------

* Read the existing MANIFEST.in file for files to ignore.


0.16 (2013-10-01)
-----------------
Expand Down
73 changes: 71 additions & 2 deletions check_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,12 @@ def add_directories(names):
'*.mo',
]

IGNORE_REGEXPS = [
# Regular expressions for filename to ignore. This is useful for
# filename patterns where the '*' part must not search in
# directories.
]
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The closing ] should be unindented.

I believe pep8 warns about this, but even if it doesn't, let's keep it consistent with the rest of the file.


WARN_ABOUT_FILES_IN_VCS = [
# generated files should not be committed into the VCS
'PKG-INFO',
Expand Down Expand Up @@ -394,15 +400,77 @@ def read_config():
IGNORE.extend(p for p in patterns if p)


def read_manifest():
"""Read existing configuration from MANIFEST.in.

We use that to ignore anything the MANIFEST.in ignores.
"""
# XXX modifies global state, which is kind of evil
if not os.path.isfile('MANIFEST.in'):
return
contents = open('MANIFEST.in').read()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use the with statement to avoid ResourceWarnings on Python 3.x and leaking file descriptors on PyPy.

ignore, ignore_regexps = _get_ignore_from_manifest(contents)
IGNORE.extend(ignore)
IGNORE_REGEXPS.extend(ignore_regexps)


def _get_ignore_from_manifest(contents):
"""Gather the various ignore patterns from MANIFEST.in.

'contents' should be a string, which may contain newlines.

Returns a list of standard ignore patterns and a list of regular
expressions to ignore.
"""
ignore = []
ignore_regexps = []
for line in contents.splitlines():
if line.startswith('exclude '):
# An exclude of 'dirname/*css' can match 'dirname/foo.css'
# but not 'dirname/subdir/bar.css'. We need a regular
# expression for that.
rest = line[len('exclude '):].strip().split()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .strip() is redundant here, .split() already ignores leading and trailing whitespace.

for pat in rest:
if '*' in pat:
pat = pat.replace('*', '[^/]*')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes . into a magical wildcard character, which is not ideal.

ignore_regexps.append(pat)
else:
# No need for special handling.
ignore.append(pat)
elif line.startswith('global-exclude '):
rest = line[len('global-exclude '):].strip().split()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .strip() is redundant.

When I mentioned it last time, I didn't highlight every occurrence, since I didn't want to spam you with too many Github emails.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I should have checked.

ignore.extend(rest)
elif line.startswith('recursive-exclude '):
rest = line[len('recursive-exclude '):].strip()
dirname, patterns = rest.split(' ', 1)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps rest.split(None, 1), in case we find someone who uses tabs or indents the patterns.

I'm assuming distutils/setuptools accepts multiple spaces here.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, I see you already have a test that tests 'recursive-exclude dirname patterns' with extra spaces. It works because rest is already stripped (which would not be necessary if you used .split(None, 1) here), and because patterns handle leading whitespace in the next line (because of .split(), if we ignore the redundant .strip()).

So, no bug here, but I'd prefer .split(None, 1) with no .strip() on the line above, for conciseness.

Actually wait, if tabs are allowed, then there is a bug here, which would be fixed by using .split(None, 1). (I don't know if tabs are allowed by distutils. My gut feeling is "yes".)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, indeed, tabs are accepted too.

I have refactored this part slightly to make it clearer, by first trying cmd, rest = line.split(None, 1) on each line. Will push after I finish the other fixes.

for pattern in patterns.strip().split():
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

.strip().split() again.

ignore.append(dirname + os.path.sep + pattern)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure this is right. If I do

recursive-exclude src README.txt

it will add src/README.txt to the ignore list, but it should add src/*/README.txt too, shouldn't it? Otherwise it's not recursive.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd expect a pattern here, but it indeed works for a simple name too: recursive-exclude plone metadata.xml will ignore all metadata.xml files recursively. Fixed.

elif line.startswith('prune '):
dirname = line[len('prune '):].strip()
ignore.append(dirname)
ignore.append(dirname + os.path.sep + '*')
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this work correctly if dirname already ends with a slash?

I think it will, but I should double-check before merging/releasing. A unit test would perhaps be good.

return ignore, ignore_regexps


def file_matches(filename, patterns):
"""Does this filename match any of the patterns?"""
return any(fnmatch.fnmatch(filename, pat) for pat in patterns)


def file_matches_regexps(filename, patterns):
"""Does this filename match any of the regular expressions?"""
return any(re.match(pat, filename) for pat in patterns)


def strip_sdist_extras(filelist):
"""Strip generated files that are only present in source distributions."""
"""Strip generated files that are only present in source distributions.

We also strip files that are ignored for other reasons, like
command line arguments, setup.cfg rules or MANIFEST.in rules.
"""
return [name for name in filelist
if not file_matches(name, IGNORE)]
if not file_matches(name, IGNORE)
and not file_matches_regexps(name, IGNORE_REGEXPS)]


def find_bad_ideas(filelist):
Expand Down Expand Up @@ -451,6 +519,7 @@ def check_manifest(source_tree='.', create=False, update=False,
if not is_package(source_tree):
raise Failure('This is not a Python project (no setup.py).')
read_config()
read_manifest()
info_begin("listing source files under version control")
all_source_files = sorted(get_vcs_files())
source_files = strip_sdist_extras(all_source_files)
Expand Down
127 changes: 127 additions & 0 deletions tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,69 @@ def test_strip_sdist_extras(self):
]
self.assertEqual(strip_sdist_extras(filelist), expected)

def test_strip_sdist_extras_with_manifest(self):
import check_manifest
from check_manifest import strip_sdist_extras
from check_manifest import _get_ignore_from_manifest as parse
orig_ignore = check_manifest.IGNORE
orig_ignore_regexps = check_manifest.IGNORE_REGEXPS
manifest_in = """
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use textwrap.dedent() or some other way to keep the indentation visually clear.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You used dedent() in the other place (thank you), but forgot this one :)

graft src
exclude *.cfg
global-exclude *.mo
prune src/dump
recursive-exclude src/zope *.sh
"""
filelist = [
'.gitignore',
'setup.py',
'setup.cfg',
'MANIFEST.in',
'README.txt',
'src',
'src/helper.sh',
'src/dump',
'src/dump/__init__.py',
'src/zope',
'src/zope/__init__.py',
'src/zope/zopehelper.sh',
'src/zope/foo',
'src/zope/foo/__init__.py',
'src/zope/foo/language.po',
'src/zope/foo/language.mo',
'src/zope/foo/config.cfg',
'src/zope/foo/foohelper.sh',
'src/zope.foo.egg-info',
'src/zope.foo.egg-info/SOURCES.txt',
]
expected = [
'setup.py',
'MANIFEST.in',
'README.txt',
'src',
'src/helper.sh',
'src/zope',
'src/zope/__init__.py',
'src/zope/foo',
'src/zope/foo/__init__.py',
'src/zope/foo/language.po',
'src/zope/foo/config.cfg',
]

# This will change the definitions.
try:
# This is normally done in read_manifest:
ignore, ignore_regexps = parse(manifest_in)
check_manifest.IGNORE.extend(ignore)
check_manifest.IGNORE_REGEXPS.extend(ignore_regexps)
# Filter the file list.
result = strip_sdist_extras(filelist)
finally:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After I merge this, please hit me with a fish until I refactor check-manifest to avoid mutable global state. OOP was invented in the last century, after all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will see if I can find a large trout. :-)

# Restore the original definitions
check_manifest.IGNORE = orig_ignore
check_manifest.IGNORE_REGEXPS = orig_ignore_regexps
self.assertEqual(result, expected)

def test_find_bad_ideas(self):
from check_manifest import find_bad_ideas
filelist = [
Expand Down Expand Up @@ -137,6 +200,70 @@ def test_find_suggestions_generic_fallback_rules(self):
self.assertEqual(find_suggestions(['src/id-lang.map']),
(['recursive-include src *.map'], []))

def test_get_ignore_from_manifest(self):
from check_manifest import _get_ignore_from_manifest as parse
# The return value is a tuple with two lists:
# ([<list of filename ignores>], [<list of regular expressions>])
self.assertEqual(parse(''),
([], []))
self.assertEqual(parse(' \n '),
([], []))
self.assertEqual(parse('exclude *.cfg'),
([], ['[^/]*.cfg']))
self.assertEqual(parse('#exclude *.cfg'),
([], []))
self.assertEqual(parse('exclude *.cfg'),
([], ['[^/]*.cfg']))
self.assertEqual(parse('exclude *.cfg foo.* bar.txt'),
(['bar.txt'], ['[^/]*.cfg', 'foo.[^/]*']))
self.assertEqual(parse('exclude some/directory/*.cfg'),
([], ['some/directory/[^/]*.cfg']))
self.assertEqual(parse('include *.cfg'),
([], []))
self.assertEqual(parse('global-exclude *.pyc'),
(['*.pyc'], []))
self.assertEqual(parse('global-exclude *.pyc *.sh'),
(['*.pyc', '*.sh'], []))
self.assertEqual(parse('recursive-exclude dir *.pyc'),
(['dir/*.pyc'], []))
self.assertEqual(parse('recursive-exclude dir *.pyc *.sh'),
(['dir/*.pyc', 'dir/*.sh'], []))
self.assertEqual(parse('prune dir'),
(['dir', 'dir/*'], []))
text = """
#exclude *.01
exclude *.02
exclude *.03 04.* bar.txt
exclude *.05
exclude some/directory/*.cfg
global-exclude *.10 *.11
global-exclude *.12
include *.20
prune 30
recursive-exclude 40 *.41
recursive-exclude 42 *.43 44.*
"""
self.assertEqual(
parse(text),
([
'bar.txt',
'*.10',
'*.11',
'*.12',
'30',
'30/*',
'40/*.41',
'42/*.43',
'42/44.*',
],
[
'[^/]*.02',
'[^/]*.03',
'04.[^/]*',
'[^/]*.05',
'some/directory/[^/]*.cfg',
]))


class VCSMixin(object):

Expand Down