Skip to content

Commit

Permalink
Merge pull request #519 from mshawcroft/fix-518
Browse files Browse the repository at this point in the history
Fix #518, provide --enforce-all option to check_added_large_files
  • Loading branch information
asottile authored Sep 27, 2020
2 parents 31d41ff + 012bb06 commit 5bd9e74
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 4 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ Add this to your `.pre-commit-config.yaml`
#### `check-added-large-files`
Prevent giant files from being committed.
- Specify what is "too large" with `args: ['--maxkb=123']` (default=500kB).
- Limits checked files to those indicated as staged for addition by git.
- If `git-lfs` is installed, lfs files will be skipped
(requires `git-lfs>=2.2.1`)
- `--enforce-all` - Check all listed files, not just those staged for
addition.

#### `check-ast`
Simply check whether files parse as valid python.
Expand Down
25 changes: 21 additions & 4 deletions pre_commit_hooks/check_added_large_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,20 @@ def lfs_files() -> Set[str]:
return set(json.loads(lfs_ret)['files'])


def find_large_added_files(filenames: Sequence[str], maxkb: int) -> int:
def find_large_added_files(
filenames: Sequence[str],
maxkb: int,
*,
enforce_all: bool = False,
) -> int:
# Find all added files that are also in the list of files pre-commit tells
# us about
retv = 0
for filename in (added_files() & set(filenames)) - lfs_files():
filenames_filtered = set(filenames) - lfs_files()
if not enforce_all:
filenames_filtered &= added_files()

for filename in filenames_filtered:
kb = int(math.ceil(os.stat(filename).st_size / 1024))
if kb > maxkb:
print(f'{filename} ({kb} KB) exceeds {maxkb} KB.')
Expand All @@ -40,13 +49,21 @@ def main(argv: Optional[Sequence[str]] = None) -> int:
'filenames', nargs='*',
help='Filenames pre-commit believes are changed.',
)
parser.add_argument(
'--enforce-all', action='store_true',
help='Enforce all files are checked, not just staged files.',
)
parser.add_argument(
'--maxkb', type=int, default=500,
help='Maxmimum allowable KB for added files',
)

args = parser.parse_args(argv)
return find_large_added_files(args.filenames, args.maxkb)

return find_large_added_files(
args.filenames,
args.maxkb,
enforce_all=args.enforce_all,
)


if __name__ == '__main__':
Expand Down
23 changes: 23 additions & 0 deletions tests/check_added_large_files_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ def test_add_something_giant(temp_git_dir):
assert find_large_added_files(['f.py'], 10) == 0


def test_enforce_all(temp_git_dir):
    """An unstaged oversized file is reported only when enforce_all is set."""
    with temp_git_dir.as_cwd():
        # The file is written to disk but never staged in this test.
        oversized = temp_git_dir.join('f.py')
        oversized.write('a' * 10000)

        # With enforce_all the staged-files filter is bypassed, so the
        # unstaged file is checked against the 0 KB limit and fails.
        with_enforce = find_large_added_files(['f.py'], 0, enforce_all=True)
        assert with_enforce == 1

        # Without enforce_all only staged additions are checked, so the
        # unstaged file is ignored and the check passes.
        without_enforce = find_large_added_files(
            ['f.py'], 0, enforce_all=False,
        )
        assert without_enforce == 0


def test_added_file_not_in_pre_commits_list(temp_git_dir):
with temp_git_dir.as_cwd():
temp_git_dir.join('f.py').write("print('hello world')")
Expand Down Expand Up @@ -97,3 +108,15 @@ def test_moves_with_gitlfs(temp_git_dir, monkeypatch): # pragma: no cover
# Now move it and make sure the hook still succeeds
cmd_output('git', 'mv', 'a.bin', 'b.bin')
assert main(('--maxkb', '9', 'b.bin')) == 0


@xfailif_no_gitlfs
def test_enforce_allows_gitlfs(temp_git_dir, monkeypatch):  # pragma: no cover
    """A large file tracked by git-lfs still passes under --enforce-all."""
    with temp_git_dir.as_cwd():
        # HOME is redirected to the temp dir before `git lfs install`
        # (presumably so the install does not touch the real user config).
        monkeypatch.setenv('HOME', str(temp_git_dir))
        cmd_output('git', 'lfs', 'install')

        # Create a 10000-byte file, track it with lfs, then stage everything.
        lfs_file = temp_git_dir.join('f.py')
        lfs_file.write('a' * 10000)
        cmd_output('git', 'lfs', 'track', 'f.py')
        cmd_output('git', 'add', '--', '.')

        # With --enforce-all large files on git lfs should succeed
        hook_argv = ('--enforce-all', '--maxkb', '9', 'f.py')
        assert main(hook_argv) == 0

0 comments on commit 5bd9e74

Please sign in to comment.