Skip to content

Commit

Permalink
CI: Add test case for unwanted patterns
Browse files Browse the repository at this point in the history
  • Loading branch information
MomIsBestFriend committed Dec 25, 2019
1 parent 1593023 commit 3ae2f2b
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 0 deletions.
4 changes: 4 additions & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
invgrep -R --include=*.{py,pyx} 'xrange' pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check for use of not concatenated strings' ; echo $MSG
python $BASE_DIR/scripts/validate_string_concatenation.py pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
INVGREP_APPEND=" <- trailing whitespaces found"
invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
Expand Down
131 changes: 131 additions & 0 deletions scripts/validate_string_concatenation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#!/usr/bin/env python
"""
GH #30454
Check where there is a string that needs to be concatenated.
This is necessary after black formatting,
where for example black transforms this:
>>> foo = (
... "bar "
... "baz"
... )
into this:
>>> foo = ("bar " "baz")
Black is not considering this as an
issue (see https://github.com/psf/black/issues/1051), so we are checking
it here.
"""

import os
import sys
import token
import tokenize

# Full extensions (as returned by ``full_ext``) of the files that ``main``
# checks while walking a directory; files with any other extension are skipped.
# NOTE(review): ".pyx.ini" may be a typo -- the ``full_ext`` docstring talks
# about ".pxi.ini" files. Confirm which suffix is actually intended.
FILE_EXTENSIONS_TO_CHECK = [".pxd", ".py", ".pyx", ".pyx.ini"]


def main():
    """
    Run the string-concatenation check on the path given on the command line.

    The path is read from ``sys.argv[1]``. A single file is checked directly;
    a directory is walked recursively and every file whose full extension is
    listed in ``FILE_EXTENSIONS_TO_CHECK`` is checked.

    The process always terminates through ``sys.exit``: status 1 when at
    least one checked file needs a fix, status 0 otherwise.

    Raises
    ------
    ValueError
        If no path argument was given, or the given path does not exist.
    """
    # A missing argument is reported the same way as an invalid path instead
    # of surfacing as a bare IndexError.
    if len(sys.argv) < 2:
        raise ValueError("Please enter a valid path, to a file/directory.")

    path = sys.argv[1]

    if not os.path.exists(path):
        raise ValueError("Please enter a valid path, to a file/directory.")

    if os.path.isfile(path):
        # Means that the given path is of a single file.
        sys.exit(is_concatenated(path))

    # Means that the given path is of a directory.
    needs_fix = False
    for subdir, _, files in os.walk(path):
        for file_name in files:
            file_path = os.path.join(subdir, file_name)
            if full_ext(file_path) not in FILE_EXTENSIONS_TO_CHECK:
                continue
            # Every matching file is checked (no short-circuit) so that all
            # offending locations get printed in a single run.
            if is_concatenated(file_path) == 1:
                needs_fix = True

    sys.exit(1 if needs_fix else 0)


def full_ext(path):
    """
    Get the full file extension name.

    Parameters
    ----------
    path : str
        File path.

    Returns
    -------
    str
        Full extension of the file: every dot-separated suffix of the file
        name joined together, or an empty string when the name has no dot.

    Notes
    -----
    This function is needed only because of file extensions like
    `.pxi.ini` for example.

    Only the last path component is inspected, so dots that appear in
    directory names (including a leading ``./``) do not leak into the result.

    Examples
    --------
    With one suffix:

    >>> full_ext('/full/path/to/file.py')
    '.py'

    With two suffixes:

    >>> full_ext('/full/path/to/file.pxi.ini')
    '.pxi.ini'

    Dots in directory names are ignored:

    >>> full_ext('./my.project/file.py')
    '.py'
    """
    # Split only the file name; splitting the whole path would treat dots in
    # parent directories as the start of the extension.
    file_name = os.path.basename(path)
    suffixes = file_name.split(".")[1:]
    return "".join(".{suffix}".format(suffix=suffix) for suffix in suffixes)


def is_concatenated(file_path):
    """
    Check whether the file contains strings that need to be concatenated.

    Two ``STRING`` tokens that directly follow each other (with no other
    token, such as a newline, between them) are reported. Each occurrence is
    printed as ``<file_path>:<line_number>:<TAB><first> and <second>``.

    Parameters
    ----------
    file_path : str
        File path pointing to a single file.

    Returns
    -------
    int
        Status code representing if the file needs a fix.
        0 - All good.
        1 - Needs to be fixed.
    """
    need_fix = False

    # tokenize.open decodes the file using its BOM / PEP 263 coding cookie
    # (UTF-8 by default) rather than the locale's default encoding.
    with tokenize.open(file_path) as source:
        toks = list(tokenize.generate_tokens(source.readline))

    # Walk over every pair of neighbouring tokens.
    for tok, next_tok in zip(toks, toks[1:]):
        if tok[0] == token.STRING and next_tok[0] == token.STRING:
            need_fix = True
            print(
                "{file_path}:{line_number}:\t{start} and {end}".format(
                    file_path=file_path,
                    line_number=tok[2][0],
                    start=tok[1],
                    end=next_tok[1],
                )
            )

    return int(need_fix)


# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 comments on commit 3ae2f2b

Please sign in to comment.