Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[INFRA] set up github action to detect latin phrases #636

Merged
merged 6 commits into from
Oct 5, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions .github/workflows/no-bad-latin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Give your test a name!
Remi-Gau marked this conversation as resolved.
Show resolved Hide resolved
name: Check for Latin Phrases

# Decide when to run the tests
#
# This configuration sets the test to run on pushes to master
# and on pull requests that are opened to master
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

# Set up the Continuous Integration job
jobs:
  latin-phrases:
    # Run on the latest Ubuntu distribution
    runs-on: ubuntu-latest
    # This section collects together the steps involved in running the test
    steps:
      # Checkout the repository. Relies on another GH-Action.
      - uses: actions/checkout@v2
      # Set up the Python version. Relies on another GH-Action.
      - name: Setup Python 3.7
        uses: actions/setup-python@v1
        with:
          python-version: 3.7
      # Install Python dependencies
      # (runs inside ./tools, where requirements.txt lives)
      - name: Install dependencies
        working-directory: ./tools
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r requirements.txt
      # Run a Python script
      # Only one of the two steps below runs for a given event: the first on
      # pushes to master (script called without arguments), the second on pull
      # requests (script called with the Pull Request number).
      - name: Run Python script to check for latin phrases - Master
        working-directory: ./tools
        if: github.event_name == 'push' && github.ref == 'refs/heads/master'
        run: |
          python no-bad-latin.py

      - name: Run Python script to check for latin phrases - Pull Request
        working-directory: ./tools
        # True only when the triggering event carries a pull_request payload
        if: github.event.pull_request
        run: |
          python no-bad-latin.py --pull-request ${{ github.event.pull_request.number }}
156 changes: 156 additions & 0 deletions tools/no-bad-latin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import os
Remi-Gau marked this conversation as resolved.
Show resolved Hide resolved
import re
import argparse
from pull_files import filter_files

# Directory containing this script. Derived from the script's own location
# rather than the current working directory, so the check resolves the same
# paths no matter where it is launched from.
HERE = os.path.dirname(os.path.abspath(__file__))
# Parent of the script directory; the paths of files to check are joined onto
# this directory.
ABSOLUTE_HERE = os.path.dirname(HERE)
# Base names of files that are exempt from the latin-phrase check.
IGNORE_LIST = ["_config.yml", "style.md", "contributors-record.md"]


def parse_args():
    """Build the command line parser and parse the command line arguments

    Returns:
        {argparse.Namespace} -- The parsed arguments. `pull_request` holds the
        Pull Request number as a string, or None when the option is not given
    """
    cli = argparse.ArgumentParser(
        description="Script to check for latin phrases in Markdown files"
    )

    # The single optional flag, described as keyword options for add_argument
    pull_request_option = {
        "type": str,
        "default": None,
        "help": "If the script is being run on a Pull Request, parse the PR number",
    }
    cli.add_argument("--pull-request", **pull_request_option)

    return cli.parse_args()


def remove_comments(text_string):
    """Strip html comments (`<!-- ... -->`) out of a text string

    Comments may span several lines; each one is removed together with its
    delimiters.

    Arguments:
        text_string {string} -- The text to strip comments from

    Returns:
        {string} -- The input text with every html comment removed
    """
    # (?s) lets "." match newlines; the lazy ".*?" stops at the first "-->"
    html_comment = re.compile("(?s)<!--(.*?)-->")
    return html_comment.sub("", text_string)


def get_lines(text_string, sub_string):
    """Collect the lines of a text that contain a given sub-string

    Arguments:
        text_string {string} -- The text to search, split on newlines
        sub_string {string} -- The sub-string a line must contain to be kept

    Returns:
        {list} -- The matching lines, in their original order
    """
    matching_lines = []

    for candidate in text_string.split("\n"):
        if sub_string in candidate:
            matching_lines.append(candidate)

    return matching_lines


def construct_error_message(files_dict):
    """Build the error message listing where bad latin phrases were found

    Arguments:
        files_dict {dictionary} -- Maps each failing file to a dictionary with
            the keys 'latin_type' (the phrase found) and 'line' (the offending
            line)

    Returns:
        {string} -- The error message to be raised: a header followed by one
        entry per failing file
    """
    header = "Bad latin found in the following files:\n"

    entries = [
        f"{path}:\t{details['latin_type']}\tfound in line\t[{details['line']}]\n"
        for path, details in files_dict.items()
    ]

    return "\n".join([header, *entries])


def read_and_check_files(files):
    """Function to read in files, remove html comments and check for bad latin
    phrases

    Matching is case-insensitive: the text is lower-cased before it is
    searched, so the reported line is lower-cased too.

    Arguments:
        files {list} -- List of filenames to be checked. Relative names are
            resolved against ABSOLUTE_HERE; absolute names are used as given

    Returns:
        {dict} -- Dictionary: Top level keys are absolute filepaths to files
        that failed the check. Each of these has two keys:
        'latin_type' containing the unwanted latin phrase, and 'line'
        containing the offending line. Only one phrase/line pair is kept
        per file: the last match found.
    """
    failing_files = {}
    bad_latin = [
        "i.e.", "i.e ", " ie ",
        "e.g.", "e.g ",
        "e.t.c.", " etc", "et cetera",
    ]

    for filename in files:
        if os.path.basename(filename) in IGNORE_LIST:
            continue

        # Resolve the path once so that the file that is read and the path
        # that is reported are the same. os.path.join leaves an absolute
        # filename untouched.
        filepath = os.path.join(ABSOLUTE_HERE, filename)

        try:
            with open(filepath, encoding="utf8", errors="ignore") as f:
                text = f.read()
        except FileNotFoundError:
            # Best effort: a listed file that is not on disk is skipped
            # (presumably a file deleted by the Pull Request -- TODO confirm)
            continue

        # Lower-case once, instead of once per phrase
        searchable_text = remove_comments(text).lower()

        for latin_type in bad_latin:
            for line in get_lines(searchable_text, latin_type):
                # Later matches overwrite earlier ones for the same file
                failing_files[os.path.abspath(filepath)] = {
                    "latin_type": latin_type,
                    "line": line,
                }

    return failing_files


def get_all_files(directory=os.path.join(ABSOLUTE_HERE, "src")):
    """Walk a directory tree and list the files to be checked. Image,
    javascript and css files are left out.

    Keyword Arguments:
        directory {string} -- The directory containing the files to check
            (default: the "src" directory under ABSOLUTE_HERE)

    Returns:
        {list} -- Paths of the files to check, each joined onto the directory
        it was found in
    """
    filetypes_to_ignore = (".png", ".jpg", ".js", ".css")

    # str.endswith accepts a tuple, so one call tests every ignored extension
    return [
        os.path.join(rootdir, name)
        for rootdir, _, names in os.walk(directory)
        for name in names
        if not name.endswith(filetypes_to_ignore)
    ]


def main():
    """Check the relevant files for latin phrases and raise if any are found

    With a Pull Request number only the files changed by that Pull Request are
    checked; otherwise every file returned by get_all_files is checked.
    """
    pr_number = parse_args().pull_request

    files = get_all_files() if pr_number is None else filter_files(pr_number)
    failing_files = read_and_check_files(files)

    if not failing_files:
        return

    raise Exception(construct_error_message(failing_files))


if __name__ == "__main__":
    main()
69 changes: 69 additions & 0 deletions tools/pull_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Script to pull changed files in a Pull Request using a GET request to the
Remi-Gau marked this conversation as resolved.
Show resolved Hide resolved
GitHub API.
"""
Remi-Gau marked this conversation as resolved.
Show resolved Hide resolved
import requests
import argparse


def parse_args():
    """Construct the command line interface for the script

    Returns:
        {argparse.Namespace} -- The parsed arguments. `pull_request` holds the
        Pull Request number as a string, or None when the option is not given
    """
    DESCRIPTION = "Script to pull changed files in a Pull Request using a GET request to the GitHub API."
    parser = argparse.ArgumentParser(description=DESCRIPTION)

    parser.add_argument(
        "--pull-request",
        type=str,
        default=None,
        help="If the script is being run on files changed by a pull request, parse the PR number",
    )

    return parser.parse_args()


def get_files_from_pr(pr_num):
    """Return a list of changed files from a GitHub Pull Request

    The GitHub API returns the file list in pages, so every page is fetched
    by following the "next" link of each response.

    Arguments:
        pr_num {str} -- Pull Request number to get modified files from

    Returns:
        {list} -- List of modified filenames

    Raises:
        requests.HTTPError -- If the GitHub API answers with an error status
    """
    files = []
    pr_url = f"https://api.github.com/repos/bids-standard/bids-specification/pulls/{pr_num}/files"
    # Ask for the largest page size to keep the number of requests low
    params = {"per_page": 100}

    while pr_url:
        resp = requests.get(pr_url, params=params, timeout=30)
        # Fail loudly on an API error instead of iterating over an error
        # payload, which is not a list of files
        resp.raise_for_status()

        files.extend(item["filename"] for item in resp.json())

        # The "next" link already carries its own query string
        pr_url = resp.links.get("next", {}).get("url")
        params = None

    return files


def filter_files(pr_num, start_phrase="src"):
    """Keep only the files changed by a Pull Request whose name begins with
    a start phrase

    Arguments:
        pr_num {str} -- Number of the Pull Request to get modified files from

    Keyword Arguments:
        start_phrase {str} -- Start phrase to filter changed files by
            (default: {"src"})

    Returns:
        {list} -- List of filenames that begin with the desired start phrase
    """
    return [
        changed_file
        for changed_file in get_files_from_pr(pr_num)
        if changed_file.startswith(start_phrase)
    ]


if __name__ == "__main__":
    # Run as a script: print the filtered list of files changed by the
    # Pull Request given on the command line
    print(filter_files(parse_args().pull_request))
1 change: 1 addition & 0 deletions tools/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests