Exclude the MISRA Website from CI-CD link verifier checks (FreeRTOS#91)

* Exclude the MISRA website from CI-CD link verifier checks.
* Fix the issue with trailing commas and slashes being counted as part of the URLs.
* Add two more test cases for the link verifier, and pass the GitHub token to the action so that workflows can use the CLI.
* Raise an error if the GitHub issue number isn't correctly read during link verification.
Skptak · Nov 8, 2023 · b2be421 · b2be421
1 parent 1769015
commit b2be421
Show file tree

Hide file tree

Showing 4 changed files with 34 additions and 12 deletions.
diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml
@@ -160,7 +160,7 @@ jobs:
               org: AWS,
               branch: main,
               run-link-verifier: true,
-              run-complexity: true,
+              run-complexity: false,
               run-doxygen: true,
               build-flags: -DCMAKE_BUILD_TYPE=Debug -DBUILD_CLONE_SUBMODULES=ON -DCMAKE_C_FLAGS='--coverage -Wall -Wextra -Werror',
               coverage-skips: '"\*test\*" "\*CMakeCCompilerId\*" "\*mocks\*" "\*source\*"',
@@ -210,7 +210,7 @@ jobs:
         with:
           path: repo/${{ matrix.inputs.repository }}
           exclude-dirs: complexity, formatting
-          exclude-urls: https://dummy-url.com/ota.bin
+          exclude-urls: https://dummy-url.com/ota.bin, https://s3.region.amazonaws.com/joe-ota
 
       - name: "Complexity Check: ${{ matrix.inputs.repository }}"
         if: matrix.inputs.run-complexity && ( success() || failure() )

diff --git a/link-verifier/action.yml b/link-verifier/action.yml
@@ -20,6 +20,7 @@ inputs:
   exclude-urls:
     description: 'Comma separated list of URLS not to check'
     required: false
+    default: https://www.misra.org.uk/misra-c, https://www.misra.org.uk
   user-agent:
     description: 'User agent string to use when making http requests.'
     required: false
@@ -30,7 +31,8 @@ runs:
   - name: Setup Python for link verifier action
     uses: actions/setup-python@v3
     with:
-      python-version: '>=3.7' # Minimum version for urllib v2 (https://urllib3.readthedocs.io/en/latest/v2-migration-guide.html)
+      # Minimum version for urllib v2 (https://urllib3.readthedocs.io/en/latest/v2-migration-guide.html)
+      python-version: '>=3.7'
 
   - env:
       # The bash escape character is \033
@@ -75,6 +77,7 @@ runs:
       bashFail: \033[31;1mFAILED -
       bashEnd:  \033[0m
       stepName: Check Links in Files
+      GITHUB_TOKEN: ${{ github.token }}
     name: ${{ env.stepName }}
     working-directory: ${{ inputs.path }}
     shell: bash
@@ -94,13 +97,17 @@ runs:
         args+=" --include-file-types ${file_types}"
       fi
 
+      # Many of the FreeRTOS-Repos include a link to MISRA's website. This website
+      # now has a CAPTCHA landing page, as such always exclude it from this check.
+      touch allowList.txt
+      echo "https://www.misra.org.uk/misra-c" >> allowList.txt
+      echo "https://www.misra.org.uk" >> allowList.txt
+
       if [ -n "${{ inputs.allowlist-file }}" ]; then
-        touch allowList.txt
         cat ${{ inputs.allowlist-file }} >> allowList.txt
       fi
 
       if [[ "${{ inputs.exclude-urls }}" != "" ]]; then
-        touch allowList.txt
         exclude_urls="${{ inputs.exclude-urls }}"
         exclude_urls="${exclude_urls//,/ }"
         for url in ${exclude_urls[@]}; do echo -e "$url" >> allowList.txt; done

diff --git a/link-verifier/fileTests/goodFiles/fileWithLowercasemdIntheName.md b/link-verifier/fileTests/goodFiles/fileWithLowercasemdIntheName.md
@@ -3,3 +3,9 @@
 # Here's a random link for it to test as well
 [verify-links.py](../../verify-links.py)
 [CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions)
+# Test that it will find this url
+https://www.google.com
+# Test that it will find this url and drop the slash
+https://www.google.com/
+# Test that it will find this url by dropping the comma
+https://www.google.com,
diff --git a/link-verifier/verify-links.py b/link-verifier/verify-links.py
@@ -14,7 +14,7 @@
 import traceback
 from collections import defaultdict
 
-MARKDOWN_SEARCH_TERM = r'\.md$'
+MARKDOWN_SEARCH_TERM = r"\.md$"
 # Regex to find a URL
 URL_SEARCH_TERM = r'(\b(https?)://[^\s\)\]\\"<>]+[^\s\)\.\]\\"<>])'
 HTTP_URL_SEARCH_TERM = r'https?://'
@@ -151,7 +151,11 @@ def identify_broken_links(self, files, verbose):
                 cprint(f'\t{link}','green')
 
         for link in self.external_links:
-            is_broken, status_code = test_url(link)
+            # Remove the trailing slash or trailing coma
+            if(link[-1] == "/" or link[-1] == ","):
+                is_broken, status_code = test_url(link[:-1])
+            else:
+                is_broken, status_code = test_url(link)
             if is_broken:
                 self.broken_links.append(link)
                 file_printed = self.print_filename(files[self.name], file_printed)
@@ -166,7 +170,7 @@ def parse_file(html_file):
     return HtmlFile(html_file)
 
 def html_name_from_markdown(filename):
-    md_pattern = re.compile('\.md', re.IGNORECASE)
+    md_pattern = re.compile("\.md$", re.IGNORECASE)
     return md_pattern.sub('.html', filename)
 
 def create_html(markdown_file):
@@ -254,7 +258,10 @@ def fetch_issues(repo, issue_type, limit):
         if process.returncode == 0:
             key = issue_type + 's'
             for issue in process.stdout.split():
-                main_repo_list[repo][key].add(int(issue))
+                if(issue.isnumeric()):
+                    main_repo_list[repo][key].add(int(issue))
+                else:
+                    raise TypeError(f"Attempted to cast {issue} as an int. Error fetching GitHub Issues")
         return 0
     else:
         use_gh_cache = False
@@ -324,7 +331,6 @@ def main():
     if args.verbose:
         print("Using User-Agent: {}".format(http_headers['User-Agent']))
 
-
     # If any explicit files are passed, add them to md_file_list.
     if args.files is not None:
         md_file_list = args.files
@@ -347,7 +353,7 @@ def main():
                 if any(file.endswith(file_type) for file_type in args.include_files):
                     f_path = os.path.join(root, file)
                     if args.verbose:
-                        print("Processing File: {}".format(f_path))
+                        print("\nProcessing File: {}".format(f_path))
                     with open(f_path, 'r', encoding="utf8", errors='ignore') as f:
                         # errors='ignore' argument Suppresses UnicodeDecodeError
                         # when reading invalid UTF-8 characters.
@@ -393,7 +399,10 @@ def main():
                 os.remove(f)
 
     for link in link_set:
-        is_broken, status_code = test_url(link)
+        if ( ( link[-1] == "/" ) or ( link[-1] == "," ) ):
+            is_broken, status_code = test_url(link[:-1])
+        else:
+            is_broken, status_code = test_url(link)
         if is_broken:
             broken_links.append(link)
             print("FILES:", link_to_files[link])