From 9c43ac7d9154276f246ca83312fe9a1c3f4f8747 Mon Sep 17 00:00:00 2001 From: Kathleen Hogan <56691584+kathleen-hogan-slalom@users.noreply.github.com> Date: Fri, 22 Mar 2024 12:40:50 -0700 Subject: [PATCH] Feature/secureli-435: Implement custom PII scan (#496) secureli-435 This PR implements a new, custom PII scan. This scan occurs separately from our existing scans which utilize pre-commit hooks external to the secureli codebase. ## Changes * Creates new PII Scanner and adds the scan to the existing Scan Action ## Testing * Unit tests updated * Screenshot of manual test below: Screenshot 2024-03-22 at 9 49 55 AM ## Clean Code Checklist - [x] Meets acceptance criteria for issue - [x] New logic is covered with automated tests - [x] Appropriate exception handling added - [ ] Thoughtful logging included - [x] Documentation is updated - [ ] Follow-up work is documented in TODOs - [ ] TODOs have a ticket associated with them - [x] No commented-out code included --------- Co-authored-by: Kathleen Hogan --- CONTRIBUTING.md | 4 + README.md | 18 ++ pyproject.toml | 2 +- scripts/secureli-deployment.sh | 2 +- secureli/actions/action.py | 24 +- secureli/actions/scan.py | 31 ++- secureli/container.py | 19 +- .../modules/core/core_services/scanner.py | 29 +-- secureli/modules/pii_scanner/pii_scanner.py | 214 ++++++++++++++++++ secureli/modules/shared/consts/pii.py | 51 +++++ secureli/modules/shared/models/scan.py | 8 + secureli/modules/shared/utilities.py | 23 +- secureli/repositories/repo_files.py | 37 ++- tests/actions/test_action.py | 56 ++--- tests/actions/test_initializer_action.py | 10 +- tests/actions/test_scan_action.py | 108 ++++++--- tests/modules/core/test_scanner_service.py | 22 +- .../pii_scanner/test_pii_scanner_service.py | 129 +++++++++++ .../modules/shared/utilities/test_git_meta.py | 12 +- .../test_repo_files_repository.py | 24 ++ 20 files changed, 701 insertions(+), 122 deletions(-) create mode 100644 secureli/modules/pii_scanner/pii_scanner.py create mode 100644 secureli/modules/shared/consts/pii.py create mode 100644 tests/modules/pii_scanner/test_pii_scanner_service.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ca8fa0f6..0de4319c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -237,6 +237,10 @@ Services do not leverage 3rd Party or External Dependencies directly, not even t Unit tests of Services are done with mock services and abstractions. +### Scanning Services + +Scanning is largely done by way of pre-commit hooks as configured in `.pre-commit-config.yaml`. However, we do also implement our own custom scans in separate Scanner Services, e.g., the PII scan. The results of these multiple scans are then merged together at the Action layer for output. + ## Abstractions Abstractions are mini-services that are meant to encapsulate a 3rd-party dependency. This allows us to mock interfaces we don’t own easier than leveraging duck typing and magic mocks while testing our own logic. This also allows us to swap out the underlying dependency with less risk of disruption to our entire application. diff --git a/README.md b/README.md index 1a013c74..8f4e366f 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,24 @@ All you need to do is run: Running `secureli init` will allow seCureLI to detect the languages in your repo, install pre-commit, install all the appropriate pre-commit hooks for your local repo, run a scan for secrets in your local repo, and update the installed hooks. +## Scan + +To manually trigger a scan, run: + +```bash +% secureli scan +``` + +This will run through all hooks and custom scans, unless a `--specific-test` option is used. The default is to scan staged files only. To scan all files instead, use the `--mode all-files` option. + +### PII Scan + +seCureLI utilizes its own PII scan, rather than using an existing pre-commit hook. To exclude a line from being flagged by the PII scanner, you can use a `disable-pii-scan` marker in a comment to disable the scan for that line. + +``` +test_var = "some dummy data I don't want scanned" # disable-pii-scan +``` + # Upgrade ## Upgrading seCureLI via Homebrew diff --git a/pyproject.toml b/pyproject.toml index da9d65a6..07757e3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "poetry.core.masonry.api" name = "secureli" version = "0.31.0" description = "Secure Project Manager" -authors = ["Caleb Tonn "] +authors = ["Caleb Tonn "] # disable-pii-scan license = "Apache-2.0" readme = "README.md" diff --git a/scripts/secureli-deployment.sh b/scripts/secureli-deployment.sh index 9af7969c..1fb1b6a8 100755 --- a/scripts/secureli-deployment.sh +++ b/scripts/secureli-deployment.sh @@ -13,7 +13,7 @@ else echo "Pulling down the most recent published secureli release v${secureliVersion}" gh release download v$secureliVersion export secureliSha256=$(sha256sum ./secureli-${secureliVersion}.tar.gz | awk '{print $1}') - git config --global user.email "secureli-automation@slalom.com" + git config --global user.email "secureli-automation@slalom.com" # disable-pii-scan git config --global user.name "Secureli Automation" python ./scripts/get-secureli-dependencies.py cd homebrew-secureli diff --git a/secureli/actions/action.py b/secureli/actions/action.py index 65526ec2..e4c7128b 100644 --- a/secureli/actions/action.py +++ b/secureli/actions/action.py @@ -9,7 +9,7 @@ from secureli.repositories import secureli_config from secureli.repositories.repo_settings import SecureliRepository, TelemetrySettings from secureli.modules.language_analyzer import language_analyzer, language_support -from secureli.modules.core.core_services.scanner import ScannerService +from secureli.modules.core.core_services.scanner import HooksScannerService from secureli.modules.core.core_services.updater import UpdaterService from secureli.modules.shared.utilities import format_sentence_list @@ -27,7 +27,7 @@ def __init__( echo: EchoAbstraction, language_analyzer: language_analyzer.LanguageAnalyzerService, language_support: language_support.LanguageSupportService, - scanner: ScannerService, + hooks_scanner: HooksScannerService, secureli_config: secureli_config.SecureliConfigRepository, settings: SecureliRepository, updater: UpdaterService, @@ -35,7 +35,7 @@ def __init__( self.echo = echo self.language_analyzer = language_analyzer self.language_support = language_support - self.scanner = scanner + self.hooks_scanner = hooks_scanner self.secureli_config = secureli_config self.settings = settings self.updater = updater @@ -76,10 +76,8 @@ def verify_install( outcome=update_config.outcome, ) - pre_commit_config_location = ( - self.action_deps.scanner.pre_commit.get_preferred_pre_commit_config_path( - folder_path - ) + pre_commit_config_location = self.action_deps.hooks_scanner.pre_commit.get_preferred_pre_commit_config_path( + folder_path ) if not pre_commit_config_location.exists(): update_result: VerifyResult = ( @@ -289,7 +287,7 @@ def _run_post_install_scan( ) self.action_deps.echo.print(f"running {secret_test_id}.") - scan_result = self.action_deps.scanner.scan_repo( + scan_result = self.action_deps.hooks_scanner.scan_repo( folder_path, ScanMode.ALL_FILES, specific_test=secret_test_id, @@ -426,8 +424,10 @@ def _update_secureli_pre_commit_config_location( ) if response: try: - new_file_path = self.action_deps.scanner.pre_commit.migrate_config_file( - folder_path + new_file_path = ( + self.action_deps.hooks_scanner.pre_commit.migrate_config_file( + folder_path + ) ) return VerifyResult( outcome=VerifyOutcome.UPDATE_SUCCEEDED, file_path=new_file_path @@ -436,8 +436,8 @@ def _update_secureli_pre_commit_config_location( return VerifyResult(outcome=VerifyOutcome.UPDATE_FAILED) else: self.action_deps.echo.warning(".pre-commit-config.yaml migration declined") - deprecated_location = self.action_deps.scanner.get_pre_commit_config_path( - folder_path + deprecated_location = ( + self.action_deps.hooks_scanner.get_pre_commit_config_path(folder_path) ) return VerifyResult( outcome=VerifyOutcome.UPDATE_CANCELED, file_path=deprecated_location diff --git a/secureli/actions/scan.py b/secureli/actions/scan.py index 4d8e2e32..f12fd66e 100644 --- a/secureli/actions/scan.py +++ b/secureli/actions/scan.py @@ -14,8 +14,9 @@ from secureli.modules.shared.models.publish_results import PublishResultsOption from secureli.modules.shared.models.result import Result from secureli.modules.observability.observability_services.logging import LoggingService -from secureli.modules.core.core_services.scanner import ScannerService -from secureli.modules.shared.models.scan import ScanMode +from secureli.modules.core.core_services.scanner import HooksScannerService +from secureli.modules.pii_scanner.pii_scanner import PiiScannerService +from secureli.modules.shared.models.scan import ScanMode, ScanResult from secureli.settings import Settings from secureli.modules.shared import utilities @@ -36,11 +37,13 @@ def __init__( action_deps: action.ActionDependencies, echo: EchoAbstraction, logging: LoggingService, - scanner: ScannerService, + hooks_scanner: HooksScannerService, + pii_scanner: PiiScannerService, git_repo: GitRepo, ): super().__init__(action_deps) - self.scanner = scanner + self.hooks_scanner = hooks_scanner + self.pii_scanner = pii_scanner self.echo = echo self.logging = logging self.git_repo = git_repo @@ -52,10 +55,12 @@ def _check_secureli_hook_updates(self, folder_path: Path) -> VerifyResult: :param folder_path: The folder path containing the .secureli/ folder """ - self.action_deps.echo.print("Checking for pre-commit hook updates...") - pre_commit_config = self.scanner.pre_commit.get_pre_commit_config(folder_path) + self.action_deps.echo.info("Checking for pre-commit hook updates...") + pre_commit_config = self.hooks_scanner.pre_commit.get_pre_commit_config( + folder_path + ) - repos_to_update = self.scanner.pre_commit.check_for_hook_updates( + repos_to_update = self.hooks_scanner.pre_commit.check_for_hook_updates( pre_commit_config ) @@ -156,10 +161,20 @@ def scan_repo( if verify_result.outcome in self.halting_outcomes: return - scan_result = self.scanner.scan_repo( + # Execute PII scan (unless `specific_test` is provided, in which case it will be for a hook below) + pii_scan_result: ScanResult | None = None + if not specific_test: + pii_scan_result = self.pii_scanner.scan_repo( + folder_path, scan_mode, files=files + ) + + # Execute hooks + hooks_scan_result = self.hooks_scanner.scan_repo( folder_path, scan_mode, specific_test, files=files ) + scan_result = utilities.merge_scan_results([pii_scan_result, hooks_scan_result]) + details = scan_result.output or "Unknown output during scan" self.echo.print(details) diff --git a/secureli/container.py b/secureli/container.py index 337bb559..38d5b1da 100644 --- a/secureli/container.py +++ b/secureli/container.py @@ -15,8 +15,9 @@ from secureli.modules.shared.resources import read_resource from secureli.modules import language_analyzer from secureli.modules.observability.observability_services.logging import LoggingService -from secureli.modules.core.core_services.scanner import ScannerService +from secureli.modules.core.core_services.scanner import HooksScannerService from secureli.modules.core.core_services.updater import UpdaterService +from secureli.modules.pii_scanner.pii_scanner import PiiScannerService from secureli.modules.secureli_ignore import SecureliIgnoreService from secureli.settings import Settings @@ -130,11 +131,18 @@ class Container(containers.DeclarativeContainer): ) """The service that scans the repository using pre-commit configuration""" - scanner_service = providers.Factory( - ScannerService, + hooks_scanner_service = providers.Factory( + HooksScannerService, pre_commit=pre_commit_abstraction, ) + """The service that scans the repository for potential PII""" + pii_scanner_service = providers.Factory( + PiiScannerService, + repo_files=repo_files_repository, + echo=echo, + ) + updater_service = providers.Factory( UpdaterService, pre_commit=pre_commit_abstraction, @@ -148,7 +156,7 @@ class Container(containers.DeclarativeContainer): echo=echo, language_analyzer=language_analyzer_service, language_support=language_support_service, - scanner=scanner_service, + hooks_scanner=hooks_scanner_service, secureli_config=secureli_config_repository, settings=settings_repository, updater=updater_service, @@ -175,7 +183,8 @@ class Container(containers.DeclarativeContainer): action_deps=action_deps, echo=echo, logging=logging_service, - scanner=scanner_service, + hooks_scanner=hooks_scanner_service, + pii_scanner=pii_scanner_service, git_repo=git_repo, ) diff --git a/secureli/modules/core/core_services/scanner.py b/secureli/modules/core/core_services/scanner.py index ab9e6a0b..c706207e 100644 --- a/secureli/modules/core/core_services/scanner.py +++ b/secureli/modules/core/core_services/scanner.py @@ -2,11 +2,10 @@ from typing import Optional from pathlib import Path -import pydantic import re +import secureli.modules.shared.models.scan as scan from secureli.modules.shared.abstractions.pre_commit import PreCommitAbstraction -from secureli.modules.shared.models.scan import ScanFailure, ScanMode, ScanResult from secureli.repositories.repo_settings import PreCommitSettings @@ -18,15 +17,7 @@ class OutputParseErrors(str, Enum): REPO_NOT_FOUND = "repo-not-found" -class ScanOuput(pydantic.BaseModel): - """ - Represents the parsed output from a scan - """ - - failures: list[ScanFailure] - - -class ScannerService: +class HooksScannerService: """ Scans the repo according to the repo's seCureLI config """ @@ -37,10 +28,10 @@ def __init__(self, pre_commit: PreCommitAbstraction): def scan_repo( self, folder_path: Path, - scan_mode: ScanMode, + scan_mode: scan.ScanMode, specific_test: Optional[str] = None, files: Optional[str] = None, - ) -> ScanResult: + ) -> scan.ScanResult: """ Scans the repo according to the repo's seCureLI config :param scan_mode: Whether to scan the staged files (i.e., the files about to be @@ -49,7 +40,7 @@ def scan_repo( If None, run all hooks. :return: A ScanResult object containing whether we succeeded and any error """ - all_files = True if scan_mode == ScanMode.ALL_FILES else False + all_files = True if scan_mode == scan.ScanMode.ALL_FILES else False execute_result = self.pre_commit.execute_hooks( folder_path, all_files, hook_id=specific_test, files=files ) @@ -57,19 +48,19 @@ def scan_repo( folder_path, output=execute_result.output ) - return ScanResult( + return scan.ScanResult( successful=execute_result.successful, output=execute_result.output, failures=parsed_output.failures, ) - def _parse_scan_ouput(self, folder_path: Path, output: str = "") -> ScanOuput: + def _parse_scan_ouput(self, folder_path: Path, output: str = "") -> scan.ScanOutput: """ Parses the output from a scan and returns a list of Failure objects representing any hook rule failures during a scan. :param folder_path: folder containing .secureli folder, usually repository root :param output: Raw output from a scan. - :return: ScanOuput object representing a list of hook rule Failure objects. + :return: ScanOutput object representing a list of hook rule Failure objects. """ failures = [] failure_indexes = [] @@ -100,9 +91,9 @@ def _parse_scan_ouput(self, folder_path: Path, output: str = "") -> ScanOuput: files = self._find_file_names(failure_output_list=failure_output_list) for file in files: - failures.append(ScanFailure(id=id, file=file, repo=repo)) + failures.append(scan.ScanFailure(id=id, file=file, repo=repo)) - return ScanOuput(failures=failures) + return scan.ScanOutput(failures=failures) def _get_single_failure_output( self, failure_start: int, output_by_line: list[str] diff --git a/secureli/modules/pii_scanner/pii_scanner.py b/secureli/modules/pii_scanner/pii_scanner.py new file mode 100644 index 00000000..b8c7a77e --- /dev/null +++ b/secureli/modules/pii_scanner/pii_scanner.py @@ -0,0 +1,214 @@ +from secureli.modules.shared.consts.pii import ( + DISABLE_PII_MARKER, + Format, + IGNORED_EXTENSIONS, + PII_CHECK, + RESULT_FORMAT, + SECURELI_GITHUB, +) +import os +import re +from typing import Optional +from pathlib import Path +import pydantic + +import secureli.modules.shared.models.scan as scan +from secureli.modules.shared.abstractions.echo import EchoAbstraction +from secureli.repositories.repo_files import RepoFilesRepository + + +class PiiResult(pydantic.BaseModel): + """ + An individual result of potential PII found + """ + + line_num: int + pii_key: str + + +class PiiScannerService: + """ + Scans the repo for potential PII + """ + + def __init__( + self, + repo_files: RepoFilesRepository, + echo: EchoAbstraction, + ): + self.repo_files = repo_files + self.echo = echo + + def scan_repo( + self, + folder_path: Path, + scan_mode: scan.ScanMode, + files: Optional[list[str]] = None, + ) -> scan.ScanResult: + """ + Scans the repo for potential PII + :param folder_path: The folder path to initialize the repo for + :param scan_mode: Whether to scan the staged files (i.e., the files about to be + committed) or the entire repository + :param files: A specified list of files to scan + :return: A ScanResult object with details of whether the scan succeeded and, if not, details of the failures + """ + + file_paths = self._get_files_list( + folder_path=folder_path, scan_mode=scan_mode, files=files + ) + current_line_num = 0 + pii_found: dict[str, list[PiiResult]] = {} + pii_found_files = set() + + for file_path in file_paths: + file_name = str(file_path) + try: + with open(file_path) as file: + for line in file: + current_line_num += 1 + for pii_key, pii_regex in PII_CHECK.items(): + if ( + re.search(pii_regex, line.lower()) + and not DISABLE_PII_MARKER in line + ): + if not file_name in pii_found: + pii_found[file_name] = [] + pii_found[file_name].append( + { + "line_num": current_line_num, + "pii_key": pii_key, + } + ) + pii_found_files.add(file_name) + current_line_num = 0 + + except Exception as e: + self.echo.print(f"Error PII scanning {file_name}: {e}") + scan_failures = self._generate_scan_failures(pii_found_files) + output = self._generate_scan_output(pii_found, not pii_found) + + return scan.ScanResult( + successful=not pii_found, + output=output, + failures=scan_failures, + ) + + def _file_extension_excluded(self, filename) -> bool: + _, file_extension = os.path.splitext(filename) + if file_extension in IGNORED_EXTENSIONS: + return True + + return False + + def _get_files_list( + self, + folder_path: Path, + scan_mode: scan.ScanMode, + files: Optional[list[str]] = None, + ) -> list[Path]: + """ + Gets the list of files to scan based on ScanMode and, if applicable, files provided in arguments + Note: Files cannot be specified for the `all-files` ScanMode. Also, if a provided file is not staged, + it will not be scanned + :param folder_path: The folder path to initialize the repo for + :param scan_mode: Whether to scan the staged files (i.e., the files about to be + committed) or the entire repository + :param files: A specified list of files to scan + :return: List of file names to be scanned + """ + file_paths: list[Path] = [] + + if scan_mode == scan.ScanMode.STAGED_ONLY: + file_paths = self.repo_files.list_staged_files(folder_path) + if files: + file_paths = list(filter(lambda file: file in file_paths, files)) + + if scan_mode == scan.ScanMode.ALL_FILES: + file_paths = self.repo_files.list_repo_files(folder_path) + + return list( + filter(lambda file: not self._file_extension_excluded(file), file_paths) + ) + + def _generate_scan_failures( + self, pii_found_files: set[str] + ) -> list[scan.ScanFailure]: + """ + Generates a list of ScanFailures for each file in which potential PII was found + :param pii_found_files: The set of files in which potential PII was found + :return: List of ScanFailures + """ + failures = [] + + for pii_found_file in pii_found_files: + failures.append( + scan.ScanFailure( + id="pii_scan", file=pii_found_file, repo=SECURELI_GITHUB + ) + ) + return failures + + def _generate_initial_output(self, success: bool) -> str: + """ + Generates the initial output of the PII scan, indicating whether the scan passed or failed + :param success: Whether the scan passed + :return: A string that will be used at the beginning of the output result + """ + CHECK_STR = "check for PII" + MAX_RESULT_LENGTH = ( + 82 # this aims to align with the results output by pre-commit hooks + ) + + result = ( + self._format_string("Passed", [Format.GREEN_BG]) + " " + if success + else self._format_string("Failed", [Format.RED_BG]) + "\n" + ) + length_of_dots = MAX_RESULT_LENGTH - len(CHECK_STR) - len(result) + final_msg = ( + "\n" + + self._format_string( + "Potential PII found!", [Format.BOLD_WEIGHT, Format.RED_TXT] + ) + if not success + else "" + ) + output = f"{CHECK_STR}{'.' * length_of_dots}{result}{final_msg}" + + return output + + def _generate_scan_output( + self, pii_found: dict[str, list[PiiResult]], success: bool + ) -> str: + """ + Generates the scan output of the PII scan, listing all the areas where potential PII was found + :param pii_found: The breakdown of what potential PII was found, and where + :param success: Whether the scan passed + :return: The final output result + """ + output = self._generate_initial_output(success) + for file, results in pii_found.items(): + output = ( + output + + "\n" + + self._format_string( + f"File: {file}", [Format.BOLD_WEIGHT, Format.PURPLE_TXT] + ) + ) + for result in results: + output = output + f"\n Line {result['line_num']}: {result['pii_key']}" + return output + "\n" + + def _format_string(self, str: str, formats: list[Format]) -> str: + """ + Applies formatting to a string + :param str: The string to format + :param formats: The formatting to apply to the string + :return: The formatted string + """ + + start = "".join(f"{RESULT_FORMAT[format]}" for format in formats) + end = f"{RESULT_FORMAT[Format.DEFAULT]}{RESULT_FORMAT[Format.REG_WEIGHT]}" + + return f"{start}{str}{end}" diff --git a/secureli/modules/shared/consts/pii.py b/secureli/modules/shared/consts/pii.py new file mode 100644 index 00000000..be3fffe7 --- /dev/null +++ b/secureli/modules/shared/consts/pii.py @@ -0,0 +1,51 @@ +from enum import Enum + +PII_CHECK = { + "Email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,7}\b", + "Social security number": r"(?!000|666|333)0*(?:[0-6][0-9][0-9]|[0-7][0-6][0-9]|[0-7][0-7][0-2])[- ](?!00)[0-9]{2}[- ](?!0000)[0-9]{4}", + "Phone number": r"[\+]?[(]?[0-9]{3}[)]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}", +} + +IGNORED_EXTENSIONS = [ + ".md", + ".lock", + ".png", + ".jpg", + ".jpeg", + ".gif", + ".svg", + ".ico", + ".eot", + ".ttf", + ".woff", + ".css", +] + +SECURELI_GITHUB = "https://github.com/slalombuild/secureli" + + +class Format(str, Enum): + """ + Enum to use for formatting PII results + """ + + PURPLE_TXT = "purple_text" + RED_TXT = "red_text" + RED_BG = "red_background" + GREEN_BG = "green_background" + DEFAULT = "default_format" + BOLD_WEIGHT = "bold_weight" + REG_WEIGHT = "regular_weight" + + +RESULT_FORMAT = { + Format.DEFAULT: "\033[m", + Format.PURPLE_TXT: "\033[35m", + Format.RED_TXT: "\033[31m", + Format.GREEN_BG: "\033[42m", + Format.RED_BG: "\033[41m", + Format.BOLD_WEIGHT: "\033[1m", + Format.REG_WEIGHT: "\033[22m", +} + +DISABLE_PII_MARKER = "disable-pii-scan" diff --git a/secureli/modules/shared/models/scan.py b/secureli/modules/shared/models/scan.py index 21edcc92..d86e785a 100644 --- a/secureli/modules/shared/models/scan.py +++ b/secureli/modules/shared/models/scan.py @@ -23,6 +23,14 @@ class ScanFailure(pydantic.BaseModel): file: str +class ScanOutput(pydantic.BaseModel): + """ + Represents the parsed output from a scan + """ + + failures: list[ScanFailure] + + class ScanResult(pydantic.BaseModel): """ The results of calling scan_repo diff --git a/secureli/modules/shared/utilities.py b/secureli/modules/shared/utilities.py index 4af705d5..ac04a6d2 100644 --- a/secureli/modules/shared/utilities.py +++ b/secureli/modules/shared/utilities.py @@ -11,7 +11,7 @@ from secureli.modules.observability.consts import logging from secureli.modules.shared.models.publish_results import PublishLogResult from secureli.modules.shared.models.result import Result -from secureli.modules.shared.models.scan import ScanFailure +from secureli.modules.shared.models.scan import ScanFailure, ScanResult from secureli.settings import Settings @@ -153,3 +153,24 @@ def post_log(log_data: str, settings: Settings) -> PublishLogResult: def secureli_version() -> str: """Leverage package resources to determine the current version of secureli""" return version("secureli") + + +def merge_scan_results(results: list[ScanResult]): + """ + Creates a single ScanResult from multiple ScanResults + :param results: The list of ScanResults to merge + :return A single ScanResult + """ + final_successful = True + final_output = "" + final_failures: list[ScanFailure] = [] + + for result in results: + if result: + final_successful = final_successful and result.successful + final_output = final_output + (result.output or "") + "\n" + final_failures = final_failures + result.failures + + return ScanResult( + successful=final_successful, output=final_output, failures=final_failures + ) diff --git a/secureli/repositories/repo_files.py b/secureli/repositories/repo_files.py index 4679f9c5..6423a078 100644 --- a/secureli/repositories/repo_files.py +++ b/secureli/repositories/repo_files.py @@ -1,5 +1,6 @@ from pathlib import Path import re +import subprocess import chardet from secureli.modules.shared.utilities import combine_patterns @@ -37,10 +38,7 @@ def list_repo_files(self, folder_path: Path) -> list[Path]: :raises ValueError: The specified path does not exist or is not a git repo :return: The visible files within the specified repo as a list of Path objects """ - git_path = folder_path / ".git" - - if not git_path.exists() or not git_path.is_dir(): - raise ValueError("The current folder is not a Git repository!") + self._confirm_is_git_repo(folder_path) file_paths = [ f @@ -117,3 +115,34 @@ def load_file(self, file_path: Path) -> str: pass raise ValueError(f"An unknown error occurred loading the file from {file_path}") + + def list_staged_files(self, folder_path: Path) -> list[Path]: + """ + Lists the file paths of all staged files in a repository, or raises ValueError if the provided path + is not a git repo + :param folder_path: The path to a git repo containing files + :raises ValueError: The specified path does not exist or is not a git repo + :return: The list of staged file names + """ + self._confirm_is_git_repo(folder_path) + git_diff_result = ( + subprocess.run( + ["git", "diff", "--staged", "--name-only"], + stdout=subprocess.PIPE, + cwd=folder_path, + ) + .stdout.decode("utf8") + .split("\n") + ) + return git_diff_result[0:-1] + + def _confirm_is_git_repo(self, folder_path: Path): + """ + Confirms a provided path is a git repo, and if not raises a ValueError + :param folder_path: The path to a git repo containing files + :raises ValueError: The specified path does not exist or is not a git repo + """ + git_path = folder_path / ".git" + + if not git_path.exists() or not git_path.is_dir(): + raise ValueError("The current folder is not a Git repository!") diff --git a/tests/actions/test_action.py b/tests/actions/test_action.py index 942e2680..c9c6faf1 100644 --- a/tests/actions/test_action.py +++ b/tests/actions/test_action.py @@ -17,9 +17,9 @@ @pytest.fixture() -def mock_scanner() -> MagicMock: - mock_scanner = MagicMock() - return mock_scanner +def mock_hooks_scanner() -> MagicMock: + mock_hooks_scanner = MagicMock() + return mock_hooks_scanner @pytest.fixture() @@ -33,7 +33,7 @@ def action_deps( mock_echo: MagicMock, mock_language_analyzer: MagicMock, mock_language_support: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, mock_updater: MagicMock, mock_settings: MagicMock, @@ -42,7 +42,7 @@ def action_deps( mock_echo, mock_language_analyzer, mock_language_support, - mock_scanner, + mock_hooks_scanner, mock_secureli_config, mock_settings, mock_updater, @@ -95,7 +95,7 @@ def test_that_initialize_repo_install_flow_selects_both_languages( def test_that_initialize_repo_install_flow_performs_security_analysis( action: Action, mock_language_analyzer: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, ): mock_language_analyzer.analyze.return_value = language.AnalyzeResult( language_proportions={ @@ -107,13 +107,13 @@ def test_that_initialize_repo_install_flow_performs_security_analysis( action.verify_install(test_folder_path, reset=True, always_yes=True, files=None) - mock_scanner.scan_repo.assert_called_once() + mock_hooks_scanner.scan_repo.assert_called_once() def test_that_initialize_repo_install_flow_displays_security_analysis_results( - action: Action, action_deps: MagicMock, mock_scanner: MagicMock + action: Action, action_deps: MagicMock, mock_hooks_scanner: MagicMock ): - mock_scanner.scan_repo.return_value = ScanResult( + mock_hooks_scanner.scan_repo.return_value = ScanResult( successful=False, output="Detect secrets...Failed", failures=[ScanFailure(repo="repo", id="id", file="file")], @@ -126,7 +126,7 @@ def test_that_initialize_repo_install_flow_displays_security_analysis_results( def test_that_initialize_repo_install_flow_skips_security_analysis_if_unavailable( action: Action, mock_language_analyzer: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_language_support: MagicMock, ): mock_language_analyzer.analyze.return_value = language.AnalyzeResult( @@ -142,7 +142,7 @@ def test_that_initialize_repo_install_flow_skips_security_analysis_if_unavailabl action.verify_install(test_folder_path, reset=True, always_yes=True, files=None) - mock_scanner.scan_repo.assert_not_called() + mock_hooks_scanner.scan_repo.assert_not_called() def test_that_initialize_repo_install_flow_warns_about_skipped_files( @@ -413,14 +413,16 @@ def test_that_verify_install_returns_success_result_newly_detected_language_inst def test_that_verify_install_returns_failure_result_without_re_commit_config_file_path( action: Action, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_echo: MagicMock, ): with (patch.object(Path, "exists", return_value=False),): - mock_scanner.pre_commit.get_preferred_pre_commit_config_path.return_value = ( + mock_hooks_scanner.pre_commit.get_preferred_pre_commit_config_path.return_value = ( test_folder_path / ".secureli" / ".pre-commit-config.yaml" ) - mock_scanner.pre_commit.migrate_config_file.side_effect = Exception("ERROR") + mock_hooks_scanner.pre_commit.migrate_config_file.side_effect = Exception( + "ERROR" + ) verify_result = action.verify_install( test_folder_path, reset=False, always_yes=True, files=None ) @@ -432,11 +434,11 @@ def test_that_verify_install_returns_failure_result_without_re_commit_config_fil def test_that_verify_install_continues_after_pre_commit_config_file_moved( action: Action, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_echo: MagicMock, ): with (patch.object(Path, "exists", return_value=False),): - mock_scanner.pre_commit.get_preferred_pre_commit_config_path.return_value = ( + mock_hooks_scanner.pre_commit.get_preferred_pre_commit_config_path.return_value = ( test_folder_path / ".secureli" / ".pre-commit-config.yaml" ) verify_result = action.verify_install( @@ -447,7 +449,7 @@ def test_that_verify_install_continues_after_pre_commit_config_file_moved( def test_that_update_secureli_pre_commit_config_location_moves_file( action: Action, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_echo: MagicMock, ): update_file_location = test_folder_path / ".secureli" / ".pre-commit-config.yaml" @@ -457,16 +459,18 @@ def test_that_update_secureli_pre_commit_config_location_moves_file( mock_echo.print.assert_called_once_with( "seCureLI's .pre-commit-config.yaml is in a deprecated location." ) - mock_scanner.pre_commit.migrate_config_file.assert_called_with(update_file_location) + mock_hooks_scanner.pre_commit.migrate_config_file.assert_called_with( + update_file_location + ) assert update_result.outcome == VerifyOutcome.UPDATE_SUCCEEDED def test_that_update_secureli_pre_commit_config_fails_on_exception( action: Action, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, ): with pytest.raises(Exception): - mock_scanner.pre_commit.get_preferred_pre_commit_config_path.return_value = ( + mock_hooks_scanner.pre_commit.get_preferred_pre_commit_config_path.return_value = ( test_folder_path / ".secureli" / ".pre-commit-config.yaml" ) update_result = action._update_secureli_pre_commit_config_location( @@ -478,7 +482,7 @@ def test_that_update_secureli_pre_commit_config_fails_on_exception( def test_that_update_secureli_pre_commit_config_location_cancels_on_user_response( action: Action, mock_echo: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, ): mock_echo.confirm.return_value = False update_file_location = test_folder_path / ".secureli" / ".pre-commit-config.yaml" @@ -489,7 +493,7 @@ def test_that_update_secureli_pre_commit_config_location_cancels_on_user_respons mock_echo.warning.assert_called_once_with( ".pre-commit-config.yaml migration declined" ) - mock_scanner.pre_commit.migrate_config_file.assert_not_called() + mock_hooks_scanner.pre_commit.migrate_config_file.assert_not_called() assert update_result.outcome == VerifyOutcome.UPDATE_CANCELED @@ -679,7 +683,7 @@ def test_that_post_install_scan_ignores_creating_pre_commit_on_existing_install( def test_that_post_install_scan_scans_repo( - action: Action, mock_scanner: MagicMock, mock_echo: MagicMock + action: Action, mock_hooks_scanner: MagicMock, mock_echo: MagicMock ): action._run_post_install_scan( "test/path", @@ -688,12 +692,12 @@ def test_that_post_install_scan_scans_repo( False, ) - mock_scanner.scan_repo.assert_called_once() + mock_hooks_scanner.scan_repo.assert_called_once() mock_echo.warning.assert_not_called() def test_that_post_install_scan_does_not_scan_repo_when_no_security_hook_id( - action: Action, mock_scanner: MagicMock, mock_echo: MagicMock + action: Action, mock_hooks_scanner: MagicMock, mock_echo: MagicMock ): action._run_post_install_scan( "test/path", @@ -702,7 +706,7 @@ def test_that_post_install_scan_does_not_scan_repo_when_no_security_hook_id( False, ) - mock_scanner.scan_repo.assert_not_called() + mock_hooks_scanner.scan_repo.assert_not_called() mock_echo.warning.assert_called_once_with( "RadLang does not support secrets detection, skipping" ) diff --git a/tests/actions/test_initializer_action.py b/tests/actions/test_initializer_action.py index 00e9e93c..a26fff90 100644 --- a/tests/actions/test_initializer_action.py +++ b/tests/actions/test_initializer_action.py @@ -12,9 +12,9 @@ @pytest.fixture() -def mock_scanner() -> MagicMock: - mock_scanner = MagicMock() - return mock_scanner +def mock_hooks_scanner() -> MagicMock: + mock_hooks_scanner = MagicMock() + return mock_hooks_scanner @pytest.fixture() @@ -28,7 +28,7 @@ def action_deps( mock_echo: MagicMock, mock_language_analyzer: MagicMock, mock_language_support: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, mock_settings: MagicMock, mock_updater: MagicMock, @@ -37,7 +37,7 @@ def action_deps( mock_echo, mock_language_analyzer, mock_language_support, - mock_scanner, + mock_hooks_scanner, mock_secureli_config, mock_settings, mock_updater, diff --git a/tests/actions/test_scan_action.py b/tests/actions/test_scan_action.py index d3cb99f3..8724ada8 100644 --- a/tests/actions/test_scan_action.py +++ b/tests/actions/test_scan_action.py @@ -25,11 +25,11 @@ @pytest.fixture() -def mock_scanner(mock_pre_commit) -> MagicMock: - mock_scanner = MagicMock() - mock_scanner.scan_repo.return_value = ScanResult(successful=True, failures=[]) - mock_scanner.pre_commit = mock_pre_commit - return mock_scanner +def mock_hooks_scanner(mock_pre_commit) -> MagicMock: + mock_hooks_scanner = MagicMock() + mock_hooks_scanner.scan_repo.return_value = ScanResult(successful=True, failures=[]) + mock_hooks_scanner.pre_commit = mock_pre_commit + return mock_hooks_scanner @pytest.fixture() @@ -56,6 +56,13 @@ def mock_pre_commit() -> MagicMock: return mock_pre_commit +@pytest.fixture() +def mock_pii_scanner() -> MagicMock: + mock_pii_scanner = MagicMock() + mock_pii_scanner.scan_repo.return_value = ScanResult(successful=True, failures=[]) + return mock_pii_scanner + + @pytest.fixture() def mock_updater() -> MagicMock: mock_updater = MagicMock() @@ -103,7 +110,7 @@ def action_deps( mock_echo: MagicMock, mock_language_analyzer: MagicMock, mock_language_support: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, mock_settings_repository: MagicMock, mock_updater: MagicMock, @@ -112,7 +119,7 @@ def action_deps( mock_echo, mock_language_analyzer, mock_language_support, - mock_scanner, + mock_hooks_scanner, mock_secureli_config, mock_settings_repository, mock_updater, @@ -123,13 +130,15 @@ def action_deps( def scan_action( action_deps: ActionDependencies, mock_logging_service: MagicMock, + mock_pii_scanner: MagicMock, mock_git_repo: MagicMock, ) -> ScanAction: return ScanAction( action_deps=action_deps, echo=action_deps.echo, logging=mock_logging_service, - scanner=action_deps.scanner, + hooks_scanner=action_deps.hooks_scanner, + pii_scanner=mock_pii_scanner, git_repo=mock_git_repo, ) @@ -142,7 +151,8 @@ def mock_post_log(mocker: MockerFixture) -> MagicMock: @mock.patch.dict(os.environ, {"API_KEY": "", "API_ENDPOINT": ""}, clear=True) def test_that_scan_repo_errors_if_not_successful( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, + mock_pii_scanner: MagicMock, mock_secureli_config: MagicMock, mock_language_analyzer: MagicMock, ): @@ -151,7 +161,10 @@ def test_that_scan_repo_errors_if_not_successful( language_proportions={f"{mock_language}": 1.0}, skipped_files=[], ) - mock_scanner.scan_repo.return_value = ScanResult( + mock_pii_scanner.scan_repo.return_value = ScanResult( + successful=False, output="So much PII", failures=[] + ) + mock_hooks_scanner.scan_repo.return_value = ScanResult( successful=False, output="Bad Error", failures=[] ) mock_secureli_config.load.return_value = SecureliConfig( @@ -169,7 +182,7 @@ def test_that_scan_repo_scans_if_installed( scan_action: ScanAction, mock_secureli_config: MagicMock, mock_language_support: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_language_analyzer: MagicMock, ): mock_language_analyzer.analyze.return_value = AnalyzeResult( @@ -181,9 +194,45 @@ def test_that_scan_repo_scans_if_installed( ) mock_language_support.version_for_language.return_value = "abc123" - scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, False) + scan_action.scan_repo( + test_folder_path, ScanMode.STAGED_ONLY, False, None, "detect-secrets" + ) - mock_scanner.scan_repo.assert_called_once() + mock_hooks_scanner.scan_repo.assert_called_once() + + +@mock.patch.dict(os.environ, {"API_KEY": "", "API_ENDPOINT": ""}, clear=True) +def test_that_scan_repo_conducts_all_scans_and_merges_results( + scan_action: ScanAction, + mock_secureli_config: MagicMock, + mock_language_support: MagicMock, + mock_hooks_scanner: MagicMock, + mock_pii_scanner: MagicMock, + mock_language_analyzer: MagicMock, + mock_echo: MagicMock, +): + mock_language_analyzer.analyze.return_value = AnalyzeResult( + language_proportions={"RadLang": 1.0}, + skipped_files=[], + ) + mock_secureli_config.load.return_value = SecureliConfig( + languages=["RadLang"], version_installed="abc123" + ) + mock_language_support.version_for_language.return_value = "abc123" + mock_failure_1 = "Hooks scan failure" + mock_failure_2 = "PII scan failure" + mock_hooks_scanner.scan_repo.return_value = ScanResult( + successful=False, failures=[], output=mock_failure_1 + ) + mock_pii_scanner.scan_repo.return_value = ScanResult( + successful=False, failures=[], output=mock_failure_2 + ) + + with pytest.raises(SystemExit): + scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, False) + mock_hooks_scanner.scan_repo.assert_called_once() + mock_pii_scanner.scan_repo.assert_called_once() + mock_echo.print.assert_called_once_with(f"\n{mock_failure_1}\n{mock_failure_2}") @mock.patch.dict(os.environ, {"API_KEY": "", "API_ENDPOINT": ""}, clear=True) @@ -191,7 +240,8 @@ def test_that_scan_repo_continue_scan_if_upgrade_canceled( scan_action: ScanAction, mock_secureli_config: MagicMock, mock_language_support: MagicMock, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, + mock_pii_scanner: MagicMock, mock_echo: MagicMock, mock_language_analyzer: MagicMock, ): @@ -207,13 +257,15 @@ def test_that_scan_repo_continue_scan_if_upgrade_canceled( scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, False) - mock_scanner.scan_repo.assert_called_once() + mock_hooks_scanner.scan_repo.assert_called_once() + mock_pii_scanner.scan_repo.assert_called_once() @mock.patch.dict(os.environ, {"API_KEY": "", "API_ENDPOINT": ""}, clear=True) def test_that_scan_repo_does_not_scan_if_not_installed( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, + mock_pii_scanner: MagicMock, mock_secureli_config: MagicMock, mock_echo: MagicMock, mock_language_analyzer: MagicMock, @@ -224,45 +276,47 @@ def test_that_scan_repo_does_not_scan_if_not_installed( mock_echo.confirm.return_value = False scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, False) - mock_scanner.scan_repo.assert_not_called() + + mock_hooks_scanner.scan_repo.assert_not_called() + mock_pii_scanner.scan_repo.assert_not_called() def test_that_scan_checks_for_updates( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, mock_pass_install_verification: MagicMock, ): scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, always_yes=True) - mock_scanner.pre_commit.check_for_hook_updates.assert_called_once() + mock_hooks_scanner.pre_commit.check_for_hook_updates.assert_called_once() def test_that_scan_only_checks_for_updates_periodically( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_get_time_near_epoch: MagicMock, mock_secureli_config: MagicMock, ): mock_secureli_config.load.return_value = SecureliConfig() scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, always_yes=True) - mock_scanner.pre_commit.check_for_hook_updates.assert_not_called() + mock_hooks_scanner.pre_commit.check_for_hook_updates.assert_not_called() def test_that_scan_update_check_uses_pre_commit_config( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, ): mock_secureli_config.load.return_value = SecureliConfig() scan_action.scan_repo(test_folder_path, ScanMode.STAGED_ONLY, always_yes=True) - mock_scanner.pre_commit.get_pre_commit_config.assert_called_once() + mock_hooks_scanner.pre_commit.get_pre_commit_config.assert_called_once() # Test that _check_secureli_hook_updates returns UP_TO_DATE if no hooks need updating def test_scan_update_check_return_value_when_up_to_date( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, ): mock_secureli_config.load.return_value = SecureliConfig() @@ -273,11 +327,11 @@ def test_scan_update_check_return_value_when_up_to_date( # Test that _check_secureli_hook_updates returns UPDATE_CANCELED if hooks need updating def test_scan_update_check_return_value_when_not_up_to_date( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_secureli_config: MagicMock, ): mock_secureli_config.load.return_value = SecureliConfig() - mock_scanner.pre_commit.check_for_hook_updates.return_value = { + mock_hooks_scanner.pre_commit.check_for_hook_updates.return_value = { "http://example-repo.com/": RevisionPair(oldRev="old-rev", newRev="new-rev") } result = scan_action._check_secureli_hook_updates(test_folder_path) @@ -287,7 +341,7 @@ def test_scan_update_check_return_value_when_not_up_to_date( # Validate that scan_repo persists changes to the .secureli.yaml file after checking for hook updates def test_that_scan_update_check_updates_last_check_time( scan_action: ScanAction, - mock_scanner: MagicMock, + mock_hooks_scanner: MagicMock, mock_get_time_far_from_epoch: MagicMock, mock_secureli_config: MagicMock, mock_pass_install_verification: MagicMock, diff --git a/tests/modules/core/test_scanner_service.py b/tests/modules/core/test_scanner_service.py index d49d1045..820bb9f1 100644 --- a/tests/modules/core/test_scanner_service.py +++ b/tests/modules/core/test_scanner_service.py @@ -6,7 +6,7 @@ from secureli.modules.shared.models.scan import ScanMode from secureli.repositories import repo_settings from secureli.modules.core.core_services.scanner import ( - ScannerService, + HooksScannerService, OutputParseErrors, ) from pytest_mock import MockerFixture @@ -114,12 +114,12 @@ def mock_config_no_black(mocker: MockerFixture) -> MagicMock: @pytest.fixture() -def scanner_service(mock_pre_commit: MagicMock) -> ScannerService: - return ScannerService(mock_pre_commit) +def scanner_service(mock_pre_commit: MagicMock) -> HooksScannerService: + return HooksScannerService(mock_pre_commit) def test_that_scanner_service_scans_repositories_with_pre_commit( - scanner_service: ScannerService, + scanner_service: HooksScannerService, mock_pre_commit: MagicMock, ): scan_result = scanner_service.scan_repo(test_folder_path, ScanMode.ALL_FILES) @@ -129,7 +129,7 @@ def test_that_scanner_service_scans_repositories_with_pre_commit( def test_that_scanner_service_parses_failures( - scanner_service: ScannerService, + scanner_service: HooksScannerService, mock_pre_commit: MagicMock, mock_scan_output_single_failure: MagicMock, mock_config_all_repos: MagicMock, @@ -143,7 +143,7 @@ def test_that_scanner_service_parses_failures( def test_that_scanner_service_parses_multiple_failures( - scanner_service: ScannerService, + scanner_service: HooksScannerService, mock_pre_commit: MagicMock, mock_scan_output_double_failure: MagicMock, mock_config_all_repos: MagicMock, @@ -157,7 +157,7 @@ def test_that_scanner_service_parses_multiple_failures( def test_that_scanner_service_parses_when_no_failures( - scanner_service: ScannerService, + scanner_service: HooksScannerService, mock_pre_commit: MagicMock, mock_scan_output_no_failure: MagicMock, mock_config_all_repos: MagicMock, @@ -171,7 +171,7 @@ def test_that_scanner_service_parses_when_no_failures( def test_that_scanner_service_handles_error_in_missing_repo( - scanner_service: ScannerService, + scanner_service: HooksScannerService, mock_pre_commit: MagicMock, mock_scan_output_double_failure: MagicMock, mock_config_no_black: MagicMock, @@ -184,7 +184,9 @@ def test_that_scanner_service_handles_error_in_missing_repo( assert scan_result.failures[1].repo == OutputParseErrors.REPO_NOT_FOUND -def test_that_find_repo_from_id_finds_matching_hooks(scanner_service: ScannerService): +def test_that_find_repo_from_id_finds_matching_hooks( + scanner_service: HooksScannerService, +): mock_hook_id = "find_secrets" expected_repo = "mock_repo" result = scanner_service._find_repo_from_id( @@ -207,7 +209,7 @@ def test_that_find_repo_from_id_finds_matching_hooks(scanner_service: ScannerSer def test_that_find_repo_from_id_does_not_have_matching_hook_id( - scanner_service: ScannerService, + scanner_service: HooksScannerService, ): result = scanner_service._find_repo_from_id( "test-hook-id", diff --git a/tests/modules/pii_scanner/test_pii_scanner_service.py b/tests/modules/pii_scanner/test_pii_scanner_service.py new file mode 100644 index 00000000..36761cb7 --- /dev/null +++ b/tests/modules/pii_scanner/test_pii_scanner_service.py @@ -0,0 +1,129 @@ +import pytest +from pytest_mock import MockerFixture +from unittest.mock import MagicMock, Mock +import builtins +import contextlib, io +from pathlib import Path +from secureli.modules.pii_scanner.pii_scanner import PiiScannerService +from secureli.modules.shared.models.scan import ScanMode + + +test_folder_path = Path(".") + + +@pytest.fixture() +def mock_repo_files_repository() -> MagicMock: + mock_repo_files_repository = MagicMock() + mock_repo_files_repository.list_staged_files.return_value = ["fake_file_path"] + mock_repo_files_repository.list_repo_files.return_value = ["fake_file_path"] + return mock_repo_files_repository + + +@pytest.fixture() +def mock_echo() -> MagicMock: + mock_echo = MagicMock() + return mock_echo + + +@pytest.fixture() +def mock_open_fn(mocker: MockerFixture) -> MagicMock: + # The below data wouldn't ACTUALLY count as PII, but using fake PII here would prevent this code + # from being committed (as seCureLi scans itself before commit!) + # Instead, we mock the regex search function to pretend we found a PII match so we can assert the + # scanner's behavior + mock_open = mocker.mock_open( + read_data=""" + fake_email='pantsATpants.com' + fake_phone='phone-num-here' + """ + ) + return mocker.patch("builtins.open", mock_open) + + +# Include the below for any tests where you want PII to be "found" +@pytest.fixture() +def mock_re(mocker: MockerFixture) -> MagicMock: + match_object = mocker.patch("re.Match", lambda *args: True) + return mocker.patch("re.search", match_object) + + +@pytest.fixture() +def pii_scanner_service( + mock_repo_files_repository: MagicMock, mock_echo: MagicMock +) -> PiiScannerService: + return PiiScannerService(mock_repo_files_repository, mock_echo) + + +def test_that_pii_scanner_service_finds_potential_pii( + pii_scanner_service: PiiScannerService, + mock_repo_files_repository: MagicMock, + mock_open_fn: MagicMock, + mock_re: MagicMock, +): + scan_result = pii_scanner_service.scan_repo(test_folder_path, ScanMode.STAGED_ONLY) + + mock_repo_files_repository.list_staged_files.assert_called_once() + + assert scan_result.successful == False + assert len(scan_result.failures) == 1 + assert "Email" in scan_result.output + assert "Phone number" in scan_result.output + + +def test_that_pii_scanner_service_scans_all_files_when_specified( + pii_scanner_service: PiiScannerService, + mock_repo_files_repository: MagicMock, + mock_open_fn: MagicMock, +): + pii_scanner_service.scan_repo(test_folder_path, ScanMode.ALL_FILES) + + mock_repo_files_repository.list_repo_files.assert_called_once() + + +def test_that_pii_scanner_service_ignores_excluded_file_extensions( + pii_scanner_service: PiiScannerService, + mock_repo_files_repository: MagicMock, + mock_open_fn: MagicMock, + mock_re: MagicMock, +): + mock_repo_files_repository.list_staged_files.return_value = ["fake_file_path.md"] + + scan_result = pii_scanner_service.scan_repo(test_folder_path, ScanMode.STAGED_ONLY) + + assert scan_result.successful == True + + +def test_that_pii_scanner_service_only_scans_specific_files_if_provided( + pii_scanner_service: PiiScannerService, + mock_repo_files_repository: MagicMock, + mock_open_fn: MagicMock, + mock_re: MagicMock, +): + specified_file = "fake_file_path" + ignored_file = "not-the-file-we-want" + mock_repo_files_repository.list_staged_files.return_value = [ + specified_file, + ignored_file, + ] + scan_result = pii_scanner_service.scan_repo( + test_folder_path, ScanMode.STAGED_ONLY, [specified_file] + ) + + assert scan_result.successful == False + assert len(scan_result.failures) == 1 + assert scan_result.failures[0].file == specified_file + + +def test_that_pii_scanner_prints_when_exceptions_encountered( + pii_scanner_service: PiiScannerService, + mock_open_fn: MagicMock, + mock_echo: MagicMock, +): + mock_open_fn.side_effect = Exception("Oh no") + pii_scanner_service.scan_repo( + test_folder_path, + ScanMode.STAGED_ONLY, + ) + + mock_echo.print.assert_called_once() + assert "Error PII scanning" in mock_echo.print.call_args.args[0] diff --git a/tests/modules/shared/utilities/test_git_meta.py b/tests/modules/shared/utilities/test_git_meta.py index 94781e26..59d4eb2c 100644 --- a/tests/modules/shared/utilities/test_git_meta.py +++ b/tests/modules/shared/utilities/test_git_meta.py @@ -6,14 +6,18 @@ from secureli.modules.shared import utilities -mock_git_origin_url = r"git@github.com:my-org/repo%20with%20spaces.git" +mock_git_origin_url = ( + r"git@github.com:my-org/repo%20with%20spaces.git" # disable-pii-scan +) @pytest.fixture() def mock_subprocess(mocker: MockerFixture) -> MagicMock: mock_subprocess = mocker.patch("secureli.modules.shared.utilities.subprocess") mock_subprocess.run.return_value = CompletedProcess( - args=[], returncode=0, stdout="great.engineer@slalom.com\n".encode("utf8") + args=[], + returncode=0, + stdout="great.engineer@slalom.com\n".encode("utf8"), # disable-pii-scan ) return mock_subprocess @@ -60,7 +64,9 @@ def test_git_user_email_loads_user_email_via_git_subprocess(mock_subprocess: Mag result = utilities.git_user_email() mock_subprocess.run.assert_called_once() - assert result == "great.engineer@slalom.com" # note: without trailing newline + assert ( + result == "great.engineer@slalom.com" # disable-pii-scan + ) # note: without trailing newline def test_origin_url_parses_config_to_get_origin_url(mock_configparser: MagicMock): diff --git a/tests/repositories/test_repo_files_repository.py b/tests/repositories/test_repo_files_repository.py index 5a5ab450..4feee37d 100644 --- a/tests/repositories/test_repo_files_repository.py +++ b/tests/repositories/test_repo_files_repository.py @@ -3,10 +3,18 @@ import pytest from pytest_mock import MockerFixture +from subprocess import CompletedProcess from secureli.repositories.repo_files import RepoFilesRepository +@pytest.fixture() +def mock_subprocess(mocker: MockerFixture) -> MagicMock: + mock_subprocess = MagicMock() + mocker.patch("secureli.repositories.repo_files.subprocess", mock_subprocess) + return mock_subprocess + + @pytest.fixture() def git_not_exists_folder_path() -> MagicMock: git_folder_path = MagicMock() @@ -145,6 +153,22 @@ def test_that_list_repo_files_raises_value_error_without_git_repo( repo_files_repository.list_repo_files(git_not_exists_folder_path) +def test_that_list_staged_files_returns_list_of_staged_files( + repo_files_repository: RepoFilesRepository, + mock_subprocess: MagicMock, +): + fake_file_1 = "i/am/staged" + fake_file_2 = "also/staged" + mock_subprocess.run.return_value = CompletedProcess( + args=[], + returncode=0, + stdout=f"{fake_file_1}\n{fake_file_2}\n".encode("utf8"), + ) + + result = repo_files_repository.list_staged_files(Path(".")) + assert result == [fake_file_1, fake_file_2] + + def test_that_list_repo_files_raises_value_error_if_dot_git_is_a_file_somehow( repo_files_repository: RepoFilesRepository, git_a_file_for_some_reason_folder_path: MagicMock,