From 42872180dfdeb638e794b1b2e8ece32bc4fb30d2 Mon Sep 17 00:00:00 2001
From: Enthec
Date: Fri, 26 Jul 2024 07:54:56 +0200
Subject: [PATCH] validate tags

Check the tags that follow a pattern: at most two, named confidence
(an integer from 0 to 100) and version (must match the version
template regex). The check runs for plain string fields, regex fields
and dom selectors. The script entry point now validates every
technologies file unless TECH_FILE_NAME selects a single one. The
ci_csrf_token cookie and html entries in c.json lose their version tags.

---
 .../workflows/scripts/technology_validator.py | 87 ++++++++++++++++---
 src/technologies/c.json                       |  4 +-
 2 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/scripts/technology_validator.py b/.github/workflows/scripts/technology_validator.py
index 9beb2db8..6f27218d 100644
--- a/.github/workflows/scripts/technology_validator.py
+++ b/.github/workflows/scripts/technology_validator.py
@@ -64,10 +64,21 @@ def __init__(self, msg: str):
         super().__init__(msg)
 
 
+class TooManyTagsException(Exception):
+    def __init__(self, msg: str):
+        super().__init__(msg)
+
+
+class InvalidTagException(Exception):
+    def __init__(self, msg: str):
+        super().__init__(msg)
+
+
 class AbstractValidator:
     def __init__(self, required: bool = False):
         self._required = required
         self._custom_error: Optional[Exception] = None
+        self.__version_match = re.compile(r"^(?:(?P<prefix>.*)?\\(?P<group>\d+)(?:\?(?P<first>.*)?:(?P<second>.*)?)?|(?P<fixed>[a-zA-Z0-9.]+)?)$")
 
     def process(self, property_name: str, tech_name: str, data: Any) -> bool:
         if self._required and not data:
@@ -77,11 +88,52 @@ def process(self, property_name: str, tech_name: str, data: Any) -> bool:
         return self._validate(tech_name, data)
 
     def _validate(self, tech_name: str, data: Any) -> bool:
+        if isinstance(data, str):
+            if not self._validate_tags(tech_name, data):
+                return False
         for t in self.get_type():
             if isinstance(data, t):
                 return True
         return False
 
+    def _validate_tags(self, tech_name: str, pattern: str) -> bool:
+        """Validate the tags appended to a pattern after a backslash-semicolon.
+
+        At most two tags are allowed and they must not share a name. A
+        ``confidence`` tag must be an integer from 0 to 100; a ``version``
+        tag must match the version template regex. Any other tag name is
+        rejected.
+
+        :return: True if all tags are valid; otherwise False, with the
+            reason stored through ``_set_custom_error``.
+        """
+        tags: list[str] = pattern.split(r"\;")[1:]
+        if len(tags) > 2:
+            self._set_custom_error(TooManyTagsException(f"pattern '{pattern}' for tech '{tech_name}' has more than 2 tags, only confidence & version are allowed!"))
+            return False
+        tag_names: list[str] = [tag.split(":")[0].lower() for tag in tags]
+        if len(tag_names) == 2 and tag_names[0] == tag_names[1]:
+            self._set_custom_error(TooManyTagsException(f"pattern '{pattern}' for tech '{tech_name}' has more than 2 tags named {tag_names[1]}!"))
+            return False
+        for tag_name, tag_value in {tag.split(":")[0]: ":".join(tag.split(":")[1:]) for tag in tags}.items():
+            if tag_name == "confidence":
+                # isdecimal() (unlike isnumeric()) only accepts characters that int() can parse
+                if not tag_value.isdecimal():
+                    self._set_custom_error(InvalidTagException(f"Invalid tag value '{tag_value}' for tech '{tech_name}' in pattern '{pattern}', confidence must be numeric!"))
+                    return False
+                if not 0 <= int(tag_value) <= 100:
+                    self._set_custom_error(InvalidTagException(f"Invalid tag value '{tag_value}' for tech '{tech_name}' in pattern '{pattern}', confidence must be between 0 and 100!"))
+                    return False
+            elif tag_name == "version":
+                match: re.Match = self.__version_match.match(tag_value)
+                if not match:
+                    self._set_custom_error(InvalidTagException(f"Invalid tag value '{tag_value}' for tech '{tech_name}' in pattern '{pattern}', version is invalid!"))
+                    return False
+            else:
+                self._set_custom_error(InvalidTagException(f"this tag '{tag_name}' for tech '{tech_name}' in pattern '{pattern}'doesn't exist!"))
+                return False
+        return True
+
     def get_type(self) -> list[Type]:
         raise NotImplementedError()
 
@@ -92,11 +144,6 @@ def _set_custom_error(self, custom_error: Exception) -> None:
         self._custom_error = custom_error
 
 
-class StringValidator(AbstractValidator):
-    def get_type(self) -> list[Type]:
-        return [str]
-
-
 class PricingValidator(AbstractValidator):
     def _validate(self, tech_name: str, data: Any) -> bool:
         if not super()._validate(tech_name, data):
@@ -125,23 +172,30 @@ def _validate(self, tech_name: str, data: Any) -> bool:
         return True
 
     def _validate_regex(self, tech_name: str, data: Any) -> bool:
-        if type(data) is str:
+        if isinstance(data, str):
             try:
-                re.compile(data)
+                if not self._validate_tags(tech_name, data):
+                    return False
+                re.compile(data.split(r"\;")[0])
             except re.error as e:
                 self._set_custom_error(InvalidRegexException(f"Unable to compile regex '{data}' for tech '{tech_name}', got error: {e.msg}"))
                 return False
-        elif type(data) is dict:
+        elif isinstance(data, dict):
             for _, val in data.items():
                 if not self._validate_regex(tech_name, val):
                     return False
-        elif type(data) is list:
+        elif isinstance(data, list):
             for item in data:
                 if not self._validate_regex(tech_name, item):
                     return False
         return True
 
 
+class StringValidator(AbstractValidator):
+    def get_type(self) -> list[Type]:
+        return [str]
+
+
 class BoolValidator(AbstractValidator):
     def get_type(self) -> list[Type]:
         return [bool]
@@ -176,6 +230,8 @@ class DomValidator(RegexValidator):
     def _validate(self, tech_name: str, data: Any) -> bool:
         if isinstance(data, list):
             for element in data:
+                if not self._validate_tags(tech_name, element):
+                    return False
                 BeautifulSoup("", "html.parser").select(element.split(r"\;")[0])
         elif isinstance(data, dict):
             for k, v in data.items():
@@ -202,6 +258,8 @@ def _validate(self, tech_name: str, data: Any) -> bool:
                             return False
                     elif key == "exists":
+                        if not self._validate_tags(tech_name, val):
+                            return False
                         if val.split(r"\;")[0] != "":
                             self._set_custom_error(InvalidTypeForFieldException(f"Invalid value for dom in tech '{tech_name}', selector '{k}' empty string is required inside '{key}' but {val} was found!"))
                             return False
                     else:
@@ -214,6 +272,9 @@ def _validate(self, tech_name: str, data: Any) -> bool:
                             return False
         return True
 
+    def get_type(self) -> list[Type]:
+        return [list, dict]
+
 
 class IconValidator(StringValidator):
     def __init__(self, icons: list[str], required: bool = False):
@@ -336,6 +397,8 @@ def process(self) -> None:
 
 if __name__ == '__main__':
     # TODO validate ;confidence & ;version
-    # for letter in string.ascii_lowercase + "_":
-    #     TechnologiesValidator(os.getenv("TECH_FILE_NAME", f"{letter}.json")).validate()
-    TechnologiesValidator(os.getenv("TECH_FILE_NAME", f"a.json")).validate()
+    # An explicit TECH_FILE_NAME selects one file; otherwise validate all of them.
+    env_file = os.getenv("TECH_FILE_NAME")
+    file_names = [env_file] if env_file is not None else [f"{letter}.json" for letter in string.ascii_lowercase + "_"]
+    for file_name in file_names:
+        TechnologiesValidator(file_name).validate()
diff --git a/src/technologies/c.json b/src/technologies/c.json
index 8a0dee0a..6a87ad29 100644
--- a/src/technologies/c.json
+++ b/src/technologies/c.json
@@ -2846,14 +2846,14 @@
       18
     ],
     "cookies": {
-      "ci_csrf_token": "^(.+)$\\;version:\\1?2+:",
+      "ci_csrf_token": "",
       "ci_session": "",
       "exp_last_activity": "",
       "exp_tracker": ""
     },
     "cpe": "cpe:2.3:a:codeigniter:codeigniter:*:*:*:*:*:*:*:*",
     "html": [
-      "]+name=\"ci_csrf_token\"\\;version:2+"
+      "]+name=\"ci_csrf_token\""
     ],
     "icon": "CodeIgniter.png",
     "implies": [