Skip to content

Commit

Permalink
validate tags
Browse files Browse the repository at this point in the history
  • Loading branch information
enthec-opensource committed Jul 26, 2024
1 parent 50f4c99 commit 4287218
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 14 deletions.
74 changes: 62 additions & 12 deletions .github/workflows/scripts/technology_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,21 @@ def __init__(self, msg: str):
super().__init__(msg)


class TooManyTagsException(Exception):
def __init__(self, msg: str):
super().__init__(msg)


class InvalidTagException(Exception):
def __init__(self, msg: str):
super().__init__(msg)


class AbstractValidator:
def __init__(self, required: bool = False):
self._required = required
self._custom_error: Optional[Exception] = None
self.__version_match = re.compile(r"^(?:(?P<prefix>.*)?\\(?P<group>\d+)(?:\?(?P<first>.*)?:(?P<second>.*)?)?|(?P<fixed>[a-zA-Z0-9.]+)?)$")

def process(self, property_name: str, tech_name: str, data: Any) -> bool:
if self._required and not data:
Expand All @@ -77,11 +88,41 @@ def process(self, property_name: str, tech_name: str, data: Any) -> bool:
return self._validate(tech_name, data)

def _validate(self, tech_name: str, data: Any) -> bool:
if isinstance(data, str):
if not self._validate_tags(tech_name, data):
return False
for t in self.get_type():
if isinstance(data, t):
return True
return False

def _validate_tags(self, tech_name: str, pattern: str) -> bool:
tags: list[str] = pattern.split(r"\;")[1:]
if len(tags) > 2:
self._set_custom_error(TooManyTagsException(f"pattern '{pattern}' for tech '{tech_name}' has more than 2 tags, only confidence & version are allowed!"))
return False
tag_names: list[str] = [tag.split(":")[0].lower() for tag in tags]
if len(tag_names) == 2 and tag_names[0] == tag_names[1]:
self._set_custom_error(TooManyTagsException(f"pattern '{pattern}' for tech '{tech_name}' has more than 2 tags named {tag_names[1]}!"))
return False
for tag_name, tag_value in {tag.split(":")[0]: ":".join(tag.split(":")[1:]) for tag in tags}.items():
if tag_name == "confidence":
if not tag_value.isnumeric():
self._set_custom_error(InvalidTagException(f"Invalid tag value '{tag_value}' for tech '{tech_name}' in pattern '{pattern}', confidence must be numeric!"))
return False
if 0 >= int(tag_value) >= 100:
self._set_custom_error(InvalidTagException(f"Invalid tag value '{tag_value}' for tech '{tech_name}' in pattern '{pattern}', confidence must be between 0 and 100!"))
return False
elif tag_name == "version":
match: re.Match = self.__version_match.match(tag_value)
if not match:
self._set_custom_error(InvalidTagException(f"Invalid tag value '{tag_value}' for tech '{tech_name}' in pattern '{pattern}', version is invalid!"))
return False
else:
self._set_custom_error(InvalidTagException(f"this tag '{tag_name}' for tech '{tech_name}' in pattern '{pattern}'doesn't exist!"))
return False
return True

def get_type(self) -> list[Type]:
raise NotImplementedError()

Expand All @@ -92,11 +133,6 @@ def _set_custom_error(self, custom_error: Exception) -> None:
self._custom_error = custom_error


class StringValidator(AbstractValidator):
def get_type(self) -> list[Type]:
return [str]


class PricingValidator(AbstractValidator):
def _validate(self, tech_name: str, data: Any) -> bool:
if not super()._validate(tech_name, data):
Expand Down Expand Up @@ -125,23 +161,30 @@ def _validate(self, tech_name: str, data: Any) -> bool:
return True

def _validate_regex(self, tech_name: str, data: Any) -> bool:
if type(data) is str:
if isinstance(data, str):
try:
re.compile(data)
if not self._validate_tags(tech_name, data):
return False
re.compile(data.split(r"\;")[0])
except re.error as e:
self._set_custom_error(InvalidRegexException(f"Unable to compile regex '{data}' for tech '{tech_name}', got error: {e.msg}"))
return False
elif type(data) is dict:
elif isinstance(data, dict):
for _, val in data.items():
if not self._validate_regex(tech_name, val):
return False
elif type(data) is list:
elif isinstance(data, list):
for item in data:
if not self._validate_regex(tech_name, item):
return False
return True


class StringValidator(AbstractValidator):
def get_type(self) -> list[Type]:
return [str]


class BoolValidator(AbstractValidator):
def get_type(self) -> list[Type]:
return [bool]
Expand Down Expand Up @@ -176,6 +219,8 @@ class DomValidator(RegexValidator):
def _validate(self, tech_name: str, data: Any) -> bool:
if isinstance(data, list):
for element in data:
if not self._validate_tags(tech_name, element):
return False
BeautifulSoup("", "html.parser").select(element.split(r"\;")[0])
elif isinstance(data, dict):
for k, v in data.items():
Expand All @@ -202,6 +247,8 @@ def _validate(self, tech_name: str, data: Any) -> bool:
return False
elif key == "exists":
if val.split(r"\;")[0] != "":
if not self._validate_tags(tech_name, val):
return False
self._set_custom_error(InvalidTypeForFieldException(f"Invalid value for dom in tech '{tech_name}', selector '{k}' empty string is required inside '{key}' but {val} was found!"))
return False
else:
Expand All @@ -214,6 +261,9 @@ def _validate(self, tech_name: str, data: Any) -> bool:
return False
return True

def get_type(self) -> list[Type]:
return [list, dict]


class IconValidator(StringValidator):
def __init__(self, icons: list[str], required: bool = False):
Expand Down Expand Up @@ -336,6 +386,6 @@ def process(self) -> None:

if __name__ == '__main__':
# TODO validate ;confidence & ;version
# for letter in string.ascii_lowercase + "_":
# TechnologiesValidator(os.getenv("TECH_FILE_NAME", f"{letter}.json")).validate()
TechnologiesValidator(os.getenv("TECH_FILE_NAME", f"a.json")).validate()
for letter in string.ascii_lowercase + "_":
TechnologiesValidator(os.getenv("TECH_FILE_NAME", f"{letter}.json")).validate()
# TechnologiesValidator(os.getenv("TECH_FILE_NAME", f"a.json")).validate()
4 changes: 2 additions & 2 deletions src/technologies/c.json
Original file line number Diff line number Diff line change
Expand Up @@ -2846,14 +2846,14 @@
18
],
"cookies": {
"ci_csrf_token": "^(.+)$\\;version:\\1?2+:",
"ci_csrf_token": "",
"ci_session": "",
"exp_last_activity": "",
"exp_tracker": ""
},
"cpe": "cpe:2.3:a:codeigniter:codeigniter:*:*:*:*:*:*:*:*",
"html": [
"<input[^>]+name=\"ci_csrf_token\"\\;version:2+"
"<input[^>]+name=\"ci_csrf_token\""
],
"icon": "CodeIgniter.png",
"implies": [
Expand Down

0 comments on commit 4287218

Please sign in to comment.