def split_markdown_front_matter(text: str) -> Tuple[str, str]:
    r"""
    Return a tuple of (front matter, markdown body) strings split from ``text``.
    Each can be an empty string.

    The front matter is the run of lines enclosed between a first line that is
    exactly ``---`` and the next line that is exactly ``---``. Both delimiter
    lines are dropped from the result.

    - If ``text`` is empty or its first line is not ``---``, there is no front
      matter and ``text`` is returned unchanged as the markdown body.
    - If the opening ``---`` is never closed, everything after it is returned
      as front matter and the markdown body is empty.
    - When front matter is found, lines are re-joined with ``\n`` and the
      trailing line ending of ``text`` is not preserved.

    >>> text='''---
    ... title: DUMMY-SECURITY-2019-001
    ... description: Incorrect access control.
    ... cves: [CVE-2042-1337]
    ... ---
    ... # Markdown starts here
    ... '''
    >>> split_markdown_front_matter(text)
    ('title: DUMMY-SECURITY-2019-001\ndescription: Incorrect access control.\ncves: [CVE-2042-1337]', '# Markdown starts here')
    >>> split_markdown_front_matter('')
    ('', '')
    >>> split_markdown_front_matter('---\ntitle: x\n---')
    ('title: x', '')
    """
    # The docstring is a raw string so that the doctests can show \n as-is
    # rather than having to escape it.
    lines = text.splitlines()

    # Guard against empty input: "".splitlines() is [] and has no first line.
    if not lines or lines[0] != "---":
        return "", text

    content = lines[1:]
    try:
        # Search line by line rather than for the "\n---\n" substring so that a
        # closing delimiter is also found when it is the very first content
        # line (empty front matter) or the very last line (no markdown body).
        closing = content.index("---")
    except ValueError:
        # Unterminated front matter: everything after the opening delimiter
        # is treated as front matter.
        return "\n".join(content), ""

    front_matter = "\n".join(content[:closing])
    markdown = "\n".join(content[closing + 1:])
    return front_matter, markdown
def get_data_from_md(self, path):
    """
    Return the vulnerability data extracted from the advisory file at ``path``.

    The file is read in full, its front matter is separated from the markdown
    body with ``split_markdown_front_matter`` and only the front matter is
    passed to ``saneyaml.load``; the markdown body is discarded. The return
    value is whatever ``saneyaml.load`` produces for that front matter
    (presumably a mapping for a well-formed advisory -- verify against the
    callers).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the locale's
    # default encoding of the machine running the importer.
    with open(path, encoding="utf-8") as f:
        front_matter, _ = split_markdown_front_matter(f.read())
    return saneyaml.load(front_matter)