forked from elastic/detection-rules
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_markdown.py
140 lines (113 loc) · 5.43 KB
/
generate_markdown.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0; you may not use this file except in compliance with the Elastic License
# 2.0.
"""Lightweight builtin toml-markdown converter."""
import tomllib
import urllib3
from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional
# Directory containing this script; TOML hunt files are discovered beneath it.
HUNTING_DIR = Path(__file__).parent
# Base URLs for MITRE technique links: ATLAS for AML* IDs, ATT&CK for T* IDs.
ATLAS_URL = "https://atlas.mitre.org/techniques/"
ATTACK_URL = "https://attack.mitre.org/techniques/"
# the standard link takes `integration.package` and converts the link to `integration/package`, however, there are
# some exceptions such as `aws_bedrock.invocation` which should be linked to `aws_bedrock` instead
# https://docs.elastic.co/integrations/aws_bedrock
STATIC_INTEGRATION_LINK_MAP = {
    'aws_bedrock.invocation': 'aws_bedrock'
}
@dataclass
class Hunt:
    """A single hunt definition loaded from the `[hunt]` table of a TOML file.

    Required fields mirror the TOML schema; the trailing three are optional
    in the TOML but always materialize as lists here (never ``None``), so
    they are annotated ``list[str]`` rather than ``Optional[list[str]]``.
    """

    author: str
    description: str
    integration: list[str]
    uuid: str
    name: str
    language: list[str]
    license: str
    query: list[str]
    # Optional sections: default to a fresh empty list per instance.
    notes: list[str] = field(default_factory=list)
    mitre: list[str] = field(default_factory=list)
    references: list[str] = field(default_factory=list)
def load_toml(contents: str) -> Hunt:
    """Parse TOML text and build a Hunt from its ``[hunt]`` table.

    Raises ``KeyError`` if the ``[hunt]`` table is absent and ``TypeError``
    if the table carries unexpected keys (via the dataclass constructor).
    """
    parsed = tomllib.loads(contents)
    hunt_table = parsed["hunt"]
    return Hunt(**hunt_table)
def load_all_toml(base_path: Path) -> List[tuple[Hunt, Path]]:
    """Recursively read every ``*.toml`` under *base_path*.

    Returns one ``(Hunt, Path)`` pair per TOML file found.
    """
    return [
        (load_toml(path.read_text(encoding="utf-8")), path)
        for path in base_path.rglob("*.toml")
    ]
def validate_link(link: str):
    """Issue a GET request for *link* and raise if it is not reachable.

    Args:
        link: Absolute URL to check.

    Raises:
        ValueError: if the response status is anything other than 200.
    """
    # NOTE(review): a fresh PoolManager per call is wasteful but harmless at
    # this scale; hoist to module level if the number of links grows.
    http = urllib3.PoolManager()
    response = http.request('GET', link)
    if response.status != 200:
        raise ValueError(f"Invalid link: {link}")
def generate_integration_links(integrations: list[str]) -> list[str]:
    """Build markdown links to the Elastic integration docs.

    Each ``package.dataset`` name maps to ``package/dataset`` in the URL,
    except for entries overridden in STATIC_INTEGRATION_LINK_MAP. Every
    generated URL is checked for reachability before being emitted.
    """
    base_url = 'https://docs.elastic.co/integrations'
    links = []
    for name in integrations:
        slug = STATIC_INTEGRATION_LINK_MAP.get(name, name.replace('.', '/'))
        url = f'{base_url}/{slug}'
        validate_link(url)
        links.append(f'[{name}]({url})')
    return links
def convert_toml_to_markdown(hunt_config: Hunt, file_path: Path) -> str:
    """Render a Hunt as a markdown document.

    Sections in order: title, metadata, query blocks, then optional notes,
    MITRE techniques and references, and finally the license.
    """
    source_link = (Path('../queries') / file_path.name).as_posix()
    # Strip quote characters so the list renders as plain backticked text.
    language_line = f"- **Language:** `{hunt_config.language}`\n".replace("'", "").replace('"', "")

    parts = [
        f"# {hunt_config.name}\n\n---\n\n",
        "## Metadata\n\n",
        f"- **Author:** {hunt_config.author}\n",
        f"- **Description:** {hunt_config.description}\n",
        f"- **UUID:** `{hunt_config.uuid}`\n",
        f"- **Integration:** {', '.join(generate_integration_links(hunt_config.integration))}\n",
        language_line,
        f"- **Source File:** [{hunt_config.name}]({source_link})\n",
        "\n## Query\n\n",
    ]
    for query in hunt_config.query:
        # NOTE(review): no newline before the closing fence — assumes each
        # query string ends with its own trailing newline; confirm in TOML.
        parts.append(f"```sql\n{query}```\n\n")
    if hunt_config.notes:
        parts.append("## Notes\n\n" + "\n".join(f"- {note}" for note in hunt_config.notes))
    if hunt_config.mitre:
        # AML* IDs link to MITRE ATLAS; T* IDs link to ATT&CK with the
        # sub-technique dot rewritten as a path separator.
        technique_links = "\n".join(
            f"- [{tech}]({ATLAS_URL if tech.startswith('AML') else ATTACK_URL}"
            f"{tech.replace('.', '/') if tech.startswith('T') else tech})"
            for tech in hunt_config.mitre
        )
        parts.append("\n\n## MITRE ATT&CK Techniques\n\n" + technique_links)
    if hunt_config.references:
        parts.append("\n\n## References\n\n" + "\n".join(f"- {ref}" for ref in hunt_config.references))
    parts.append(f"\n\n## License\n\n- `{hunt_config.license}`\n")
    return "".join(parts)
def process_toml_files(base_path: Path) -> None:
    """Convert every TOML hunt under *base_path* to markdown and write an index.

    Each hunt's markdown lands in a sibling ``docs/`` directory next to its
    TOML file; a grouped ``index.md`` is written at *base_path*.
    """
    index_content = "# List of Available Queries\n\nHere are the queries currently available:"
    grouped: dict = {}

    for hunt_config, toml_file in load_all_toml(base_path):
        docs_dir = toml_file.parent.parent / "docs"
        docs_dir.mkdir(parents=True, exist_ok=True)
        markdown_path = docs_dir / f"{toml_file.stem}.md"
        markdown_path.write_text(convert_toml_to_markdown(hunt_config, toml_file), encoding="utf-8")
        print(f"Markdown generated: {markdown_path}")
        # Group index entries by the hunt's top-level folder name.
        folder = toml_file.parent.parent.name
        grouped.setdefault(folder, []).append(
            (markdown_path.relative_to(base_path), hunt_config.name, hunt_config.language)
        )

    # Assemble the index, folders and entries in sorted order.
    for folder, entries in sorted(grouped.items()):
        index_content += f"\n\n## {folder}\n"
        for rel_path, rule_name, language in sorted(entries):
            link_target = f"./{rel_path.as_posix()}"
            index_content += f"- [{rule_name}]({link_target}) ({', '.join(language)})\n"

    index_file = base_path / "index.md"
    index_file.write_text(index_content, encoding="utf-8")
    print(f"Index Markdown generated at: {index_file}")
if __name__ == "__main__":
    # Entry point: regenerate markdown docs for all hunt TOML files
    # found under the directory containing this script.
    process_toml_files(HUNTING_DIR)