#!python2
import string
import os
import pyevtx
import argparse
import pandas as pd
import json
from pandasql import sqldf
from bs4 import BeautifulSoup
def walk_files(root):
    """Map every directory under *root* (inclusive) to the plain files
    directly inside it: {'dir path': ['file1', ...], ...}."""
    files_by_dir = {}
    for path in (d[0] for d in os.walk(root)):
        files_by_dir[path] = [
            f for f in os.listdir(path)
            # keep only plain files; sub-directories get their own key
            if not os.path.isdir(os.path.join(path, f))
        ]
    return files_by_dir


def parse_evtx_record(xml_string, filename):
    """Flatten one EVTX event record (XML) into a dict of column -> value.

    An event entry has two components: <System> (fixed fields) and
    <EventData> (variable <Data Name="..."> elements, one column each).
    """
    soup = BeautifulSoup(xml_string, 'xml')
    record = {
        'FileName': filename,
        'Provider_Name': soup.System.Provider['Name'],
        'Provider_Guid': soup.System.Provider['Guid'],
        'EventID': soup.System.EventID.string,
        'Version': soup.System.Version.string,
        'Level': soup.System.Level.string,
        'Task': soup.System.Task.string,
        'Opcode': soup.System.Opcode.string,
        'Keywords': soup.System.Keywords.string,
        'TimeCreated_SystemTime': soup.System.TimeCreated['SystemTime'],
        'EventRecordID': soup.System.EventRecordID.string,
        'Correlation': soup.System.Correlation.string,
        'Execution_ProcessID': soup.System.Execution['ProcessID'],
        'Execution_ThreadID': soup.System.Execution['ThreadID'],
        'Channel': soup.System.Channel.string,
        'Computer': soup.System.Computer.string,
        # BUGFIX: `'UserID' in tag` tests the tag's children, not its
        # attributes, so the original guard never worked as commented.
        # Tag.get() reads the attribute, defaulting to '' when absent.
        'Security_UserID': soup.System.Security.get('UserID', ''),
    }
    # EventData: each named <Data> element becomes a capitalized column.
    for e in soup.findAll('Data'):
        name = e.get('Name')
        if name is not None:  # unnamed <Data> elements carry no usable key
            record[string.capwords(name)] = e.string
    return record


def load_eventlog(evtx_dir):
    """Parse every EVTX file under *evtx_dir* (recursively) into one
    pandas DataFrame, one row per event record."""
    rows = []
    for path, files in walk_files(evtx_dir).items():
        for file in files:
            # BUGFIX: os.path.join instead of Windows-only '\\' string
            # concatenation, and close the handle when done (the original
            # leaked one open file object per EVTX file).
            with open(os.path.join(path, file), "rb") as file_object:
                evtx_file = pyevtx.file()
                evtx_file.open_file_object(file_object)
                for i in range(evtx_file.get_number_of_records()):
                    xml_string = evtx_file.get_record(i).get_xml_string()
                    rows.append(parse_evtx_record(xml_string, file))
                evtx_file.close()
    # Build the frame once from the accumulated rows: DataFrame.append was
    # deprecated in pandas 1.4 / removed in 2.0, and was quadratic anyway.
    return pd.DataFrame(rows)


def hunt_rules(eventlog, rule_dir, output_path):
    """Run every SIGMA-derived SQL rule found under *rule_dir* against the
    *eventlog* DataFrame.

    Writes one CSV per matching rule into *output_path* and returns a
    summary DataFrame (rule name, description, match count).
    """
    summary_rows = []
    for path, files in walk_files(rule_dir).items():
        for file in files:
            with open(os.path.join(path, file)) as rule:
                data = json.load(rule)
            for entry in data:
                # Preprocess the stored SQL: strip brackets and undo the
                # escaping left over from rule conversion; [2:-1] drops the
                # b'...' wrapper of the stringified query.
                unquery = entry['rule']
                cleanquery = str(unquery).replace('[', '').replace(']', '').replace('\\\\', '\\').replace(r"\'\\'", r"'\'").replace(r"(\'", r"('")
                query = cleanquery[2:-1]
                # Best-effort by design: a rule may reference columns this
                # event log does not have; skip such rules and move on.
                try:
                    # The rules' SQL queries refer to the table 'eventlog',
                    # so expose the frame under exactly that name.
                    output = sqldf(query, {'eventlog': eventlog})
                    output['SIGMA Rule'] = entry['title']
                    output['SIGMA Description'] = entry['description']
                    # BUGFIX: the original tested len(output) > 1, silently
                    # dropping rules with exactly one match despite the
                    # stated intent of "1 or more matches".
                    if len(output) >= 1:
                        print("[+] %-70s: %2s matches" % (entry['title'], str(len(output))))
                        summary_rows.append({
                            'SIGMA Rule Name': entry['title'],
                            'SIGMA Rule Description': entry['description'],
                            'Count': str(len(output)),
                        })
                        output.to_csv(os.path.join(output_path, entry['title'] + '.csv'), encoding='utf-8')
                except Exception:
                    pass  # deliberate: unmatched schema / bad query is skipped
    return pd.DataFrame(summary_rows)


if __name__ == "__main__":
    # Command-line interface: EVTX folder, JSON rule folder, output folder.
    help_msg = "python EvtXHunt_python.py -f /dir/file"
    parser = argparse.ArgumentParser(description=help_msg, formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('-f', '--file', type=str, required=True, help="Evtx Folder")
    parser.add_argument('-r', '--rule', type=str, required=True, help="Folder containing JSON rules")
    parser.add_argument('-o', '--output', type=str, required=True, help="Absolute path to directory")
    args = parser.parse_args()

    print("[+] Processing specified EVTX...")
    eventlog = load_eventlog(args.file)

    output_path = args.output  # absolute path eg. C:\Users\nic\Desktop\SIT_Local\evtxIOC\temp\sub
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    print("[+] Hunting for IOC in specified EVTX...\n")
    analysisDF = hunt_rules(eventlog, args.rule, output_path)

    # Final summary: fixed column order, 1-based index for readability.
    print("\n=============================================Final Summary Report=============================================\n")
    analysisDF = analysisDF.reindex(columns=['SIGMA Rule Name', 'SIGMA Rule Description', 'Count'])
    analysisDF.index = analysisDF.index + 1
    print(analysisDF)
    # Persist both the summary and the full parsed event log as CSV.
    analysisDF.to_csv(os.path.join(output_path, 'Summary.csv'), encoding='utf-8')
    eventlog.to_csv(os.path.join(output_path, 'eventlog.csv'), encoding='utf-8')