# anapyzermodel.py

# Import the enum class for better readability
import enum
# Import the pathlib library for cross platform file path abstraction
import pathlib


# Enumeration for the accepted log types
class AcceptedLogTypes(enum.Enum):
    APACHE = 'Apache (access.log)'
    IIS = 'IIS (u_ex*.log)'
    DEFAULT = APACHE


# Enumeration for the accepted file formats
class AcceptedFileFormats(enum.Enum):
    LOG = ('log files', '*.log')
    DEFAULT = LOG


# Enumeration for the output file formats
class OutputFileFormats(enum.Enum):
    CSV = ('CSV (Comma delimited)', '*.csv')
    DEFAULT = CSV


# Enumeration for the file parse modes
class FileParseModes(enum.Enum):
    GRAPH = 'Generate graph'
    REPORT = 'Generate report'
    CSV = 'Convert to csv'
    DEFAULT = GRAPH


# Enumeration for the graph output modes
class GraphModes(enum.Enum):
    CON_PER_HOUR = 'Connections per hour'
    IP_CONNECTIONS = 'Connections by Country'
    DEFAULT = CON_PER_HOUR


# Enumeration for the report output modes
class ReportModes(enum.Enum):
    URL_RPT = 'Website resources'
    SUSP_ACT = 'Suspicious activity report'
    CONN_LENGTH = 'Connection length report'
    DEFAULT = URL_RPT
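
# Usage note: the member values above are the human-readable strings that the
# rest of the application (presumably the view layer) passes into the setters
# below. A setter call such as FileParseModes('Generate graph') yields
# FileParseModes.GRAPH, and an unrecognized string raises ValueError.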


# Custom exception type raised by the model when a file or parse operation fails
class AnaPyzerModelError(Exception):
    def __init__(self, message):
        super().__init__(message)
        self.message = message


# Class definition for the model (data and file-handling layer) of the application
class AnaPyzerModel:
    # Constructor
    def __init__(self, parser, analyzer):
        self.DEFAULT_FILE_PATH = pathlib.Path.home()
        self._in_file_path = pathlib.Path('')
        self._out_file_path = pathlib.Path('')
        self._log_type = AcceptedLogTypes.DEFAULT
        self._file_parse_mode = FileParseModes.DEFAULT
        self._graph_mode = GraphModes.DEFAULT
        self._report_mode = ReportModes.DEFAULT
        self._parsed_log_data = None
        self._graph_data = None
        self._report_data = None
        self._in_file_path_has_changed = False
        self._analyzer = analyzer
        self._parser = parser

    # Setter for the file path to the input file
    # Takes a string for the file path
    def set_in_file_path(self, in_file_path):
        # If the input file path was set, set the model's file path equal to it
        if in_file_path:
            new_in_file_path = pathlib.Path(in_file_path)
            # Compare by value ('!=' rather than 'is not') so an unchanged path is not treated as a change
            if self._in_file_path != new_in_file_path:
                self._in_file_path = new_in_file_path
                self._in_file_path_has_changed = True
        # Otherwise set the model's file path equal to the default file path
        else:
            self._in_file_path = pathlib.Path(self.DEFAULT_FILE_PATH)
            self._in_file_path_has_changed = True

    # Getter for the model's file path to the input file
    # Returns a string representing the file path
    def get_in_file_path(self):
        in_file_path = str(self._in_file_path)
        # An empty pathlib.Path stringifies to '.', so report it as an empty string instead
        if in_file_path == '.':
            in_file_path = ''
        return in_file_path

    # Validation method that determines whether the input file path that is currently set in the model is valid
    def in_file_path_is_valid(self):
        return pathlib.Path(self._in_file_path).is_file()

    # Setter for the file path to the output file
    # Takes a string for the file path
    def set_out_file_path(self, out_file_path):
        # If the output file path was set, set the model's output file path equal to it
        if out_file_path:
            # If we are in convert to CSV mode
            if self._file_parse_mode is FileParseModes.CSV:
                # Get the suffix of the output file
                out_file_suffix = str(pathlib.PurePath(out_file_path).suffix)
                # If it is not '.csv'
                if out_file_suffix != '.csv':
                    # Change the suffix to '.csv'
                    out_file_path = str(pathlib.PurePath(out_file_path).with_suffix('.csv'))
            # Set the model's out file path to the out file path
            self._out_file_path = pathlib.Path(out_file_path)
        # Otherwise set the model's output file path equal to the default file path
        else:
            self._out_file_path = pathlib.Path(self.DEFAULT_FILE_PATH)
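
    # For reference, pathlib handles the extension rewrite above, e.g.
    # pathlib.PurePath('report.txt').with_suffix('.csv') -> PurePath('report.csv'),
    # and a name with no suffix such as 'report' simply gains '.csv'.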

    # Getter for the model's file path to the output file
    # Returns a string representing the file path
    def get_out_file_path(self):
        out_file_path = str(self._out_file_path)
        # An empty pathlib.Path stringifies to '.', so report it as an empty string instead
        if out_file_path == '.':
            out_file_path = ''
        return out_file_path

    # Validation method that determines whether the output file path that is currently set in the model is valid
    def out_file_path_is_valid(self):
        is_valid = False
        out_file_path = pathlib.PurePath(self._out_file_path)
        out_file_path_parent = pathlib.Path(str(out_file_path.parent))
        # Valid when a non-empty output path has been set and its parent directory exists
        if self.get_out_file_path() != '' and out_file_path_parent.is_dir():
            is_valid = True
        return is_valid

    # Setter for the type of input log file that will be read
    def set_log_type(self, log_type):
        self._log_type = AcceptedLogTypes(log_type)
        self._in_file_path_has_changed = True

    # Getter for the model's expected input log type
    # Returns an AcceptedLogTypes member representing the expected input log type
    def get_log_type(self):
        return self._log_type

    # Setter for how the input file will be parsed
    def set_file_parse_mode(self, file_parse_mode):
        self._file_parse_mode = FileParseModes(file_parse_mode)

    # Getter for how the input file will be parsed
    def get_file_parse_mode(self):
        return self._file_parse_mode

    # Setter for the type of graph to generate
    def set_graph_mode(self, graph_mode):
        self._graph_mode = GraphModes(graph_mode)

    # Getter for the type of graph to generate
    def get_graph_mode(self):
        return self._graph_mode

    # Setter for the type of report to generate
    def set_report_mode(self, report_mode):
        self._report_mode = ReportModes(report_mode)

    # Getter for the type of report to generate
    def get_report_mode(self):
        return self._report_mode

    # Reads from the input file, converts to csv, and writes to the output file
    def export_log_to_csv(self):
        self._parse_log_file_data()
        try:
            out_file = open(self._out_file_path, 'w')
        except IOError as e:
            raise AnaPyzerModelError("Could not write to file:\n" + e.filename + "\n" + e.strerror)
        try:
            self._analyzer.write_parsed_log_to_csv(self._parsed_log_data, out_file)
        except IOError as e:
            raise AnaPyzerModelError("Error encountered with file:\n" + e.filename + "\n" + e.strerror)
        finally:
            out_file.close()
        return True

    # Parses the input file (if needed) and builds report data according to the current report mode
    def create_report_data(self):
        self._parse_log_file_data()
        if self._report_mode is ReportModes.URL_RPT:
            self._report_data = self._analyzer.get_web_pages(self._parsed_log_data)
        elif self._report_mode is ReportModes.SUSP_ACT:
            self._report_data = self._analyzer.malicious_activity_report(self._parsed_log_data)
        elif self._report_mode is ReportModes.CONN_LENGTH:
            self._report_data = self._analyzer.get_connection_length_report(self._parsed_log_data)

    # Getter for the most recently generated report data
    def get_report_data(self):
        return self._report_data

    # _parse_log_file_data opens the current input file and attempts to parse it,
    # choosing the parser based on the log type currently set in the model
    def _parse_log_file_data(self):
        # Only re-parse when the input file has changed or nothing has been parsed yet
        if self._in_file_path_has_changed or self._parsed_log_data is None:
            parsed_log = None
            self._in_file_path_has_changed = False
            try:
                log_file = open(self.get_in_file_path(), 'r')
                if self._log_type is AcceptedLogTypes.IIS:
                    # print("parsing IIS")
                    try:
                        parsed_log = self._parser.parse_w3c_to_list(log_file)
                    except IndexError as e:
                        raise AnaPyzerModelError("Log file does not appear to be in IIS / W3C log format")
                elif self._log_type is AcceptedLogTypes.APACHE:
                    # print("parsing Apache")
                    try:
                        parsed_log = self._parser.parse_common_apache_to_list(log_file)
                    except IndexError as e:
                        raise AnaPyzerModelError("Log file does not appear to be in Apache / Common log format")
            except IOError as e:
                raise AnaPyzerModelError("Could not read from " + e.filename + "\n" + e.strerror)
            log_file.close()
            if parsed_log is not None:
                self._parsed_log_data = parsed_log
                return True
            else:
                raise AnaPyzerModelError("Log was unable to be parsed.")

    # create_graph_data parses the input file (if needed) and attempts to extract
    # graphable data from the parsed log data according to the current graph mode
    def create_graph_data(self):
        self._parse_log_file_data()
        graph_data = None
        if self._graph_mode is GraphModes.CON_PER_HOUR:
            print("Creating Connections Per Hour Report")
            graph_data = self._analyzer.get_connections_per_hour(self._parsed_log_data)
        elif self._graph_mode is GraphModes.IP_CONNECTIONS:
            print("Creating IP Connections Report")
            graph_data = self._analyzer.ip_connection_report(self._parsed_log_data)
        if graph_data is not None:
            self._graph_data = graph_data
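
    # Judging by the getters below, the analyzer's graph data is assumed to be a
    # dict that may carry optional 'xlabel', 'ylabel' and 'title' entries alongside
    # the plotted keys/values, and for date-delimited reports each date key maps to
    # a nested dict of its own.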

    # Print method for testing, outputs current delimited graph data to console
    def print_current_graph_data_split(self):
        for date in self._graph_data:
            print(self._graph_data[date])

    # Print method for testing, outputs current graph data to console
    def print_current_graph_data(self):
        print(self._graph_data)

    # Print method for testing, outputs the currently parsed log data to console row by row
    def print_current_report_data(self):
        print("printing _report_data")
        print(self._parsed_log_data)
        if self._parsed_log_data['length'] > 0:
            i = 0
            while i < self._parsed_log_data['length']:
                print(self._parsed_log_data[i])
                i += 1

    # Returns a list of the graph data dictionary's keys whose values are themselves dictionaries
    def get_graph_data_split(self):
        split = []
        for key in self._graph_data.keys():
            if isinstance(self._graph_data[key], dict):
                split.append(key)
        return split

    # Getter method for graph data keys,
    # used for data structures which contain a layer of abstraction
    # such as each data set being separated by date
    def get_graph_data_split_keys(self, date):
        if self._graph_data.get(date):
            return self._graph_data[date].keys()
        else:
            return None

    # Getter method for graph data values,
    # used for data structures which contain a layer of abstraction
    # such as each data set being separated by date
    def get_graph_data_split_values(self, date):
        if self._graph_data.get(date):
            return self._graph_data[date].values()
        else:
            return None

    # Getter method for graph keys,
    # used for graphing of non-delimited data (not separated by date/time/etc.)
    def get_graph_data_keys(self):
        return self._graph_data.keys()

    # Getter method for graph values,
    # used for graphing of non-delimited data (not separated by date/time/etc.)
    def get_graph_data_values(self):
        return self._graph_data.values()

    # Getter method for graph x label
    def get_graph_data_x_label(self):
        if 'xlabel' in self._graph_data.keys():
            return self._graph_data['xlabel']
        else:
            return 'X Axis'

    # Getter method for graph y label
    def get_graph_data_y_label(self):
        if 'ylabel' in self._graph_data.keys():
            return self._graph_data['ylabel']
        else:
            return 'Y Axis'

    # Getter method for graph title
    def get_graph_data_title(self):
        if self._graph_data.get('title'):
            return self._graph_data['title']
        else:
            return 'Title'
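

# --- Usage sketch ---
# A minimal, hedged illustration of how this model might be driven by the rest of
# the application. The real project injects its own parser and analyzer objects;
# the stub classes below are stand-ins implementing only the two methods this
# sketch exercises, and 'access.log' is a hypothetical input path.
if __name__ == '__main__':
    class _StubParser:
        # Stand-in parser: returns an empty parsed-log structure
        def parse_common_apache_to_list(self, log_file):
            return {'length': 0}

    class _StubAnalyzer:
        # Stand-in analyzer: returns a tiny graphable dictionary
        def get_connections_per_hour(self, parsed_log_data):
            return {'title': 'Connections per hour', 'xlabel': 'Hour', 'ylabel': 'Connections'}

    model = AnaPyzerModel(_StubParser(), _StubAnalyzer())
    model.set_log_type(AcceptedLogTypes.APACHE.value)
    model.set_graph_mode(GraphModes.CON_PER_HOUR.value)
    model.set_in_file_path('access.log')  # hypothetical path to an Apache access log
    if model.in_file_path_is_valid():
        model.create_graph_data()
        print(model.get_graph_data_title())
    else:
        print('No readable log file at', model.get_in_file_path())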