-
Notifications
You must be signed in to change notification settings - Fork 384
/
source_code_comment_handler.py
369 lines (299 loc) · 12.2 KB
/
source_code_comment_handler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
"""
Source code comment handling.
"""
import json
import logging
import re
from typing import Dict, Iterable, List, Optional, Set, TextIO, Tuple
from . import util
# Module level logger registered under the 'report-converter' name.
LOG = logging.getLogger('report-converter')

# Names of the review statuses, one per line to keep diffs small.
REVIEW_STATUS_VALUES = [
    "confirmed",
    "false_positive",
    "intentional",
    "suppress",
    "unreviewed",
]
def contains_codechecker_comment(fp):
    """Tell whether the file content has any CodeChecker review marker.

    The whole content is read from the beginning and searched for the
    'codechecker_' substring. Before returning, the stream position is
    put back to where it was when the function was called.
    """
    saved_offset = fp.tell()

    # Rewind only when needed so the full content gets scanned.
    if saved_offset != 0:
        fp.seek(0)

    found = "codechecker_" in fp.read()

    # Leave the stream exactly as the caller handed it over.
    fp.seek(saved_offset)
    return found
class SpellException(Exception):
    """Raised when a CodeChecker review status comment is misspelled."""
class SourceCodeComment:
    """A single parsed CodeChecker review comment.

    The attributes are plain and mutable:
      checkers -- set of checker names the comment refers to,
      message  -- free text part of the comment,
      status   -- review status of the comment,
      line     -- original comment text, or None when it is not set.
    """

    def __init__(
        self,
        checkers: Set[str],
        message: str,
        status: str,
        line: Optional[str] = None
    ):
        self.checkers = checkers
        self.message = message
        self.status = status
        self.line = line

    def to_json(self) -> Dict:
        """Create a JSON serialisable dictionary from the comment.

        The checker names are emitted in sorted order: 'checkers' is a
        set, so listing it directly would make the output (and __repr__)
        differ between runs.
        """
        return {
            "checkers": sorted(self.checkers),
            "message": self.message,
            "status": self.status,
            "line": self.line
        }

    def __eq__(self, other) -> bool:
        """Compare all four attributes with another SourceCodeComment.

        For any other type NotImplemented is returned instead of raising,
        so that expressions like `comment == None` or `comment in a_list`
        evaluate to a boolean rather than aborting with an exception.
        """
        # Note: because __eq__ is defined without __hash__, instances are
        # unhashable, which suits a mutable object.
        if not isinstance(other, SourceCodeComment):
            return NotImplemented

        return (self.checkers == other.checkers
                and self.message == other.message
                and self.status == other.status
                and self.line == other.line)

    def __repr__(self) -> str:
        """Return the JSON dump of to_json()."""
        return json.dumps(self.to_json())
# Type alias for a list of parsed review comments.
SourceCodeComments = List[SourceCodeComment]
class SourceCodeCommentHandler:
    """
    Handle source code comments.

    Finds the CodeChecker review comments written directly above a source
    line, either as C++ style ('// ...') or as C style ('/* ... */')
    comments, and parses them into SourceCodeComment objects.
    """

    # Keywords which introduce a review comment.
    source_code_comment_markers = [
        'codechecker_suppress',
        'codechecker_false_positive',
        'codechecker_intentional',
        'codechecker_confirmed']

    @staticmethod
    def __check_if_comment(source_line: str) -> bool:
        """
        Check if the line is a comment.

        Accepted comment format is only if line starts with '//'
        (leading whitespace is ignored).
        """
        return source_line.strip().startswith('//')

    @staticmethod
    def __check_if_cstyle_comment(source_line) -> Tuple[bool, bool]:
        """
        Check if the line contains the start '/*' or
        the end '*/' of a C style comment.

        Returns a (start_found, end_found) tuple.
        """
        return '/*' in source_line, '*/' in source_line

    def __process_source_line_comment(
        self,
        source_line_comment: str
    ) -> Optional[SourceCodeComment]:
        """
        Process CodeChecker source code comment.

        The comment delimiters ('//', '/*', '*/') must already be removed
        from the given text.

        Source code comments are having the following format:
            <source_code_markers> [<checker_names>] some comment

        Valid CodeChecker source code comments:
            // codechecker_suppress [all] some comment for all checkers
            // codechecker_confirmed [checker.name1] some comment
            // codechecker_confirmed [checker.name1, checker.name2] some multi
            // line comment

        Valid C style comments
            /* codechecker_suppress [all] some comment for all checkers*/
            /* codechecker_confirmed [checker.name1] some comment */
            /* codechecker_confirmed [checker.name1, checker.name2] some multi
               line comment */

        Returns the parsed comment (its 'line' attribute is left unset) or
        None if the text does not have the format above.
        """
        # Remove extra spaces if any; this also joins multiple lines.
        formatted = ' '.join(source_line_comment.split())

        # Check for codechecker source code comment.
        comment_markers = '|'.join(self.source_code_comment_markers)
        pattern = r'^\s*(?P<status>' + comment_markers + r')' \
            + r'\s*\[\s*(?P<checkers>[^\]]*)\s*\]\s*(?P<comment>.*)$'

        res = re.match(pattern, formatted)
        if not res:
            return None

        # Get checker names from the comment. The names are separated by
        # commas and/or whitespace; 'all' is collected like any other name.
        checkers_names = set(re.findall(r"[^,\s]+", res.group('checkers')))

        # Get comment message, falling back to a warning text if empty.
        message = res.group('comment') or \
            "WARNING! source code comment is missing"

        # Get status: the 'suppress' and 'false_positive' markers both
        # give the 'false_positive' review status.
        review_status = {
            'codechecker_intentional': 'intentional',
            'codechecker_confirmed': 'confirmed',
        }.get(res.group('status'), 'false_positive')

        return SourceCodeComment(checkers_names, message, review_status)

    def has_source_line_comments(self, fp: TextIO, line: int) -> bool:
        """
        Return True if there is any source code comment or False if not,
        for a given line.

        A misspelled review comment is logged as a warning and counts as
        no comment.
        """
        try:
            comments = self.get_source_line_comments(fp, line)
        except SpellException as ex:
            # Misspell in the review status comment.
            LOG.warning(ex)
            return False

        return bool(comments)

    def scan_source_line_comments(
        self,
        fp: TextIO,
        line_numbers: Iterable[int]
    ) -> Tuple[List[Tuple[int, SourceCodeComments]], List[str]]:
        """Collect the source line review comments of a source file at the
        given line numbers.

        Returns a (comments, misspelled_comments) tuple:
          - comments holds one (line_num, comments) tuple, in ascending
            line order, for every line number which could be processed
            without a spell error; the comment list of a line may be empty.
          - misspelled_comments holds the messages of the spell errors.

        Both lists are empty if the file has no 'codechecker_' text at all.
        """
        comments: List[Tuple[int, SourceCodeComments]] = []
        misspelled_comments: List[str] = []

        # Cheap pre-check, so files without review comments are not
        # scanned line by line.
        if not contains_codechecker_comment(fp):
            return comments, misspelled_comments

        for num in sorted(line_numbers):
            try:
                comments.append((num, self.get_source_line_comments(fp, num)))
            except SpellException as ex:
                misspelled_comments.append(str(ex))

        return comments, misspelled_comments

    def get_source_line_comments(
        self,
        fp: TextIO,
        bug_line: int
    ) -> SourceCodeComments:
        """ Returns the preprocessed source code comments for a bug line.

        The lines above the bug line are read upwards, one by one, as long
        as they belong to a comment. Every time a line with a review
        marker is reached, the comment lines collected so far are parsed
        as one review comment.

        raise: SpellException in case there is a spell error in the
               codechecker review comment keyword
        """
        previous_line_num = bug_line - 1

        # No more line.
        if previous_line_num < 1:
            return []

        source_line_comments = []

        # Lines of the comment being collected, bottom line first.
        curr_suppress_comment = []

        # Set once the closing '*/' was seen. From then on every line is
        # part of the C style comment until its opening '/*' is reached.
        cstyle_end_found = False

        # Iterate over lines while it has comments or we reached
        # the top of the file.
        while True:
            # NOTE(review): util.get_linef is defined outside of this
            # file; the loop below can call it with line number 0 too,
            # confirm that it handles that.
            source_line = util.get_linef(fp, previous_line_num)

            # cpp style comment
            is_comment = self.__check_if_comment(source_line)

            # cstyle comment
            cstyle_start, cstyle_end = \
                self.__check_if_cstyle_comment(source_line)

            if not is_comment and not cstyle_start and not cstyle_end:
                if not cstyle_end_found:
                    # Not a comment
                    break

            if cstyle_end:
                cstyle_end_found = True

            curr_suppress_comment.append(source_line)
            has_any_marker = any(marker in source_line for marker
                                 in self.source_code_comment_markers)

            # It is a comment.
            if has_any_marker:
                # Restore the top-down order of the collected lines.
                rev = list(reversed(curr_suppress_comment))

                orig_review_comment = ' '.join(rev)

                if rev[0].strip().startswith('//'):
                    # NOTE: this removes every '//' occurrence, including
                    # the ones inside the comment message.
                    review_comment = orig_review_comment.replace('//', '')
                else:
                    r_comment = []
                    for comment in rev:
                        comment = comment.strip()
                        comment = comment.replace('/*', '').replace('*/', '')
                        # Drop the leading '*' of a decorated comment line.
                        if comment.startswith('*'):
                            comment = comment[1:]
                        r_comment.append(comment)

                    review_comment = ' '.join(r_comment).strip()

                source_line_comment = self.__process_source_line_comment(
                    review_comment)

                if not source_line_comment:
                    # A marker is in the line but the comment does not
                    # have the expected format.
                    raise SpellException(
                        f"misspelled review status comment "
                        f"@{previous_line_num}: "
                        f"{orig_review_comment.strip()}")

                source_line_comment.line = orig_review_comment
                source_line_comments.append(source_line_comment)

                # Start collecting the next review comment above this one.
                curr_suppress_comment = []

            if previous_line_num > 0:
                previous_line_num -= 1
            else:
                break

            # The opening of a C style comment ends the scanning.
            if cstyle_start:
                break

        return source_line_comments

    def filter_source_line_comments(
        self,
        fp: TextIO,
        bug_line: int,
        checker_name: str
    ) -> SourceCodeComments:
        """
        This function filters the available source code comments for bug line
        by the checker name and returns a list of source code comments.

        A comment is selected if one of its checker names is 'all' or is
        contained in 'checker_name' (this is a substring check). Every
        selected comment is returned once, even if several of its checker
        names select it.

        Multiple cases are possible:
            - If the checker name is specified in one of the source code
              comments then this will be returned.
              E.g.: // codechecker_suppress [checker.name1] some comment
                    or
                    /* codechecker_suppress [checker.name1] some comment */

            - If checker name is not specified explicitly in any source code
              comment but source code comment with checker name 'all' is
              specified then this will be returned.
              E.g.: // codechecker_suppress [all] some comment
                    or
                    /* codechecker_suppress [all] some comment */

            - If multiple source code comments are specified with the same
              checker name then multiple source code comments will be
              returned.
              E.g.: // codechecker_suppress [checker.name1] some comment1
                    // codechecker_suppress [checker.name1, checker.name2] some
                    // comment1
                    or
                    /* codechecker_suppress [checker.name1] some comment1
                    codechecker_suppress [checker.name1, checker.name2] some
                    comment1 */

        raise: SpellException, see get_source_line_comments.
        """
        source_line_comments = self.get_source_line_comments(fp, bug_line)

        if not source_line_comments:
            return []

        # any() makes sure that a comment is added only once. Adding it
        # once per matching name would report a single comment as
        # multiple comments.
        checker_name_comments = [
            line_comment for line_comment in source_line_comments
            if any(bug_name == 'all' or bug_name in checker_name
                   for bug_name in line_comment.checkers)]

        if not checker_name_comments:
            LOG.debug("No source code comments are found for checker %s",
                      checker_name)
        elif len(checker_name_comments) > 1:
            # More than one source code comment found for this line.
            LOG.debug("Multiple source code comment can be found for '%s' "
                      "checker at line %s.", checker_name, bug_line)
            LOG.debug("%s", checker_name_comments)
        else:
            LOG.debug("The following source code comment is found for "
                      "checker '%s': %s", checker_name,
                      checker_name_comments[0])

        return checker_name_comments