Skip to content

Commit

Permalink
Merge pull request #3402 from csordasmarton/diagnostic_message_hash
Browse files Browse the repository at this point in the history
[analyzer] Create new diagnostic message hash
  • Loading branch information
csordasmarton authored Sep 2, 2021
2 parents b0db093 + 6e9d946 commit 5aec531
Show file tree
Hide file tree
Showing 11 changed files with 242 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@ def postprocess_result(self):
Override the context sensitive issue hash in the plist files to
context insensitive if it is enabled during analysis.
"""
if self.report_hash_type in ['context-free', 'context-free-v2'] and \
os.path.exists(self.analyzer_result_file):
replace_report_hash(self.analyzer_result_file,
HashType.CONTEXT_FREE)
if os.path.exists(self.analyzer_result_file):
if self.report_hash_type in ['context-free', 'context-free-v2']:
replace_report_hash(
self.analyzer_result_file,
HashType.CONTEXT_FREE)
elif self.report_hash_type == 'diagnostic-message':
replace_report_hash(
self.analyzer_result_file,
HashType.DIAGNOSTIC_MESSAGE)
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@ def postprocess_result(self):
# context free hash for 'context-free' choice.
if self.report_hash_type == 'context-free-v2':
replace_report_hash(output_file, HashType.CONTEXT_FREE)
elif self.report_hash_type == 'diagnostic-message':
replace_report_hash(output_file, HashType.DIAGNOSTIC_MESSAGE)
15 changes: 14 additions & 1 deletion analyzer/codechecker_analyzer/cmd/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@
from the source content are removed.
* 'column numbers' from the main diag sections location.
- diagnostic-message:
* Same as 'context-free-v2' (file name, checker message etc.)
* 'bug step messages' from all events.
Be careful with this hash because it can change easily for example on
variable / function renames.
OUR RECOMMENDATION: we recommend you to use 'context-free-v2' hash because the
hash will not be changed so easily for example on code indentation or when a
checker is renamed.
Expand Down Expand Up @@ -289,7 +296,10 @@ def add_arguments_to_parser(parser):
dest="report_hash",
default=argparse.SUPPRESS,
required=False,
choices=['context-free', 'context-free-v2'],
choices=[
'context-free',
'context-free-v2',
'diagnostic-message'],
help="R|Specify the hash calculation method for "
"reports. By default the calculation method for "
"Clang Static Analyzer is context sensitive and "
Expand All @@ -300,6 +310,9 @@ def add_arguments_to_parser(parser):
"(kept for backward compatibility).\n"
"- context-free-v2: context free hash is used "
"for ClangSA and Clang Tidy.\n"
"- diagnostic-message: context free hash with "
"bug step messages is used for ClangSA and "
"Clang Tidy.\n"
"See the 'issue hashes' section of the help "
"message of this command below for more "
"information.\n"
Expand Down
8 changes: 7 additions & 1 deletion analyzer/codechecker_analyzer/cmd/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,10 @@ def add_arguments_to_parser(parser):
dest="report_hash",
default=argparse.SUPPRESS,
required=False,
choices=['context-free', 'context-free-v2'],
choices=[
'context-free',
'context-free-v2',
'diagnostic-message'],
help="R|Specify the hash calculation method "
"for reports. By default the calculation "
"method for Clang Static Analyzer is "
Expand All @@ -246,6 +249,9 @@ def add_arguments_to_parser(parser):
"compatibility).\n"
"- context-free-v2: context free hash is "
"used for ClangSA and Clang Tidy.\n"
"- diagnostic-message: context free hash "
"with bug step messages is used for "
"ClangSA and Clang Tidy.\n"
"See the 'issue hashes' section of the "
"help message of this command below for "
"more information.\n"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
NORMAL#CodeChecker log --output $LOGFILE$ --build "make context_hash" --quiet
NORMAL#CodeChecker analyze $LOGFILE$ --output $OUTPUT$ --report-hash=diagnostic-message --analyzers clang-tidy
NORMAL#CodeChecker parse $OUTPUT$ --print-steps
CHECK#CodeChecker check --build "make context_hash" --output $OUTPUT$ --quiet --print-steps --report-hash=diagnostic-message --analyzers clang-tidy
--------------------------------------------------------------------------------
[] - Starting build ...
[] - Build finished successfully.
[] - Starting static analysis ...
[] - [1/1] clang-tidy analyzed context_hash.cpp successfully.
[] - ----==== Summary ====----
[] - Successfully analyzed
[] - clang-tidy: 1
[] - Total analyzed compilation commands: 1
[] - ----=================----
[] - Analysis finished.
[] - To view results in the terminal use the "CodeChecker parse" command.
[] - To store results use the "CodeChecker store" command.
[] - See --help and the user guide for further options about parsing and storing the reports.
[] - ----=================----
[HIGH] context_hash.cpp:23:23: suspicious usage of 'sizeof(K)'; did you mean 'K'? [bugprone-sizeof-expression]
std::memset(buf, 0, sizeof(BUFLEN)); // sizeof(42) ==> sizeof(int)
^
Report hash: 449e90b974072a5fc5f5b97e2bae4a3f
Steps:
1, context_hash.cpp:23:23: suspicious usage of 'sizeof(K)'; did you mean 'K'?

[HIGH] context_hash.cpp:34:23: suspicious usage of 'sizeof(K)'; did you mean 'K'? [bugprone-sizeof-expression]
std::memset(buf, 0, sizeof(BUFLEN)); // sizeof(42) ==> sizeof(int)
^
Report hash: 449e90b974072a5fc5f5b97e2bae4a3f
Steps:
1, context_hash.cpp:34:23: suspicious usage of 'sizeof(K)'; did you mean 'K'?

[HIGH] context_hash.cpp:37:21: suspicious usage of 'sizeof(K)'; did you mean 'K'? [bugprone-sizeof-expression]
std::memset(buf, 0, sizeof(BUFLEN)); // sizeof(42) ==> sizeof(int)
^
Report hash: 449e90b974072a5fc5f5b97e2bae4a3f
Steps:
1, context_hash.cpp:37:21: suspicious usage of 'sizeof(K)'; did you mean 'K'?

Found 3 defect(s) in context_hash.cpp


----==== Summary ====----
-------------------------------
Filename | Report count
-------------------------------
context_hash.cpp | 3
-------------------------------
-----------------------
Severity | Report count
-----------------------
HIGH | 3
-----------------------
----=================----
Total number of reports: 3
----=================----
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
NORMAL#CodeChecker log --output $LOGFILE$ --build "make context_hash" --quiet
NORMAL#CodeChecker analyze $LOGFILE$ --output $OUTPUT$ --report-hash=diagnostic-message --analyzers clangsa
NORMAL#CodeChecker parse $OUTPUT$ --print-steps
CHECK#CodeChecker check --build "make context_hash" --output $OUTPUT$ --quiet --print-steps --report-hash=diagnostic-message --analyzers clangsa
--------------------------------------------------------------------------------
[] - Starting build ...
[] - Build finished successfully.
[] - Starting static analysis ...
[] - [1/1] clangsa analyzed context_hash.cpp successfully.
[] - ----==== Summary ====----
[] - Successfully analyzed
[] - clangsa: 1
[] - Total analyzed compilation commands: 1
[] - ----=================----
[] - Analysis finished.
[] - To view results in the terminal use the "CodeChecker parse" command.
[] - To store results use the "CodeChecker store" command.
[] - See --help and the user guide for further options about parsing and storing the reports.
[] - ----=================----
[LOW] context_hash.cpp:8:3: Value stored to 'x' is never read [deadcode.DeadStores]
x = 1;
^
Report hash: d101ee6221bb0ee2294619731f8a37de
Steps:
1, context_hash.cpp:8:3: Value stored to 'x' is never read

[LOW] context_hash.cpp:13:3: Value stored to 'x' is never read [deadcode.DeadStores]
x = 1;
^
Report hash: d101ee6221bb0ee2294619731f8a37de
Steps:
1, context_hash.cpp:13:3: Value stored to 'x' is never read

[LOW] context_hash.cpp:19:3: Value stored to 'z' is never read [deadcode.DeadStores]
z = 1;
^
Report hash: 9bf6918cf70cc58a37ea556c4afed103
Steps:
1, context_hash.cpp:19:3: Value stored to 'z' is never read

Found 3 defect(s) in context_hash.cpp


----==== Summary ====----
-------------------------------
Filename | Report count
-------------------------------
context_hash.cpp | 3
-------------------------------
-----------------------
Severity | Report count
-----------------------
LOW | 3
-----------------------
----=================----
Total number of reports: 3
----=================----
10 changes: 10 additions & 0 deletions docs/analyzer/report_identification.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,13 @@ The hashes are calculated based on the following information:
- **content of the line** where the bug is, if it can be read. All
whitespace is removed from the source content.
- **range column numbers** where the bug is.

#### diagnostic-message
The hashes are calculated based on the following information:
- Same as `context-free-v2` (**file name**, **checker message** etc.)
- **bug step messages** (e.g.: *Calling 'foo'*, *Entered call from 'main'*
etc.).

**Note**: this is an experimental hash and we do not recommend using it on
your project, because it can change very easily, for example when a variable
or function is renamed.
19 changes: 15 additions & 4 deletions docs/analyzer/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ usage: CodeChecker check [-h] [-o OUTPUT_DIR] [-t {plist}] [-q]
[--keep-gcc-include-fixed] [--keep-gcc-intrin]
(-b COMMAND | -l LOGFILE) [-j JOBS] [-c]
[--compile-uniqueing COMPILE_UNIQUEING]
[--report-hash {context-free,context-free-v2}]
[--report-hash {context-free,context-free-v2,diagnostic-message}]
[-i SKIPFILE | --file FILE [FILE ...]]
[--analyzers ANALYZER [ANALYZER ...]]
[--capture-analysis-output] [--generate-reproducer]
Expand Down Expand Up @@ -189,7 +189,7 @@ analyzer arguments:
directory. (By default, CodeChecker would keep reports
and overwrites only those files that were updated by
the current build command).
--report-hash {context-free,context-free-v2}
--report-hash {context-free,context-free-v2,diagnostic-message}
Specify the hash calculation method for reports. By
default the calculation method for Clang Static
Analyzer is context sensitive and for Clang Tidy it is
Expand All @@ -200,6 +200,8 @@ analyzer arguments:
compatibility).
- context-free-v2: context free hash is used for
ClangSA and Clang Tidy.
- diagnostic-message: context free hash with bug step
messages is used for ClangSA and Clang Tidy.
See the 'issue hashes' section of the help message of
this command below for more information.
USE WISELY AND AT YOUR OWN RISK!
Expand Down Expand Up @@ -506,6 +508,13 @@ generated and not the context free hash (kept for backward compatibility). Use
from the source content are removed.
* 'column numbers' from the main diag sections location.
- diagnostic-message:
* Same as 'context-free-v2' (file name, checker message etc.)
* 'bug step messages' from all events.
Be careful with this hash because it can change easily for example on
variable / function renames.
OUR RECOMMENDATION: we recommend you to use 'context-free-v2' hash because the
hash will not be changed so easily for example on code indentation or when a
checker is renamed.
Expand Down Expand Up @@ -819,7 +828,7 @@ usage: CodeChecker analyze [-h] [-j JOBS]
[--keep-gcc-include-fixed] [--keep-gcc-intrin]
[-t {plist}] [-q] [-c]
[--compile-uniqueing COMPILE_UNIQUEING]
[--report-hash {context-free,context-free-v2}]
[--report-hash {context-free,context-free-v2,diagnostic-message}]
[-n NAME] [--analyzers ANALYZER [ANALYZER ...]]
[--add-compiler-defaults]
[--capture-analysis-output] [--generate-reproducer]
Expand Down Expand Up @@ -909,7 +918,7 @@ optional arguments:
python regex. If more than one matches an error is
given. The whole compilation action text is searched
for match. (default: none)
--report-hash {context-free,context-free-v2}
--report-hash {context-free,context-free-v2,diagnostic-message}
Specify the hash calculation method for reports. By
default the calculation method for Clang Static
Analyzer is context sensitive and for Clang Tidy it is
Expand All @@ -920,6 +929,8 @@ optional arguments:
compatibility).
- context-free-v2: context free hash is used for
ClangSA and Clang Tidy.
- diagnostic-message: context free hash with bug step
messages is used for ClangSA and Clang Tidy.
See the 'issue hashes' section of the help message of
this command below for more information.
USE WISELY AND AT YOUR OWN RISK!
Expand Down
13 changes: 13 additions & 0 deletions docs/tools/codechecker_report_hash.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
Multiple hash types are available:
- [`CONTEXT_FREE`](#generate-path-sensitive-report-hash)
- [`PATH_SENSITIVE`](#generate-context-sensitive-report-hash)
- [`DIAGNOSTIC_MESSAGE`](#generate-diagnostic-message-hash)

You can use this library to generate report hash for these types by using the
`get_report_hash` function.
Expand Down Expand Up @@ -36,6 +37,18 @@ High level overview of the hash content:
from the source content are removed.
* `column numbers` from the main diag sections location.

### Generate diagnostic message hash
The `get_report_hash` function can be used to generate a report hash that
includes the bug event messages if the hash type parameter is
`DIAGNOSTIC_MESSAGE`.

High level overview of the hash content:
* Same as `context-free-v2` (*file name*, *checker message* etc.)
* `bug step messages` from all events.

**Note**: this is an experimental hash and we do not recommend using it on
your project, because it can change very easily, for example when a variable
or function is renamed.

### Generate path hash
`get_report_path_hash` can be used to get the path hash of the given bug path,
which can be used to filter out duplicates among multiple reports.
Expand Down
46 changes: 37 additions & 9 deletions tools/codechecker_report_hash/codechecker_report_hash/hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class HashType(Enum):
""" Report hash types. """
CONTEXT_FREE = 1
PATH_SENSITIVE = 2
DIAGNOSTIC_MESSAGE = 3


def __get_line(file_path: str, line_no: int, errors: str = 'ignore') -> str:
Expand Down Expand Up @@ -121,7 +122,7 @@ def _remove_whitespace(line_content: str, old_col: int) -> Tuple[str, int]:
old_col - line_strip_len


def __get_report_hash_path_sensitive(diag: Diag, file_path: str) -> str:
def __get_report_hash_path_sensitive(diag: Diag, file_path: str) -> List[str]:
""" Report hash generation from the given diagnostic.
Hash generation algorithm for older plist versions where no
Expand Down Expand Up @@ -237,15 +238,14 @@ def compare_ctrl_sections(
col_num = loc['col']
hash_content.append(str(col_num))

return __str_to_hash('|||'.join(hash_content))

return hash_content
except Exception as ex:
LOG.error("Hash generation failed")
LOG.error(ex)
return ''
return []


def __get_report_hash_context_free(diag: Diag, file_path: str) -> str:
def __get_report_hash_context_free(diag: Diag, file_path: str) -> List[str]:
""" Generate report hash without bug path.
!!! NOT Compatible with the old hash generation method
Expand Down Expand Up @@ -290,23 +290,51 @@ def __get_report_hash_context_free(diag: Diag, file_path: str) -> str:
str(from_col),
str(until_col)]

return __str_to_hash('|||'.join(hash_content))
return hash_content
except Exception as ex:
LOG.error("Hash generation failed")
LOG.error(ex)
return []


def __get_report_hash_diagnostic_message(
diag: Diag,
file_path: str
) -> List[str]:
""" Generate report hash with bug path messages.
The hash will contain the same information as the CONTEXT_FREE hash +
'bug step messages' from events.
"""
try:
hash_content = __get_report_hash_context_free(diag, file_path)

# Add bug step messages to the hash.
for event in [x for x in diag['path'] if x.get('kind') == 'event']:
hash_content.append(event['message'])

return hash_content
except Exception as ex:
LOG.error("Hash generation failed")
LOG.error(ex)
return ''
return []


def get_report_hash(diag: Diag, file_path: str, hash_type: HashType) -> str:
""" Get report hash for the given diagnostic. """
hash_content = None

if hash_type == HashType.CONTEXT_FREE:
return __get_report_hash_context_free(diag, file_path)
hash_content = __get_report_hash_context_free(diag, file_path)
elif hash_type == HashType.PATH_SENSITIVE:
return __get_report_hash_path_sensitive(diag, file_path)
hash_content = __get_report_hash_path_sensitive(diag, file_path)
elif hash_type == HashType.DIAGNOSTIC_MESSAGE:
hash_content = __get_report_hash_diagnostic_message(diag, file_path)
else:
raise Exception("Invalid report hash type: " + str(hash_type))

return __str_to_hash('|||'.join(hash_content))


def get_report_path_hash(report) -> str:
""" Returns path hash for the given bug path.
Expand Down
Loading

0 comments on commit 5aec531

Please sign in to comment.