[analyzer] Generate reproducer
A new flag has been added to CodeChecker analyze: --generate-reproducer.
It creates .zip files under report_dir/reproducer which contain all the
information needed to reproduce an analysis. The content of these .zip
files is the same as that of the failed zips, but reproducers are
generated even when the analysis succeeds.
bruntib committed May 20, 2021
1 parent a682daf commit 7d06b04
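
As a rough illustration of what the commit message describes, here is a minimal Python sketch, modelled on the functional test added in this commit, of peeking inside one generated archive. The archive path is a hypothetical placeholder; the real file name is derived from the analyzed source file and analyzer.

```python
# Minimal sketch (not part of this commit): inspect a reproducer zip.
# The path below is a hypothetical example.
import zipfile

with zipfile.ZipFile("reports/reproducer/example_clangsa.zip") as archive:
    # Expected members include 'build-action', 'analyzer-command' and the
    # original sources under 'sources-root/'.
    print(archive.namelist())
    print(archive.read("build-action").decode("utf-8"))
```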
Showing 6 changed files with 170 additions and 28 deletions.
83 changes: 58 additions & 25 deletions analyzer/codechecker_analyzer/analysis_manager.py
@@ -246,7 +246,7 @@ def handle_success(rh, result_file, result_base, skip_handler,
skip_handler)


def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
def handle_reproducer(source_analyzer, rh, zip_file, actions_map):
"""
If the analysis fails a debug zip is packed together which contains
build, analysis information and source files to be able to
@@ -256,8 +256,7 @@ def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
action = rh.buildaction

try:
LOG.debug("Fetching other dependent files from analyzer "
"output...")
LOG.debug("Fetching other dependent files from analyzer output...")
other_files.update(
source_analyzer.get_analyzer_mentioned_files(
rh.analyzer_stdout))
@@ -315,8 +314,21 @@ def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
if toolchain:
archive.writestr("gcc-toolchain-path", toolchain)

compiler_info_file = os.path.join(rh.workspace, 'compiler_info.json')
if os.path.exists(compiler_info_file):
archive.write(compiler_info_file, "compiler_info.json")

LOG.debug("ZIP file written at '%s'", zip_file)


def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
"""
If the analysis fails a debug zip is packed together which contains
build, analysis information and source files to be able to
reproduce the failed analysis.
"""
handle_reproducer(source_analyzer, rh, zip_file, actions_map)

# In case of compiler errors the error message still needs to be collected
# from the standard output by this postprocess phase so we can present them
# as CodeChecker reports.
@@ -478,12 +490,13 @@ def check(check_data):
"""
actions_map, action, context, analyzer_config, \
output_dir, skip_handler, quiet_output_on_stdout, \
capture_analysis_output, analysis_timeout, \
capture_analysis_output, generate_reproducer, analysis_timeout, \
analyzer_environment, ctu_reanalyze_on_failure, \
output_dirs, statistics_data = check_data

failed_dir = output_dirs["failed"]
success_dir = output_dirs["success"]
reproducer_dir = output_dirs["reproducer"]

try:
# If one analysis fails the check fails.
@@ -555,37 +568,37 @@ def __create_timeout(analyzer_process):

ctu_active = is_ctu_active(source_analyzer)

ctu_suffix = '_CTU'
zip_suffix = ctu_suffix if ctu_active else ''
zip_suffix = '_CTU' if ctu_active else ''

failure_type = "_unknown"
if rh.analyzer_returncode == 1:
failure_type = "_compile_error"
elif rh.analyzer_returncode == 254:
failure_type = "_crash"
elif rh.analyzer_returncode == 0:
failure_type = ""

zip_file = result_base + zip_suffix + failure_type + '.zip'
zip_file = os.path.join(failed_dir, zip_file)

ctu_zip_file = result_base + ctu_suffix + failure_type + '.zip'
ctu_zip_file = os.path.join(failed_dir, ctu_zip_file)
failed_zip_file = os.path.join(failed_dir, zip_file)
reproducer_zip_file = os.path.join(reproducer_dir, zip_file)

return_codes = rh.analyzer_returncode

source_file_name = os.path.basename(action.source)

# Remove the previously generated error file.
if os.path.exists(zip_file):
os.remove(zip_file)

# Remove the previously generated CTU error file.
if os.path.exists(ctu_zip_file):
os.remove(ctu_zip_file)
# Remove the previously generated .zip files.
if os.path.exists(failed_zip_file):
os.remove(failed_zip_file)
if os.path.exists(reproducer_zip_file):
os.remove(reproducer_zip_file)

if rh.analyzer_returncode == 0:
handle_success(rh, result_file, result_base,
skip_handler, capture_analysis_output,
success_dir)
if generate_reproducer:
handle_reproducer(source_analyzer, rh, reproducer_zip_file,
actions_map)
LOG.info("[%d/%d] %s analyzed %s successfully.",
progress_checked_num.value, progress_actions.value,
action.analyzer_type, source_file_name)
@@ -603,8 +616,12 @@ def __create_timeout(analyzer_process):
LOG.error("\n%s", rh.analyzer_stdout)
LOG.error("\n%s", rh.analyzer_stderr)

handle_failure(source_analyzer, rh, zip_file, result_base,
actions_map)
if generate_reproducer:
handle_reproducer(source_analyzer, rh, reproducer_zip_file,
actions_map)
else:
handle_failure(source_analyzer, rh, failed_zip_file,
result_base, actions_map)

if ctu_active and ctu_reanalyze_on_failure:
LOG.error("Try to reanalyze without CTU")
@@ -630,6 +647,9 @@ def __create_timeout(analyzer_process):
handle_success(rh, result_file, result_base,
skip_handler, capture_analysis_output,
success_dir)
if generate_reproducer:
handle_reproducer(source_analyzer, rh,
reproducer_zip_file, actions_map)

LOG.info("[%d/%d] %s analyzed %s without"
" CTU successfully.",
@@ -647,10 +667,16 @@ def __create_timeout(analyzer_process):
LOG.error("Analyzing '%s' with %s without CTU failed.",
source_file_name, action.analyzer_type)

zip_file = result_base + '.zip'
zip_file = os.path.join(failed_dir, zip_file)
handle_failure(source_analyzer, rh, zip_file,
result_base, actions_map)
if generate_reproducer:
handle_reproducer(
source_analyzer, rh,
os.path.join(reproducer_dir, result_base + '.zip'),
actions_map)
else:
handle_failure(
source_analyzer, rh,
os.path.join(failed_dir, result_base + '.zip'),
result_base, actions_map)

collect_ctu_involved_files(rh, source_analyzer,
output_dirs['ctu_connections'])
@@ -704,8 +730,8 @@ def skip_cpp(compile_actions, skip_handler):

def start_workers(actions_map, actions, context, analyzer_config_map,
jobs, output_path, skip_handler, metadata_tool,
quiet_analyze, capture_analysis_output, timeout,
ctu_reanalyze_on_failure, statistics_data, manager,
quiet_analyze, capture_analysis_output, generate_reproducer,
timeout, ctu_reanalyze_on_failure, statistics_data, manager,
compile_cmd_count):
"""
Start the workers in the process pool.
@@ -740,13 +766,19 @@ def signal_handler(signum, frame):
if not os.path.exists(success_dir):
os.makedirs(success_dir)

# Similar to failed dir, but generated both in case of success and failure.
reproducer_dir = os.path.join(output_path, "reproducer")
if not os.path.exists(reproducer_dir) and generate_reproducer:
os.makedirs(reproducer_dir)

# Collect what other TUs were involved during CTU analysis.
ctu_connections_dir = os.path.join(output_path, "ctu_connections")
if not os.path.exists(ctu_connections_dir):
os.makedirs(ctu_connections_dir)

output_dirs = {'success': success_dir,
'failed': failed_dir,
'reproducer': reproducer_dir,
'ctu_connections': ctu_connections_dir}

# Construct analyzer env.
@@ -761,6 +793,7 @@ def signal_handler(signum, frame):
skip_handler,
quiet_analyze,
capture_analysis_output,
generate_reproducer,
timeout,
analyzer_environment,
ctu_reanalyze_on_failure,
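
A condensed, paraphrased sketch of the output routing the hunks above introduce (this is not the CodeChecker code itself, and the function and parameter names are assumed for illustration): on success a reproducer zip is written only when the flag is set, and on failure the reproducer archive takes the place of the failed zip.

```python
# Paraphrased sketch of the new output routing; names are illustrative.
def route_analysis_output(returncode, generate_reproducer,
                          reproducer_zip, failed_zip):
    """Return a (directory_kind, zip_path) pair describing what is written."""
    if returncode == 0:
        # Successful analysis: a reproducer is written only on request.
        if generate_reproducer:
            return ("reproducer", reproducer_zip)
        return ("success", None)
    # Failed analysis: with --generate-reproducer the archive goes to
    # 'reproducer' and the 'failed' directory stays empty.
    if generate_reproducer:
        return ("reproducer", reproducer_zip)
    return ("failed", failed_zip)
```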
1 change: 1 addition & 0 deletions analyzer/codechecker_analyzer/analyzer.py
@@ -351,6 +351,7 @@ def perform_analysis(args, skip_handler, context, actions, metadata_tool,
metadata_tool,
'quiet' in args,
'capture_analysis_output' in args,
'generate_reproducer' in args,
args.timeout if 'timeout' in args
else None,
ctu_reanalyze_on_failure,
12 changes: 12 additions & 0 deletions analyzer/codechecker_analyzer/cmd/analyze.py
@@ -343,6 +343,18 @@ def add_arguments_to_parser(parser):
"into the '<OUTPUT_DIR>/success' "
"directory.")

analyzer_opts.add_argument('--generate-reproducer',
dest='generate_reproducer',
action='store_true',
default=argparse.SUPPRESS,
required=False,
help="Collect all necessary information for "
"reproducing an analysis action. The "
"gathered files will be stored in a "
"folder named 'reproducer' under the "
"report directory. When this flag is "
"used, 'failed' directory remains empty.")

analyzer_opts.add_argument('--config',
dest='config_file',
required=False,
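
One detail worth noting about the option definition above: because it uses default=argparse.SUPPRESS, the attribute only exists on the parsed namespace when the flag was actually passed, which is why the analyzer code checks 'generate_reproducer' in args rather than reading a value. A small standalone sketch of that behaviour:

```python
# Standalone illustration of the argparse.SUPPRESS pattern used above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--generate-reproducer',
                    dest='generate_reproducer',
                    action='store_true',
                    default=argparse.SUPPRESS)

args = parser.parse_args(['--generate-reproducer'])
print('generate_reproducer' in args)   # True: the attribute was set.

args = parser.parse_args([])
print('generate_reproducer' in args)   # False: SUPPRESS left it unset.
```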
13 changes: 13 additions & 0 deletions analyzer/codechecker_analyzer/cmd/check.py
@@ -298,6 +298,18 @@ def add_arguments_to_parser(parser):
"into the '<OUTPUT_DIR>/success' "
"directory.")

analyzer_opts.add_argument('--generate-reproducer',
dest='generate_reproducer',
action='store_true',
default=argparse.SUPPRESS,
required=False,
help="Collect all necessary information for "
"reproducing an analysis action. The "
"gathered files will be stored in a "
"folder named 'reproducer' under the "
"report directory. When this flag is "
"used, 'failed' directory remains empty.")

analyzer_opts.add_argument('--config',
dest='config_file',
required=False,
@@ -792,6 +804,7 @@ def __update_if_key_exists(source, target, key):
'analyzer_config',
'checker_config',
'capture_analysis_output',
'generate_reproducer',
'config_file',
'ctu_phases',
'ctu_reanalyze_on_failure',
71 changes: 70 additions & 1 deletion analyzer/tests/functional/analyze/test_analyze.py
@@ -351,7 +351,76 @@ def test_failure(self):
self.assertEqual(archived_code.read().decode("utf-8"),
source_code.read())

os.remove(os.path.join(failed_dir, failed_files[0]))
shutil.rmtree(failed_dir)

def test_reproducer(self):
"""
Test if reports/reproducer/<reproducer_file>.zip file is created
"""
build_json = os.path.join(self.test_workspace, "build.json")
reproducer_dir = os.path.join(self.report_dir, "reproducer")
source_file = os.path.join(self.test_dir, "failure.c")

# Create a compilation database.
build_log = [{"directory": self.test_workspace,
"command": "gcc -c " + source_file,
"file": source_file
}]

with open(build_json, 'w',
encoding="utf-8", errors="ignore") as outfile:
json.dump(build_log, outfile)

# Create and run analyze command.
analyze_cmd = [self._codechecker_cmd, "analyze", build_json,
"--analyzers", "clangsa", "--verbose", "debug",
"-o", self.report_dir, "--generate-reproducer", "-c"]

print(analyze_cmd)
process = subprocess.Popen(
analyze_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=self.test_dir,
encoding="utf-8",
errors="ignore")
out, err = process.communicate()

print(out)
print(err)
errcode = process.returncode
self.assertEqual(errcode, 3)
self.assertNotIn('failed', os.listdir(self.report_dir))

self.assertNotIn("UserWarning: Duplicate name", err)

# We expect a reproducer archive to be in the reproducer directory.
reproducer_files = os.listdir(reproducer_dir)
self.assertEqual(len(reproducer_files), 1)

fail_zip = os.path.join(reproducer_dir, reproducer_files[0])

with zipfile.ZipFile(fail_zip, 'r') as archive:
files = archive.namelist()

self.assertIn("build-action", files)
self.assertIn("analyzer-command", files)

with archive.open("build-action", 'r') as archived_buildcmd:
self.assertEqual(archived_buildcmd.read().decode("utf-8"),
"gcc -c " + source_file)

source_in_archive = os.path.join("sources-root",
source_file.lstrip('/'))
self.assertIn(source_in_archive, files)

with archive.open(source_in_archive, 'r') as archived_code:
with open(source_file, 'r',
encoding="utf-8", errors="ignore") as source_code:
self.assertEqual(archived_code.read().decode("utf-8"),
source_code.read())

shutil.rmtree(reproducer_dir)

def test_robustness_for_dependencygen_failure(self):
"""
18 changes: 16 additions & 2 deletions docs/analyzer/user_guide.md
@@ -99,7 +99,8 @@ usage: CodeChecker check [-h] [-o OUTPUT_DIR] [-t {plist}] [-q]
[--report-hash {context-free,context-free-v2}]
[-i SKIPFILE | --file FILE [FILE ...]]
[--analyzers ANALYZER [ANALYZER ...]]
[--capture-analysis-output] [--config CONFIG_FILE]
[--capture-analysis-output] [--generate-reproducer]
[--config CONFIG_FILE]
[--saargs CLANGSA_ARGS_CFG_FILE]
[--tidyargs TIDY_ARGS_CFG_FILE]
[--tidy-config TIDY_CONFIG]
@@ -220,6 +221,12 @@ analyzer arguments:
Store standard output and standard error of successful
analyzer invocations into the '<OUTPUT_DIR>/success'
directory.
--generate-reproducer
Collect all necessary information for reproducing an
analysis action. The gathered files will be stored in a
folder named 'reproducer' under the report directory.
When this flag is used, 'failed' directory remains
empty.
--config CONFIG_FILE Allow the configuration from an explicit JSON based
configuration file. The value of the 'analyzer' key in
the config file will be emplaced as command line
@@ -813,7 +820,8 @@ usage: CodeChecker analyze [-h] [-j JOBS]
[--report-hash {context-free,context-free-v2}]
[-n NAME] [--analyzers ANALYZER [ANALYZER ...]]
[--add-compiler-defaults]
[--capture-analysis-output] [--config CONFIG_FILE]
[--capture-analysis-output] [--generate-reproducer]
[--config CONFIG_FILE]
[--saargs CLANGSA_ARGS_CFG_FILE]
[--tidyargs TIDY_ARGS_CFG_FILE]
[--tidy-config TIDY_CONFIG] [--timeout TIMEOUT]
@@ -1005,6 +1013,12 @@ analyzer arguments:
Store standard output and standard error of successful
analyzer invocations into the '<OUTPUT_DIR>/success'
directory.
--generate-reproducer
Collect all necessary information for reproducing an
analysis action. The gathered files will be stored in a
folder named 'reproducer' under the report directory.
When this flag is used, 'failed' directory remains
empty.
--config CONFIG_FILE Allow the configuration from an explicit JSON based
configuration file. The value of the 'analyzer' key in
the config file will be emplaced as command line
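
For completeness, a minimal sketch of driving the documented flag end to end, modelled on the functional test in this commit; the build.json path and report directory are placeholders, not values from the source.

```python
# Sketch (paths are placeholders): run an analysis with reproducer
# generation and list what ended up under <report_dir>/reproducer.
import os
import subprocess

report_dir = "./reports"
subprocess.run(["CodeChecker", "analyze", "build.json",
                "--analyzers", "clangsa",
                "-o", report_dir,
                "--generate-reproducer"],
               check=False)  # a non-zero exit is expected if analysis fails

print(os.listdir(os.path.join(report_dir, "reproducer")))
```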
