[analyzer] Generate reproducer
A new flag has been added to CodeChecker analyze: --generate-reproducer.
It creates .zip files under report_dir/reproducer which contain all the
information needed to reproduce an analysis. The content of these .zip
files is the same as that of the failed zips, but reproducers are
generated even when the analysis succeeds.
bruntib committed May 20, 2021
1 parent a682daf commit 7d06b04
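
As a rough illustration of what the commit message describes, here is a minimal Python sketch, modelled on the functional test added in this commit, of peeking inside one generated archive. The archive path is a hypothetical placeholder; the real file name is derived from the analyzed source file and analyzer.

```python
# Minimal sketch (not part of this commit): inspect a reproducer zip.
# The path below is a hypothetical example.
import zipfile

with zipfile.ZipFile("reports/reproducer/example_clangsa.zip") as archive:
    # Expected members include 'build-action', 'analyzer-command' and the
    # original sources under 'sources-root/'.
    print(archive.namelist())
    print(archive.read("build-action").decode("utf-8"))
```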
Showing 6 changed files with 170 additions and 28 deletions.
83 changes: 58 additions & 25 deletions analyzer/codechecker_analyzer/analysis_manager.py
@@ -246,7 +246,7 @@ def handle_success(rh, result_file, result_base, skip_handler,
skip_handler)


def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
def handle_reproducer(source_analyzer, rh, zip_file, actions_map):
"""
If the analysis fails a debug zip is packed together which contains
build, analysis information and source files to be able to
@@ -256,8 +256,7 @@ def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
action = rh.buildaction

try:
LOG.debug("Fetching other dependent files from analyzer "
"output...")
LOG.debug("Fetching other dependent files from analyzer output...")
other_files.update(
source_analyzer.get_analyzer_mentioned_files(
rh.analyzer_stdout))
@@ -315,8 +314,21 @@ def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
if toolchain:
archive.writestr("gcc-toolchain-path", toolchain)

compiler_info_file = os.path.join(rh.workspace, 'compiler_info.json')
if os.path.exists(compiler_info_file):
archive.write(compiler_info_file, "compiler_info.json")

LOG.debug("ZIP file written at '%s'", zip_file)


def handle_failure(source_analyzer, rh, zip_file, result_base, actions_map):
"""
If the analysis fails a debug zip is packed together which contains
build, analysis information and source files to be able to
reproduce the failed analysis.
"""
handle_reproducer(source_analyzer, rh, zip_file, actions_map)

# In case of compiler errors the error message still needs to be collected
# from the standard output by this postprocess phase so we can present them
# as CodeChecker reports.
@@ -478,12 +490,13 @@ def check(check_data):
"""
actions_map, action, context, analyzer_config, \
output_dir, skip_handler, quiet_output_on_stdout, \
capture_analysis_output, analysis_timeout, \
capture_analysis_output, generate_reproducer, analysis_timeout, \
analyzer_environment, ctu_reanalyze_on_failure, \
output_dirs, statistics_data = check_data

failed_dir = output_dirs["failed"]
success_dir = output_dirs["success"]
reproducer_dir = output_dirs["reproducer"]

try:
# If one analysis fails the check fails.
@@ -555,37 +568,37 @@ def __create_timeout(analyzer_process):

ctu_active = is_ctu_active(source_analyzer)

ctu_suffix = '_CTU'
zip_suffix = ctu_suffix if ctu_active else ''
zip_suffix = '_CTU' if ctu_active else ''

failure_type = "_unknown"
if rh.analyzer_returncode == 1:
failure_type = "_compile_error"
elif rh.analyzer_returncode == 254:
failure_type = "_crash"
elif rh.analyzer_returncode == 0:
failure_type = ""

zip_file = result_base + zip_suffix + failure_type + '.zip'
zip_file = os.path.join(failed_dir, zip_file)

ctu_zip_file = result_base + ctu_suffix + failure_type + '.zip'
ctu_zip_file = os.path.join(failed_dir, ctu_zip_file)
failed_zip_file = os.path.join(failed_dir, zip_file)
reproducer_zip_file = os.path.join(reproducer_dir, zip_file)

return_codes = rh.analyzer_returncode

source_file_name = os.path.basename(action.source)

# Remove the previously generated error file.
if os.path.exists(zip_file):
os.remove(zip_file)

# Remove the previously generated CTU error file.
if os.path.exists(ctu_zip_file):
os.remove(ctu_zip_file)
# Remove the previously generated .zip files.
if os.path.exists(failed_zip_file):
os.remove(failed_zip_file)
if os.path.exists(reproducer_zip_file):
os.remove(reproducer_zip_file)

if rh.analyzer_returncode == 0:
handle_success(rh, result_file, result_base,
skip_handler, capture_analysis_output,
success_dir)
if generate_reproducer:
handle_reproducer(source_analyzer, rh, reproducer_zip_file,
actions_map)
LOG.info("[%d/%d] %s analyzed %s successfully.",
progress_checked_num.value, progress_actions.value,
action.analyzer_type, source_file_name)
@@ -603,8 +616,12 @@ def __create_timeout(analyzer_process):
LOG.error("\n%s", rh.analyzer_stdout)
LOG.error("\n%s", rh.analyzer_stderr)

handle_failure(source_analyzer, rh, zip_file, result_base,
actions_map)
if generate_reproducer:
handle_reproducer(source_analyzer, rh, reproducer_zip_file,
actions_map)
else:
handle_failure(source_analyzer, rh, failed_zip_file,
result_base, actions_map)

if ctu_active and ctu_reanalyze_on_failure:
LOG.error("Try to reanalyze without CTU")
@@ -630,6 +647,9 @@ def __create_timeout(analyzer_process):
handle_success(rh, result_file, result_base,
skip_handler, capture_analysis_output,
success_dir)
if generate_reproducer:
handle_reproducer(source_analyzer, rh,
reproducer_zip_file, actions_map)

LOG.info("[%d/%d] %s analyzed %s without"
" CTU successfully.",
@@ -647,10 +667,16 @@ def __create_timeout(analyzer_process):
LOG.error("Analyzing '%s' with %s without CTU failed.",
source_file_name, action.analyzer_type)

zip_file = result_base + '.zip'
zip_file = os.path.join(failed_dir, zip_file)
handle_failure(source_analyzer, rh, zip_file,
result_base, actions_map)
if generate_reproducer:
handle_reproducer(
source_analyzer, rh,
os.path.join(reproducer_dir, result_base + '.zip'),
actions_map)
else:
handle_failure(
source_analyzer, rh,
os.path.join(failed_dir, result_base + '.zip'),
result_base, actions_map)

collect_ctu_involved_files(rh, source_analyzer,
output_dirs['ctu_connections'])
@@ -704,8 +730,8 @@ def skip_cpp(compile_actions, skip_handler):

def start_workers(actions_map, actions, context, analyzer_config_map,
jobs, output_path, skip_handler, metadata_tool,
quiet_analyze, capture_analysis_output, timeout,
ctu_reanalyze_on_failure, statistics_data, manager,
quiet_analyze, capture_analysis_output, generate_reproducer,
timeout, ctu_reanalyze_on_failure, statistics_data, manager,
compile_cmd_count):
"""
Start the workers in the process pool.
@@ -740,13 +766,19 @@ def signal_handler(signum, frame):
if not os.path.exists(success_dir):
os.makedirs(success_dir)

# Similar to failed dir, but generated both in case of success and failure.
reproducer_dir = os.path.join(output_path, "reproducer")
if not os.path.exists(reproducer_dir) and generate_reproducer:
os.makedirs(reproducer_dir)

# Collect what other TUs were involved during CTU analysis.
ctu_connections_dir = os.path.join(output_path, "ctu_connections")
if not os.path.exists(ctu_connections_dir):
os.makedirs(ctu_connections_dir)

output_dirs = {'success': success_dir,
'failed': failed_dir,
'reproducer': reproducer_dir,
'ctu_connections': ctu_connections_dir}

# Construct analyzer env.
@@ -761,6 +793,7 @@ def signal_handler(signum, frame):
skip_handler,
quiet_analyze,
capture_analysis_output,
generate_reproducer,
timeout,
analyzer_environment,
ctu_reanalyze_on_failure,
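
A condensed, paraphrased sketch of the output routing the hunks above introduce (this is not the CodeChecker code itself, and the function and parameter names are assumed for illustration): on success a reproducer zip is written only when the flag is set, and on failure the reproducer archive takes the place of the failed zip.

```python
# Paraphrased sketch of the new output routing; names are illustrative.
def route_analysis_output(returncode, generate_reproducer,
                          reproducer_zip, failed_zip):
    """Return a (directory_kind, zip_path) pair describing what is written."""
    if returncode == 0:
        # Successful analysis: a reproducer is written only on request.
        if generate_reproducer:
            return ("reproducer", reproducer_zip)
        return ("success", None)
    # Failed analysis: with --generate-reproducer the archive goes to
    # 'reproducer' and the 'failed' directory stays empty.
    if generate_reproducer:
        return ("reproducer", reproducer_zip)
    return ("failed", failed_zip)
```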
1 change: 1 addition & 0 deletions analyzer/codechecker_analyzer/analyzer.py
@@ -351,6 +351,7 @@ def perform_analysis(args, skip_handler, context, actions, metadata_tool,
metadata_tool,
'quiet' in args,
'capture_analysis_output' in args,
'generate_reproducer' in args,
args.timeout if 'timeout' in args
else None,
ctu_reanalyze_on_failure,
12 changes: 12 additions & 0 deletions analyzer/codechecker_analyzer/cmd/analyze.py
@@ -343,6 +343,18 @@ def add_arguments_to_parser(parser):
"into the '<OUTPUT_DIR>/success' "
"directory.")

analyzer_opts.add_argument('--generate-reproducer',
dest='generate_reproducer',
action='store_true',
default=argparse.SUPPRESS,
required=False,
help="Collect all necessary information for "
"reproducing an analysis action. The "
"gathered files will be stored in a "
"folder named 'reproducer' under the "
"report directory. When this flag is "
"used, 'failed' directory remains empty.")

analyzer_opts.add_argument('--config',
dest='config_file',
required=False,
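
One detail worth noting about the option definition above: because it uses default=argparse.SUPPRESS, the attribute only exists on the parsed namespace when the flag was actually passed, which is why the analyzer code checks 'generate_reproducer' in args rather than reading a value. A small standalone sketch of that behaviour:

```python
# Standalone illustration of the argparse.SUPPRESS pattern used above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--generate-reproducer',
                    dest='generate_reproducer',
                    action='store_true',
                    default=argparse.SUPPRESS)

args = parser.parse_args(['--generate-reproducer'])
print('generate_reproducer' in args)   # True: the attribute was set.

args = parser.parse_args([])
print('generate_reproducer' in args)   # False: SUPPRESS left it unset.
```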
13 changes: 13 additions & 0 deletions analyzer/codechecker_analyzer/cmd/check.py
@@ -298,6 +298,18 @@ def add_arguments_to_parser(parser):
"into the '<OUTPUT_DIR>/success' "
"directory.")

analyzer_opts.add_argument('--generate-reproducer',
dest='generate_reproducer',
action='store_true',
default=argparse.SUPPRESS,
required=False,
help="Collect all necessary information for "
"reproducing an analysis action. The "
"gathered files will be stored in a "
"folder named 'reproducer' under the "
"report directory. When this flag is "
"used, 'failed' directory remains empty.")

analyzer_opts.add_argument('--config',
dest='config_file',
required=False,
@@ -792,6 +804,7 @@ def __update_if_key_exists(source, target, key):
'analyzer_config',
'checker_config',
'capture_analysis_output',
'generate_reproducer',
'config_file',
'ctu_phases',
'ctu_reanalyze_on_failure',
71 changes: 70 additions & 1 deletion analyzer/tests/functional/analyze/test_analyze.py
@@ -351,7 +351,76 @@ def test_failure(self):
self.assertEqual(archived_code.read().decode("utf-8"),
source_code.read())

os.remove(os.path.join(failed_dir, failed_files[0]))
shutil.rmtree(failed_dir)

def test_reproducer(self):
"""
Test if reports/reproducer/<reproducer_file>.zip file is created
"""
build_json = os.path.join(self.test_workspace, "build.json")
reproducer_dir = os.path.join(self.report_dir, "reproducer")
source_file = os.path.join(self.test_dir, "failure.c")

# Create a compilation database.
build_log = [{"directory": self.test_workspace,
"command": "gcc -c " + source_file,
"file": source_file
}]

with open(build_json, 'w',
encoding="utf-8", errors="ignore") as outfile:
json.dump(build_log, outfile)

# Create and run analyze command.
analyze_cmd = [self._codechecker_cmd, "analyze", build_json,
"--analyzers", "clangsa", "--verbose", "debug",
"-o", self.report_dir, "--generate-reproducer", "-c"]

print(analyze_cmd)
process = subprocess.Popen(
analyze_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=self.test_dir,
encoding="utf-8",
errors="ignore")
out, err = process.communicate()

print(out)
print(err)
errcode = process.returncode
self.assertEqual(errcode, 3)
self.assertNotIn('failed', os.listdir(self.report_dir))

self.assertNotIn("UserWarning: Duplicate name", err)

# We expect a reproducer archive to be in the reproducer directory.
reproducer_files = os.listdir(reproducer_dir)
self.assertEqual(len(reproducer_files), 1)

fail_zip = os.path.join(reproducer_dir, reproducer_files[0])

with zipfile.ZipFile(fail_zip, 'r') as archive:
files = archive.namelist()

self.assertIn("build-action", files)
self.assertIn("analyzer-command", files)

with archive.open("build-action", 'r') as archived_buildcmd:
self.assertEqual(archived_buildcmd.read().decode("utf-8"),
"gcc -c " + source_file)

source_in_archive = os.path.join("sources-root",
source_file.lstrip('/'))
self.assertIn(source_in_archive, files)

with archive.open(source_in_archive, 'r') as archived_code:
with open(source_file, 'r',
encoding="utf-8", errors="ignore") as source_code:
self.assertEqual(archived_code.read().decode("utf-8"),
source_code.read())

shutil.rmtree(reproducer_dir)

def test_robustness_for_dependencygen_failure(self):
"""
18 changes: 16 additions & 2 deletions docs/analyzer/user_guide.md
@@ -99,7 +99,8 @@ usage: CodeChecker check [-h] [-o OUTPUT_DIR] [-t {plist}] [-q]
[--report-hash {context-free,context-free-v2}]
[-i SKIPFILE | --file FILE [FILE ...]]
[--analyzers ANALYZER [ANALYZER ...]]
[--capture-analysis-output] [--config CONFIG_FILE]
[--capture-analysis-output] [--generate-reproducer]
[--config CONFIG_FILE]
[--saargs CLANGSA_ARGS_CFG_FILE]
[--tidyargs TIDY_ARGS_CFG_FILE]
[--tidy-config TIDY_CONFIG]
@@ -220,6 +221,12 @@ analyzer arguments:
Store standard output and standard error of successful
analyzer invocations into the '<OUTPUT_DIR>/success'
directory.
--generate-reproducer
Collect all necessary information for reproducing an
analysis action. The gathered files will be stored in a
folder named 'reproducer' under the report directory.
When this flag is used, 'failed' directory remains
empty.
--config CONFIG_FILE Allow the configuration from an explicit JSON based
configuration file. The value of the 'analyzer' key in
the config file will be emplaced as command line
@@ -813,7 +820,8 @@ usage: CodeChecker analyze [-h] [-j JOBS]
[--report-hash {context-free,context-free-v2}]
[-n NAME] [--analyzers ANALYZER [ANALYZER ...]]
[--add-compiler-defaults]
[--capture-analysis-output] [--config CONFIG_FILE]
[--capture-analysis-output] [--generate-reproducer]
[--config CONFIG_FILE]
[--saargs CLANGSA_ARGS_CFG_FILE]
[--tidyargs TIDY_ARGS_CFG_FILE]
[--tidy-config TIDY_CONFIG] [--timeout TIMEOUT]
@@ -1005,6 +1013,12 @@ analyzer arguments:
Store standard output and standard error of successful
analyzer invocations into the '<OUTPUT_DIR>/success'
directory.
--generate-reproducer
Collect all necessary information for reproducing an
analysis action. The gathered files will be stored in a
folder named 'reproducer' under the report directory.
When this flag is used, 'failed' directory remains
empty.
--config CONFIG_FILE Allow the configuration from an explicit JSON based
configuration file. The value of the 'analyzer' key in
the config file will be emplaced as command line
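
For completeness, a minimal sketch of driving the documented flag end to end, modelled on the functional test in this commit; the build.json path and report directory are placeholders, not values from the source.

```python
# Sketch (paths are placeholders): run an analysis with reproducer
# generation and list what ended up under <report_dir>/reproducer.
import os
import subprocess

report_dir = "./reports"
subprocess.run(["CodeChecker", "analyze", "build.json",
                "--analyzers", "clangsa",
                "-o", report_dir,
                "--generate-reproducer"],
               check=False)  # a non-zero exit is expected if analysis fails

print(os.listdir(os.path.join(report_dir, "reproducer")))
```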
