Ericsson · bruntib · Jan 21, 2022 · Jan 13, 2022
@@ -300,7 +300,10 @@ def handle_reproducer(source_analyzer, rh, zip_file, actions_map):
 
         LOG.debug("[ZIP] Writing extra information...")
         archive.writestr("build-action", action.original_command)
-        archive.writestr("analyzer-command", ' '.join(rh.analyzer_cmd))
+        archive.writestr(
+            "analyzer-command",
+            ' '.join([shlex.quote(x) for x in rh.analyzer_cmd]),
+            )
         archive.writestr("return-code", str(rh.analyzer_returncode))
 
         toolchain = gcc_toolchain.toolchain_in_args(

@@ -15,6 +15,7 @@
 import signal
 import subprocess
 import sys
+import shlex
 
 from codechecker_common.logger import get_logger
 
@@ -82,7 +83,8 @@ def analyze(self, analyzer_cmd, res_handler, env=None, proc_callback=None):
         """
         LOG.debug('Running analyzer ...')
 
-        LOG.debug_analyzer('\n%s', ' '.join(analyzer_cmd))
+        LOG.debug_analyzer('\n%s',
+                           ' '.join([shlex.quote(x) for x in analyzer_cmd]))
 
         res_handler.analyzer_cmd = analyzer_cmd
         try:

@@ -13,6 +13,7 @@
 import os
 import pickle
 import platform
+import shlex
 import subprocess
 import sys
 from uuid import uuid4
@@ -81,7 +82,7 @@ def perform_build_command(logfile, command, context, keep_link, silent=False,
         final_command = command
         command = ' '.join(["intercept-build",
                             "--cdb", logfile,
-                            "sh -c \"" + final_command + "\""])
+                            "sh -c", shlex.quote(final_command)])
         log_env = original_env
         LOG.debug_analyzer(command)
 

@@ -968,7 +968,8 @@ def parse_options(compilation_db_entry,
 
     if 'arguments' in compilation_db_entry:
         gcc_command = compilation_db_entry['arguments']
-        details['original_command'] = ' '.join(gcc_command)
+        details['original_command'] = \
+            ' '.join([shlex.quote(x) for x in gcc_command])
     elif 'command' in compilation_db_entry:
         details['original_command'] = compilation_db_entry['command']
         gcc_command = shlex.split(compilation_db_entry['command'])
@@ -1172,10 +1173,10 @@ def extend_compilation_database_entries(compilation_database):
                         continue
 
                     opts, sources = process_response_file(response_file)
-                    cmd.extend(opts)
+                    cmd.extend([shlex.quote(x) for x in opts])
                     source_files.extend(sources)
                 else:
-                    cmd.append(opt)
+                    cmd.append(shlex.quote(opt))
 
             entry['command'] = ' '.join(cmd)
 

@@ -29,6 +29,7 @@ def show(out, err):
         print("\nTEST execute stderr:\n")
         print(err)
 
+    cmd_log = ' '.join([shlex.quote(x) for x in cmd])
     try:
         proc = subprocess.Popen(
             cmd,
@@ -41,13 +42,13 @@ def show(out, err):
         out, err = proc.communicate()
         if proc.returncode != 0:
             show(out, err)
-            print('Unsuccessful run: "' + ' '.join(cmd) + '"')
+            print(f'Unsuccessful run: {cmd_log}')
             print(proc.returncode)
         return out, err, proc.returncode
     except OSError as oerr:
         print(oerr)
         show(out, err)
-        print('Failed to run: "' + ' '.join(cmd) + '"')
+        print(f'Failed to run: {cmd_log}')
         raise
 
 

@@ -0,0 +1 @@
+'-DVARIABLE="some value"' '-DVARIABLE2="me@domain.com"'
@@ -0,0 +1,7 @@
+[
+	{
+		"directory": "/tmp",
+		"command": "g++ -DVARIABLE=\\\"me@domain.com\\\" /tmp/a.cpp -o /tmp/a.out",
+		"file": "/tmp/a.cpp"
+	}
+]
@@ -0,0 +1,7 @@
+[
+	{
+		"directory": "PLACEHOLDER",
+		"command": "g++ @ldlogger\\ response.rsp /tmp/a.cpp -o /tmp/a.out",
+		"file": "/tmp/a.cpp"
+	}
+]
@@ -1,7 +1,7 @@
 [
 	{
 		"directory": "/tmp",
-		"command": "g++ /tmp/a b.cpp",
+		"command": "g++ /tmp/a\\ b.cpp",
 		"file": "/tmp/a b.cpp"
 	}
 ]
@@ -1,7 +1,7 @@
 [
 	{
 		"directory": "/tmp",
-		"command": "g++ -DVARIABLE=\"some value\" /tmp/a.cpp -o /tmp/a.out",
+		"command": "g++ -DVARIABLE=\\\"some\\ value\\\" /tmp/a.cpp -o /tmp/a.out",
 		"file": "/tmp/a.cpp"
 	}
 ]
@@ -103,7 +103,7 @@ def test_new_ldlogger(self):
         self.assertEqual(len(build_action.analyzer_options), 1)
         self.assertTrue(len(build_action.target) > 0)
         self.assertEqual(build_action.analyzer_options[0],
-                         r'-DVARIABLE=some value')
+                         r'-DVARIABLE="some value"')
 
         # Test source file with spaces.
         logfile = os.path.join(self.__test_files, "ldlogger-new-space.json")
@@ -115,16 +115,42 @@ def test_new_ldlogger(self):
         self.assertEqual(build_action.source, r'/tmp/a b.cpp')
         self.assertEqual(build_action.lang, 'c++')
 
+        # Test @ sign in variable definition.
+        logfile = os.path.join(self.__test_files, "ldlogger-new-at.json")
+
+        build_actions, _ = log_parser.\
+            parse_unique_log(load_json_or_empty(logfile), self.__this_dir)
+        build_action = build_actions[0]
+
+        self.assertEqual(len(build_action.analyzer_options), 1)
+        self.assertEqual(build_action.analyzer_options[0],
+                         r'-DVARIABLE="me@domain.com"')
+
+        # Test the same stuff with response files.
+        logfile = os.path.join(self.__test_files, "ldlogger-new-response.json")
+        logjson = load_json_or_empty(logfile)
+        # Make it relative to the response file.
+        logjson[0]['directory'] = self.__test_files
+
+        build_actions, _ = log_parser.\
+            parse_unique_log(logjson, self.__this_dir)
+        build_action = build_actions[0]
+
+        self.assertEqual(len(build_action.analyzer_options), 2)
+        self.assertEqual(build_action.analyzer_options[0],
+                         r'-DVARIABLE="some value"')
+        self.assertEqual(build_action.analyzer_options[1],
+                         r'-DVARIABLE2="me@domain.com"')
+
     def test_old_intercept_build(self):
         """
         Test log file parsing escape behaviour with clang-5.0 intercept-build.
         """
-        logfile = os.path.join(self.__test_files, "intercept-old.json")
-
+        # FIXME: Yes, the json is actually bad! The space should have been
+        #        escaped by intercept-build along with the backslash.
         # Scan-build-py shipping with clang-5.0 makes a logfile that contains:
         # -DVARIABLE=\"some value\" and --target=x86_64-linux-gnu
-        #
-        # The define is passed to the analyzer properly.
+        logfile = os.path.join(self.__test_files, "intercept-old.json")
 
         build_actions, _ = log_parser.\
             parse_unique_log(load_json_or_empty(logfile), self.__this_dir)
@@ -133,6 +159,8 @@ def test_old_intercept_build(self):
         self.assertEqual(build_action.source, r'/tmp/a.cpp')
         self.assertEqual(len(build_action.analyzer_options), 1)
         self.assertTrue(len(build_action.target) > 0)
+        # FIXME: We should expect r'-DVARIABLE="some value"' with a fixed
+        #        intercept-build.
         self.assertEqual(build_action.analyzer_options[0],
                          r'-DVARIABLE="some')
 
@@ -454,7 +482,8 @@ def test_response_file_simple(self):
             parse_unique_log(load_json_or_empty(logfile), self.__this_dir)
         build_action = build_actions[0]
         self.assertEqual(len(build_action.analyzer_options), 1)
-        self.assertEqual(build_action.analyzer_options[0], '-DVARIABLE=some')
+        self.assertEqual(build_action.analyzer_options[0],
+                         '-DVARIABLE=some value')
 
     def test_response_file_contains_source_file(self):
         """
@@ -481,7 +510,8 @@ def test_response_file_contains_source_file(self):
 
         self.assertEqual(len(build_action.analyzer_options), 1)
         self.assertEqual(build_action.source, self.src_file_path)
-        self.assertEqual(build_action.analyzer_options[0], '-DVARIABLE=some')
+        self.assertEqual(build_action.analyzer_options[0],
+                         '-DVARIABLE=some value')
 
     def test_response_file_contains_multiple_source_files(self):
         """
@@ -521,12 +551,14 @@ def test_response_file_contains_multiple_source_files(self):
         a_build_action = [b for b in build_actions
                           if b.source == a_file_path][0]
         self.assertEqual(len(a_build_action.analyzer_options), 1)
-        self.assertEqual(a_build_action.analyzer_options[0], '-DVARIABLE=some')
+        self.assertEqual(a_build_action.analyzer_options[0],
+                         '-DVARIABLE=some value')
 
         b_build_action = [b for b in build_actions
                           if b.source == b_file_path][0]
         self.assertEqual(len(b_build_action.analyzer_options), 1)
-        self.assertEqual(b_build_action.analyzer_options[0], '-DVARIABLE=some')
+        self.assertEqual(b_build_action.analyzer_options[0],
+                         '-DVARIABLE=some value')
 
     def test_source_file_contains_at_sign(self):
         """
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		'-DVARIABLE="some value"' '-DVARIABLE2="me@domain.com"'