From 7a98601745249578d76a4bdc5a819eeae2480aa2 Mon Sep 17 00:00:00 2001
From: Discookie
Date: Mon, 30 Oct 2023 13:34:33 +0000
Subject: [PATCH] Better sanitization for stored file names

Instead of only removing a single leading slash, the path is normalized
and sanitized before being read from the disk.
---
 codechecker_common/util.py                    |  7 +++++++
 .../codechecker_server/api/mass_store_run.py  | 20 +++++++++----------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/codechecker_common/util.py b/codechecker_common/util.py
index 08743f8f35..f7930c93d1 100644
--- a/codechecker_common/util.py
+++ b/codechecker_common/util.py
@@ -13,6 +13,7 @@
 import itertools
 import json
 from typing import TextIO
+import os
 import portalocker
 
 from codechecker_common.logger import get_logger
@@ -98,3 +99,9 @@ def get_linef(fp: TextIO, line_no: int) -> str:
         if line_no == 0:
             return line
     return ''
+
+def path_for_fake_root(full_path: str, root_path: str = '/') -> str:
+    """Normalize and sanitize full_path, then make it relative to root_path."""
+    relative_path = os.path.relpath(full_path, '/')
+    fake_root_path = os.path.join(root_path, relative_path)
+    return os.path.realpath(fake_root_path)
diff --git a/web/server/codechecker_server/api/mass_store_run.py b/web/server/codechecker_server/api/mass_store_run.py
index f311f9dc69..6ea0a6bb49 100644
--- a/web/server/codechecker_server/api/mass_store_run.py
+++ b/web/server/codechecker_server/api/mass_store_run.py
@@ -28,7 +28,7 @@
 from codechecker_common.logger import get_logger
 from codechecker_common.review_status_handler import ReviewStatusHandler, \
     SourceReviewStatus
-from codechecker_common.util import load_json
+from codechecker_common.util import load_json, path_for_fake_root
 
 from codechecker_report_converter.util import trim_path_prefixes
 from codechecker_report_converter.report import report_file, Report
@@ -391,13 +391,12 @@ def __store_source_files(
         file_path_to_id = {}
 
         for file_name, file_hash in filename_to_hash.items():
-            source_file_name = os.path.join(source_root, file_name.strip("/"))
-            source_file_name = os.path.realpath(source_file_name)
-            LOG.debug("Storing source file: %s", source_file_name)
+            source_file_path = path_for_fake_root(file_name, source_root)
+            LOG.debug("Storing source file: %s", source_file_path)
             trimmed_file_path = trim_path_prefixes(
                 file_name, self.__trim_path_prefixes)
 
-            if not os.path.isfile(source_file_name):
+            if not os.path.isfile(source_file_path):
                 # The file was not in the ZIP file, because we already
                 # have the content. Let's check if we already have a file
                 # record in the database or we need to add one.
@@ -407,16 +406,17 @@ def __store_source_files(
                     fid = add_file_record(
                         session, trimmed_file_path, file_hash)
 
-                if not fid:
+                if fid:
+                    file_path_to_id[trimmed_file_path] = fid
+                    LOG.debug("%d fileid found", fid)
+                else:
                     LOG.error("File ID for %s is not found in the DB with "
                               "content hash %s. Missing from ZIP?",
-                              source_file_name, file_hash)
-                file_path_to_id[trimmed_file_path] = fid
-                LOG.debug("%d fileid found", fid)
+                              source_file_path, file_hash)
                 continue
 
             with DBSession(self.__Session) as session:
-                self.__add_file_content(session, source_file_name, file_hash)
+                self.__add_file_content(session, source_file_path, file_hash)
 
                 file_path_to_id[trimmed_file_path] = add_file_record(
                     session, trimmed_file_path, file_hash)