Skip to content

Commit

Permalink
Better sanitization for stored file names
Browse files Browse the repository at this point in the history
Instead of only stripping leading and trailing slashes,
the path is normalized and sanitized before the file is read from disk.
  • Loading branch information
Discookie committed Dec 7, 2023
1 parent 4f5c7fa commit 7a98601
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 10 deletions.
7 changes: 7 additions & 0 deletions codechecker_common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import itertools
import json
from typing import TextIO
import os
import portalocker

from codechecker_common.logger import get_logger
Expand Down Expand Up @@ -98,3 +99,9 @@ def get_linef(fp: TextIO, line_no: int) -> str:
if line_no == 0:
return line
return ''

def path_for_fake_root(full_path: str, root_path: str = '/') -> str:
    """Re-root full_path under root_path after normalizing it.

    full_path is first anchored at '/' and normalized, so any '..'
    components are collapsed at the filesystem root and cannot climb above
    it. The remaining path is then joined onto root_path and resolved with
    os.path.realpath.

    A relative full_path is treated as if it were given relative to '/',
    so the result depends only on the arguments and not on the current
    working directory of the process.

    Args:
        full_path: The (possibly untrusted) path to re-root.
        root_path: The directory that acts as the fake filesystem root.

    Returns:
        The resolved path of full_path placed under root_path.
    """
    # Anchor at '/' before normalizing: os.path.relpath would otherwise
    # resolve a relative input against os.getcwd() and leak the working
    # directory into the result. Joining with an absolute full_path simply
    # yields full_path itself.
    anchored_path = os.path.normpath(os.path.join('/', full_path))
    relative_path = os.path.relpath(anchored_path, '/')
    fake_root_path = os.path.join(root_path, relative_path)
    # NOTE(review): realpath follows symlinks, so a symlink that already
    # exists under root_path could still point outside of it; confirm that
    # callers only use directories whose content they control.
    return os.path.realpath(fake_root_path)
20 changes: 10 additions & 10 deletions web/server/codechecker_server/api/mass_store_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from codechecker_common.logger import get_logger
from codechecker_common.review_status_handler import ReviewStatusHandler, \
SourceReviewStatus
from codechecker_common.util import load_json
from codechecker_common.util import load_json, path_for_fake_root

from codechecker_report_converter.util import trim_path_prefixes
from codechecker_report_converter.report import report_file, Report
Expand Down Expand Up @@ -391,13 +391,12 @@ def __store_source_files(
file_path_to_id = {}

for file_name, file_hash in filename_to_hash.items():
source_file_name = os.path.join(source_root, file_name.strip("/"))
source_file_name = os.path.realpath(source_file_name)
LOG.debug("Storing source file: %s", source_file_name)
source_file_path = path_for_fake_root(file_name, source_root)
LOG.debug("Storing source file: %s", source_file_path)
trimmed_file_path = trim_path_prefixes(
file_name, self.__trim_path_prefixes)

if not os.path.isfile(source_file_name):
if not os.path.isfile(source_file_path):
# The file was not in the ZIP file, because we already
# have the content. Let's check if we already have a file
# record in the database or we need to add one.
Expand All @@ -407,16 +406,17 @@ def __store_source_files(
fid = add_file_record(
session, trimmed_file_path, file_hash)

if not fid:
if fid:
file_path_to_id[trimmed_file_path] = fid
LOG.debug("%d fileid found", fid)
else:
LOG.error("File ID for %s is not found in the DB with "
"content hash %s. Missing from ZIP?",
source_file_name, file_hash)
file_path_to_id[trimmed_file_path] = fid
LOG.debug("%d fileid found", fid)
source_file_path, file_hash)
continue

with DBSession(self.__Session) as session:
self.__add_file_content(session, source_file_name, file_hash)
self.__add_file_content(session, source_file_path, file_hash)

file_path_to_id[trimmed_file_path] = add_file_record(
session, trimmed_file_path, file_hash)
Expand Down

0 comments on commit 7a98601

Please sign in to comment.