Skip to content

Commit

Permalink
[web] Update blame info
Browse files Browse the repository at this point in the history
When a run is stured with an old CodeChecker version then blame info is
empty for the file contents. In a subsequent store we'd like to update
blame info for these files.
  • Loading branch information
bruntib committed Nov 11, 2021
1 parent 85dd52d commit 5d27c3b
Show file tree
Hide file tree
Showing 17 changed files with 214 additions and 40 deletions.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion web/api/js/codechecker-api-node/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codechecker-api",
"version": "6.46.0",
"version": "6.47.0",
"description": "Generated node.js compatible API stubs for CodeChecker server.",
"main": "lib",
"homepage": "https://github.com/Ericsson/codechecker",
Expand Down
Binary file modified web/api/py/codechecker_api/dist/codechecker_api.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion web/api/py/codechecker_api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
with open('README.md', encoding='utf-8', errors="ignore") as f:
long_description = f.read()

api_version = '6.46.0'
api_version = '6.47.0'

setup(
name='codechecker_api',
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion web/api/py/codechecker_api_shared/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
with open('README.md', encoding='utf-8', errors="ignore") as f:
long_description = f.read()

api_version = '6.46.0'
api_version = '6.47.0'

setup(
name='codechecker_api_shared',
Expand Down
8 changes: 8 additions & 0 deletions web/api/report_server.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,14 @@ service codeCheckerDBAccess {
list<string> getMissingContentHashes(1: list<string> fileHashes)
throws (1: codechecker_api_shared.RequestFailed requestError),

// The client can ask the server whether a blame info is already stored in the
// database. If it is, then it is not necessary to send it in the ZIP file
// with massStoreRun() function. This function requires a list of file hashes
// (sha256) and returns the ones to which no blame info is stored yet.
// PERMISSION: PRODUCT_STORE
list<string> getMissingContentHashesForBlameInfo(1: list<string> fileHashes)
throws (1: codechecker_api_shared.RequestFailed requestError),

// This function stores an entire run encapsulated and sent in a ZIP file.
// The ZIP file has to be compressed and sent as a base64 encoded string. The
// ZIP file must contain a "reports" and an optional "root" sub-folder.
Expand Down
16 changes: 14 additions & 2 deletions web/client/codechecker_client/cmd/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,15 @@ def assemble_zip(inputs, zip_file, client):
if file_hashes else []
LOG.info("Get missing file content hashes done.")

LOG.info(
"Get file content hashes which do not have blame information from the "
"server...")
necessary_blame_hashes = \
client.getMissingContentHashesForBlameInfo(file_hashes) \
if file_hashes else []
LOG.info(
"Get file content hashes which do not have blame information done.")

LOG.info("Collecting review comments ...")

# Get files which can be found on the server but contains source code
Expand Down Expand Up @@ -509,10 +518,13 @@ def assemble_zip(inputs, zip_file, client):
except KeyError:
zipf.write(f, file_path)

if collected_file_paths:
if necessary_blame_hashes:
file_paths = list(f for f, h in file_to_hash.items()
if h in necessary_blame_hashes)

LOG.info("Collecting blame information for source files...")
try:
if assemble_blame_info(zipf, collected_file_paths):
if assemble_blame_info(zipf, file_paths):
LOG.info("Collecting blame information done.")
else:
LOG.info("No blame information found for source files.")
Expand Down
4 changes: 4 additions & 0 deletions web/client/codechecker_client/helpers/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ def removeSourceComponent(self, name):
def getMissingContentHashes(self, file_hashes):
pass

@ThriftClientCall
def getMissingContentHashesForBlameInfo(self, file_hashes):
pass

@ThriftClientCall
def massStoreRun(self, name, tag, version, zipdir, force,
trim_path_prefixes, description):
Expand Down
2 changes: 1 addition & 1 deletion web/codechecker_web/shared/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# The newest supported minor version (value) for each supported major version
# (key) in this particular build.
SUPPORTED_VERSIONS = {
6: 46
6: 47
}

# Used by the client to automatically identify the latest major and minor
Expand Down
100 changes: 70 additions & 30 deletions web/server/codechecker_server/api/mass_store_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from datetime import datetime
from hashlib import sha256
from tempfile import TemporaryDirectory
from typing import Any, Dict, List, Optional, Set, Tuple
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple

import codechecker_api_shared
from codechecker_api.codeCheckerDBAccess_v6 import ttypes
Expand Down Expand Up @@ -108,9 +108,7 @@ def parse_codechecker_review_comment(
def add_file_record(
session: DBSession,
file_path: str,
content_hash: str,
remote_url: Optional[str],
tracking_branch: Optional[str]
content_hash: str
) -> Optional[int]:
"""
Add the necessary file record pointing to an already existing content.
Expand All @@ -123,6 +121,13 @@ def add_file_record(
finish_checker_run() and since SQLite doesn't support parallel
transactions, this API call will wait until the other transactions
finish. In the meantime the run adding transaction times out.
This function doesn't insert blame info in the File objects because those
are added by __add_blame_info(). In previous CodeChecker versions git info
was not stored at all, so upgrading to a new CodeChecker results the
storage of git blame info even for files that are already stored. For this
reason we can't avoid an update on File and FileContent tables, but we can
avoid double reading of blame info json files.
"""
file_record = session.query(File) \
.filter(File.content_hash == content_hash,
Expand All @@ -133,8 +138,7 @@ def add_file_record(
return file_record.id

try:
file_record = File(
file_path, content_hash, remote_url, tracking_branch)
file_record = File(file_path, content_hash, None, None)
session.add(file_record)
session.commit()
except sqlalchemy.exc.IntegrityError as ex:
Expand Down Expand Up @@ -364,7 +368,6 @@ def __free_run_lock(self, session: DBSession):
def __store_source_files(
self,
source_root: str,
blame_root: str,
filename_to_hash: Dict[str, str]
) -> Dict[str, int]:
""" Storing file contents from plist. """
Expand All @@ -378,12 +381,6 @@ def __store_source_files(
trimmed_file_path = util.trim_path_prefixes(
file_name, self.__trim_path_prefixes)

blame_file = os.path.realpath(os.path.join(
blame_root, file_name.strip("/")))

blame_info, remote_url, tracking_branch = get_blame_file_data(
blame_file)

if not os.path.isfile(source_file_name):
# The file was not in the ZIP file, because we already
# have the content. Let's check if we already have a file
Expand All @@ -392,8 +389,7 @@ def __store_source_files(
LOG.debug('%s not found or already stored.', trimmed_file_path)
with DBSession(self.__Session) as session:
fid = add_file_record(
session, trimmed_file_path, file_hash, remote_url,
tracking_branch)
session, trimmed_file_path, file_hash)

if not fid:
LOG.error("File ID for %s is not found in the DB with "
Expand All @@ -404,21 +400,63 @@ def __store_source_files(
continue

with DBSession(self.__Session) as session:
self.__add_file_content(
session, source_file_name, file_hash, blame_info)
self.__add_file_content(session, source_file_name, file_hash)

file_path_to_id[trimmed_file_path] = add_file_record(
session, trimmed_file_path, file_hash, remote_url,
tracking_branch)
session, trimmed_file_path, file_hash)

return file_path_to_id

def __add_blame_info(
self,
blame_root: str,
filename_to_hash: Dict[str, str]
):
"""
This function updates blame info in File and FileContent tables if
these were NULL. This function exists only because in earlier
CodeChecker versions blame info was not stored in these tables and
in a subsequent storage we can't update the tables for an unchanged
file since __store_source_files() updates only the ones that are in the
.zip file. This function stores blame info even if the corresponding
source file is not in the .zip file.
"""
with DBSession(self.__Session) as session:
for subdir, _, files in os.walk(blame_root):
for f in files:
blame_file = os.path.join(subdir, f)
file_path = blame_file[len(blame_root.rstrip("/")):]
blame_info, remote_url, tracking_branch = \
get_blame_file_data(blame_file)

compressed_blame_info = None
if blame_info:
compressed_blame_info = zlib.compress(
json.dumps(blame_info).encode('utf-8'),
zlib.Z_BEST_COMPRESSION)

session \
.query(FileContent) \
.filter(FileContent.blame_info.is_(None)) \
.filter(FileContent.content_hash ==
filename_to_hash.get(file_path)) \
.update({"blame_info": compressed_blame_info})

session \
.query(File) \
.filter(File.remote_url.is_(None)) \
.filter(File.filepath == file_path) \
.update({
"remote_url": remote_url,
"tracking_branch": tracking_branch})

session.commit()

def __add_file_content(
self,
session: DBSession,
source_file_name: str,
content_hash: Optional[str],
blame_info: Optional[dict]
content_hash: Optional[str]
):
"""
Add the necessary file contents. If content_hash in None then this
Expand All @@ -431,6 +469,14 @@ def __add_file_content(
finish_checker_run() and since SQLite doesn't support parallel
transactions, this API call will wait until the other transactions
finish. In the meantime the run adding transaction times out.
This function doesn't insert blame info in the FileContent objects
because those are added by __add_blame_info(). In previous CodeChecker
versions git info was not stored at all, so upgrading to a new
CodeChecker results the storage of git blame info even for files that
are already stored. For this reason we can't avoid an update on File
and FileContent tables, but we can avoid double reading of blame info
json files.
"""
source_file_content = None
if not content_hash:
Expand All @@ -448,14 +494,7 @@ def __add_file_content(
compressed_content = zlib.compress(
source_file_content, zlib.Z_BEST_COMPRESSION)

compressed_blame_info = None
if blame_info:
compressed_blame_info = zlib.compress(
json.dumps(blame_info).encode('utf-8'),
zlib.Z_BEST_COMPRESSION)

fc = FileContent(
content_hash, compressed_content, compressed_blame_info)
fc = FileContent(content_hash, compressed_content, None)

session.add(fc)
session.commit()
Expand Down Expand Up @@ -997,7 +1036,8 @@ def store(self) -> int:

LOG.info("[%s] Store source files...", self.__name)
file_path_to_id = self.__store_source_files(
source_root, blame_root, filename_to_hash)
source_root, filename_to_hash)
self.__add_blame_info(blame_root, filename_to_hash)
LOG.info("[%s] Store source files done.", self.__name)

run_history_time = datetime.now()
Expand Down
18 changes: 18 additions & 0 deletions web/server/codechecker_server/api/report_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2814,6 +2814,24 @@ def getMissingContentHashes(self, file_hashes):
return list(set(file_hashes) -
set([fc.content_hash for fc in q]))

@exc_to_thrift_reqfail
@timeit
def getMissingContentHashesForBlameInfo(self, file_hashes):
self.__require_store()

if not file_hashes:
return []

with DBSession(self._Session) as session:

q = session.query(FileContent) \
.options(sqlalchemy.orm.load_only('content_hash')) \
.filter(FileContent.content_hash.in_(file_hashes)) \
.filter(FileContent.blame_info.isnot(None))

return list(set(file_hashes) -
set([fc.content_hash for fc in q]))

@exc_to_thrift_reqfail
@timeit
def massStoreRun(self, name, tag, version, b64zip, force,
Expand Down
4 changes: 2 additions & 2 deletions web/server/vue-cli/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion web/server/vue-cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
},
"dependencies": {
"@mdi/font": "^5.9.55",
"codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.46.0.tgz",
"codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.47.0.tgz",
"chart.js": "^2.9.4",
"chartjs-plugin-datalabels": "^0.7.0",
"codemirror": "^5.60.0",
Expand Down
Loading

0 comments on commit 5d27c3b

Please sign in to comment.