Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[web] Update blame info #3488

Merged
merged 1 commit into from
Nov 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion web/api/js/codechecker-api-node/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codechecker-api",
"version": "6.46.0",
"version": "6.47.0",
"description": "Generated node.js compatible API stubs for CodeChecker server.",
"main": "lib",
"homepage": "https://github.com/Ericsson/codechecker",
Expand Down
Binary file modified web/api/py/codechecker_api/dist/codechecker_api.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion web/api/py/codechecker_api/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
with open('README.md', encoding='utf-8', errors="ignore") as f:
long_description = f.read()

api_version = '6.46.0'
api_version = '6.47.0'

setup(
name='codechecker_api',
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion web/api/py/codechecker_api_shared/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
with open('README.md', encoding='utf-8', errors="ignore") as f:
long_description = f.read()

api_version = '6.46.0'
api_version = '6.47.0'

setup(
name='codechecker_api_shared',
Expand Down
8 changes: 8 additions & 0 deletions web/api/report_server.thrift
Original file line number Diff line number Diff line change
Expand Up @@ -764,6 +764,14 @@ service codeCheckerDBAccess {
list<string> getMissingContentHashes(1: list<string> fileHashes)
throws (1: codechecker_api_shared.RequestFailed requestError),

// The client can ask the server whether a blame info is already stored in the
// database. If it is, then it is not necessary to send it in the ZIP file
// with massStoreRun() function. This function requires a list of file hashes
// (sha256) and returns the ones to which no blame info is stored yet.
// PERMISSION: PRODUCT_STORE
list<string> getMissingContentHashesForBlameInfo(1: list<string> fileHashes)
throws (1: codechecker_api_shared.RequestFailed requestError),

// This function stores an entire run encapsulated and sent in a ZIP file.
// The ZIP file has to be compressed and sent as a base64 encoded string. The
// ZIP file must contain a "reports" and an optional "root" sub-folder.
Expand Down
16 changes: 14 additions & 2 deletions web/client/codechecker_client/cmd/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,15 @@ def assemble_zip(inputs, zip_file, client):
if file_hashes else []
LOG.info("Get missing file content hashes done.")

LOG.info(
"Get file content hashes which do not have blame information from the "
"server...")
necessary_blame_hashes = \
client.getMissingContentHashesForBlameInfo(file_hashes) \
if file_hashes else []
LOG.info(
"Get file content hashes which do not have blame information done.")

LOG.info("Collecting review comments ...")

# Get files which can be found on the server but contains source code
Expand Down Expand Up @@ -509,10 +518,13 @@ def assemble_zip(inputs, zip_file, client):
except KeyError:
zipf.write(f, file_path)

if collected_file_paths:
if necessary_blame_hashes:
file_paths = list(f for f, h in file_to_hash.items()
if h in necessary_blame_hashes)

LOG.info("Collecting blame information for source files...")
try:
if assemble_blame_info(zipf, collected_file_paths):
if assemble_blame_info(zipf, file_paths):
bruntib marked this conversation as resolved.
Show resolved Hide resolved
LOG.info("Collecting blame information done.")
else:
LOG.info("No blame information found for source files.")
Expand Down
4 changes: 4 additions & 0 deletions web/client/codechecker_client/helpers/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ def removeSourceComponent(self, name):
def getMissingContentHashes(self, file_hashes):
pass

@ThriftClientCall
def getMissingContentHashesForBlameInfo(self, file_hashes):
pass

@ThriftClientCall
def massStoreRun(self, name, tag, version, zipdir, force,
trim_path_prefixes, description):
Expand Down
2 changes: 1 addition & 1 deletion web/codechecker_web/shared/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# The newest supported minor version (value) for each supported major version
# (key) in this particular build.
SUPPORTED_VERSIONS = {
6: 46
6: 47
}

# Used by the client to automatically identify the latest major and minor
Expand Down
98 changes: 69 additions & 29 deletions web/server/codechecker_server/api/mass_store_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,7 @@ def parse_codechecker_review_comment(
def add_file_record(
session: DBSession,
file_path: str,
content_hash: str,
remote_url: Optional[str],
tracking_branch: Optional[str]
content_hash: str
) -> Optional[int]:
"""
Add the necessary file record pointing to an already existing content.
Expand All @@ -123,6 +121,13 @@ def add_file_record(
finish_checker_run() and since SQLite doesn't support parallel
transactions, this API call will wait until the other transactions
finish. In the meantime the run adding transaction times out.

This function doesn't insert blame info in the File objects because those
are added by __add_blame_info(). In previous CodeChecker versions git info
was not stored at all, so upgrading to a new CodeChecker results the
storage of git blame info even for files that are already stored. For this
reason we can't avoid an update on File and FileContent tables, but we can
avoid double reading of blame info json files.
"""
file_record = session.query(File) \
.filter(File.content_hash == content_hash,
Expand All @@ -133,8 +138,7 @@ def add_file_record(
return file_record.id

try:
file_record = File(
file_path, content_hash, remote_url, tracking_branch)
file_record = File(file_path, content_hash, None, None)
session.add(file_record)
session.commit()
except sqlalchemy.exc.IntegrityError as ex:
Expand Down Expand Up @@ -364,7 +368,6 @@ def __free_run_lock(self, session: DBSession):
def __store_source_files(
self,
source_root: str,
blame_root: str,
filename_to_hash: Dict[str, str]
) -> Dict[str, int]:
""" Storing file contents from plist. """
Expand All @@ -378,12 +381,6 @@ def __store_source_files(
trimmed_file_path = util.trim_path_prefixes(
file_name, self.__trim_path_prefixes)

blame_file = os.path.realpath(os.path.join(
blame_root, file_name.strip("/")))

blame_info, remote_url, tracking_branch = get_blame_file_data(
blame_file)

if not os.path.isfile(source_file_name):
# The file was not in the ZIP file, because we already
# have the content. Let's check if we already have a file
Expand All @@ -392,8 +389,7 @@ def __store_source_files(
LOG.debug('%s not found or already stored.', trimmed_file_path)
with DBSession(self.__Session) as session:
fid = add_file_record(
session, trimmed_file_path, file_hash, remote_url,
tracking_branch)
session, trimmed_file_path, file_hash)

if not fid:
LOG.error("File ID for %s is not found in the DB with "
Expand All @@ -404,21 +400,63 @@ def __store_source_files(
continue

with DBSession(self.__Session) as session:
self.__add_file_content(
session, source_file_name, file_hash, blame_info)
self.__add_file_content(session, source_file_name, file_hash)

file_path_to_id[trimmed_file_path] = add_file_record(
session, trimmed_file_path, file_hash, remote_url,
tracking_branch)
session, trimmed_file_path, file_hash)

return file_path_to_id

def __add_blame_info(
self,
blame_root: str,
filename_to_hash: Dict[str, str]
):
"""
This function updates blame info in File and FileContent tables if
these were NULL. This function exists only because in earlier
CodeChecker versions blame info was not stored in these tables and
in a subsequent storage we can't update the tables for an unchanged
file since __store_source_files() updates only the ones that are in the
.zip file. This function stores blame info even if the corresponding
source file is not in the .zip file.
"""
with DBSession(self.__Session) as session:
for subdir, _, files in os.walk(blame_root):
for f in files:
blame_file = os.path.join(subdir, f)
file_path = blame_file[len(blame_root.rstrip("/")):]
blame_info, remote_url, tracking_branch = \
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already getting and storing blame information on this part:

blame_info, remote_url, tracking_branch = get_blame_file_data(
blame_file)

We have to process every git blame info file only once and not multiple times.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose not adding blame info when inserting new File and FileContent fields because the alternative solution (i.e. returning the list of newly inserted fields) was not convenient. For example gathering the information whether a File is newly inserted would require to add a second return value for add_file_record() function and this info should be transferred to __add_blame_info() through further return values and function parameters.

get_blame_file_data(blame_file)

compressed_blame_info = None
if blame_info:
compressed_blame_info = zlib.compress(
json.dumps(blame_info).encode('utf-8'),
zlib.Z_BEST_COMPRESSION)

session \
.query(FileContent) \
.filter(FileContent.blame_info.is_(None)) \
.filter(FileContent.content_hash ==
filename_to_hash.get(file_path)) \
.update({"blame_info": compressed_blame_info})

session \
.query(File) \
.filter(File.remote_url.is_(None)) \
.filter(File.filepath == file_path) \
.update({
"remote_url": remote_url,
"tracking_branch": tracking_branch})

session.commit()

def __add_file_content(
self,
session: DBSession,
source_file_name: str,
content_hash: Optional[str],
blame_info: Optional[dict]
content_hash: Optional[str]
):
"""
Add the necessary file contents. If content_hash in None then this
Expand All @@ -431,6 +469,14 @@ def __add_file_content(
finish_checker_run() and since SQLite doesn't support parallel
transactions, this API call will wait until the other transactions
finish. In the meantime the run adding transaction times out.

This function doesn't insert blame info in the FileContent objects
because those are added by __add_blame_info(). In previous CodeChecker
versions git info was not stored at all, so upgrading to a new
CodeChecker results the storage of git blame info even for files that
are already stored. For this reason we can't avoid an update on File
and FileContent tables, but we can avoid double reading of blame info
json files.
"""
source_file_content = None
if not content_hash:
Expand All @@ -448,14 +494,7 @@ def __add_file_content(
compressed_content = zlib.compress(
source_file_content, zlib.Z_BEST_COMPRESSION)

compressed_blame_info = None
if blame_info:
compressed_blame_info = zlib.compress(
json.dumps(blame_info).encode('utf-8'),
zlib.Z_BEST_COMPRESSION)

fc = FileContent(
content_hash, compressed_content, compressed_blame_info)
fc = FileContent(content_hash, compressed_content, None)

session.add(fc)
session.commit()
Expand Down Expand Up @@ -997,7 +1036,8 @@ def store(self) -> int:

LOG.info("[%s] Store source files...", self.__name)
file_path_to_id = self.__store_source_files(
source_root, blame_root, filename_to_hash)
source_root, filename_to_hash)
self.__add_blame_info(blame_root, filename_to_hash)
LOG.info("[%s] Store source files done.", self.__name)

run_history_time = datetime.now()
Expand Down
18 changes: 18 additions & 0 deletions web/server/codechecker_server/api/report_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -2814,6 +2814,24 @@ def getMissingContentHashes(self, file_hashes):
return list(set(file_hashes) -
set([fc.content_hash for fc in q]))

@exc_to_thrift_reqfail
@timeit
def getMissingContentHashesForBlameInfo(self, file_hashes):
self.__require_store()

if not file_hashes:
return []

with DBSession(self._Session) as session:

q = session.query(FileContent) \
.options(sqlalchemy.orm.load_only('content_hash')) \
.filter(FileContent.content_hash.in_(file_hashes)) \
.filter(FileContent.blame_info.isnot(None))

return list(set(file_hashes) -
set([fc.content_hash for fc in q]))

@exc_to_thrift_reqfail
@timeit
def massStoreRun(self, name, tag, version, b64zip, force,
Expand Down
4 changes: 2 additions & 2 deletions web/server/vue-cli/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion web/server/vue-cli/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
},
"dependencies": {
"@mdi/font": "^5.9.55",
"codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.46.0.tgz",
"codechecker-api": "file:../../api/js/codechecker-api-node/dist/codechecker-api-6.47.0.tgz",
"chart.js": "^2.9.4",
"chartjs-plugin-datalabels": "^0.7.0",
"codemirror": "^5.60.0",
Expand Down
Loading