Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(server): Multiprocess migration and db_cleanup #4175

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions analyzer/codechecker_analyzer/cmd/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
import os
import shutil
import sys

import multiprocess
from typing import List

from tu_collector import tu_collector
Expand All @@ -31,6 +29,7 @@
from codechecker_analyzer.buildlog import log_parser

from codechecker_common import arg, logger, cmd_config, review_status_handler
from codechecker_common.compatibility.multiprocessing import cpu_count
from codechecker_common.skiplist_handler import SkipListHandler, \
SkipListHandlers
from codechecker_common.util import load_json
Expand Down Expand Up @@ -169,8 +168,7 @@ def add_arguments_to_parser(parser):
type=int,
dest="jobs",
required=False,
# pylint: disable=no-member
default=multiprocess.cpu_count(),
default=cpu_count(),
help="Number of threads to use in analysis. More "
"threads mean faster analysis at the cost of "
"using more memory.")
Expand Down
6 changes: 2 additions & 4 deletions analyzer/codechecker_analyzer/cmd/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@
import sys
import tempfile

import multiprocess

from codechecker_analyzer.analyzers import analyzer_types
from codechecker_analyzer.arg import \
OrderedCheckersAction, OrderedConfigAction, \
analyzer_config, checker_config, existing_abspath

from codechecker_common import arg, cmd_config, logger
from codechecker_common.compatibility.multiprocessing import cpu_count
from codechecker_common.source_code_comment_handler import \
REVIEW_STATUS_VALUES

Expand Down Expand Up @@ -183,8 +182,7 @@ def add_arguments_to_parser(parser):
type=int,
dest="jobs",
required=False,
# pylint: disable=no-member
default=multiprocess.cpu_count(),
default=cpu_count(),
help="Number of threads to use in analysis. "
"More threads mean faster analysis at "
"the cost of using more memory.")
Expand Down
7 changes: 7 additions & 0 deletions codechecker_common/compatibility/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -------------------------------------------------------------------------
#
# Part of the CodeChecker project, under the Apache License v2.0 with
# LLVM Exceptions. See LICENSE for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# -------------------------------------------------------------------------
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
#
# -------------------------------------------------------------------------
"""
Multiprocess compatibility module.
Multiprocessing compatibility module.
"""

import sys

# pylint: disable=unused-import
# pylint: disable=no-name-in-module,unused-import
if sys.platform in ["darwin", "win32"]:
from multiprocess import Pool as MultiProcessPool
from multiprocess import Pool # type: ignore
from multiprocess import cpu_count
else:
from concurrent.futures import ProcessPoolExecutor as MultiProcessPool
from concurrent.futures import ProcessPoolExecutor as Pool # type: ignore
from multiprocessing import cpu_count
14 changes: 11 additions & 3 deletions codechecker_common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,11 @@
"""
Util module.
"""


import itertools
import json
from typing import TextIO
import os
from typing import TextIO

import portalocker

from codechecker_common.logger import get_logger
Expand All @@ -34,6 +33,15 @@ def arg_match(options, args):
return matched_args


def clamp(min_: int, value: int, max_: int) -> int:
    """
    Clamps ``value`` to be between ``min_`` and ``max_``, inclusive.

    Returns ``min_`` if ``value`` is smaller than ``min_``, ``max_`` if
    ``value`` is larger than ``max_``, and ``value`` itself otherwise.

    Raises ``ValueError`` if ``min_`` is greater than ``max_``, because the
    requested range would be empty.
    """
    # Reject an inverted range explicitly: min(max(...)) would otherwise
    # silently return max_ for every input.
    if min_ > max_:
        raise ValueError("min <= max required")
    return min(max(min_, value), max_)


def chunks(iterator, n):
"""
Yield the next chunk of an iterable object. A chunk consists of maximum n
Expand Down
53 changes: 29 additions & 24 deletions docs/web/db_schema_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ https://alembic.sqlalchemy.org/en/latest/autogenerate.html#what-does-autogenerat

# Updating configuration database schema

Config database schema scripts can be found under the `config_db_migrate`
directory.
Config database schema scripts can be found under the
`server/codechecker_server/migrations/config/versions` directory.

## Automatic migration script generation (Online)

Expand All @@ -30,20 +30,21 @@ version.
The configuration database schema file can be found here:
`server/codechecker_server/database/config_db_model.py`

### **Step 2**: Check the alembic.ini configuration settings
### **Step 2**: Check the `alembic.ini` configuration settings

Database connection should point to the correct database.
Edit the sqlalchemy.url option in [alembic.ini](
https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file)
according to your database configuration.
Edit the `sqlalchemy.url` option in
[alembic.ini](https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file)
according to your database configuration.

### **Step 3**: Use alembic to autogenerate migration scripts

`alembic --name config_db revision --autogenerate -m "Change description"`

### **Step 4**: Check the generated scripts

The new migration script
`config_db_migrate/versions/{hash}_change_description.py` is generated.
`migrations/config/versions/{hash}_change_description.py` is generated.
**You must always check the generated script because sometimes it isn't
correct.**

Expand All @@ -60,29 +61,34 @@ Don't forget to commit the migration script with your other changes.

## Automatic migration script generation (Online)

A Codechecker server should be started and a product should be configured with
A CodeChecker server should be started and a product should be configured with
a previous database schema version.

Product (run) database schema scripts can be found under the
`server/codechecker_server/migrations/report/versions` directory.

### **Step 1**: Update the database model

The run database schema file can be found here:
`server/codechecker_server/database/run_db_model.py`

### **Step 2**: Check alembic.ini configuration
### **Step 2**: Check `alembic.ini` configuration

Database connection should point to the correct database.
Edit the sqlalchemy.url option in [alembic.ini](
https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file)
according to your database configuration.
Edit the `sqlalchemy.url` option in
[alembic.ini](https://alembic.sqlalchemy.org/en/latest/tutorial.html#editing-the-ini-file)
according to your database configuration.

#### **Step 2**: Generating migration scripts using autogenerate

`alembic --name run_db revision --autogenerate -m "Change description"`

#### **Step 3**: Check the generated scripts
The new migration script db_migrate/versions/{hash}_change_description.py is
generated. **You must always check the generated script because sometimes it
isn't correct.**

The new migration script
`migrations/report/versions/{hash}_change_description.py` is generated.
**You must always check the generated script because sometimes it isn't
correct.**

#### **Step 4**: Run all test cases.

Expand Down Expand Up @@ -118,7 +124,7 @@ and the other is the run database (storing analysis reports).
If there is a schema mismatch and a migration is needed, you will get a
warning at server start.

## IMPORTANT before schema upgrade
## IMPORTANT: before schema upgrade

If the schema has changed, it is recommended to create a full backup
of your configuration and run databases before running the migration.
Expand Down Expand Up @@ -187,17 +193,16 @@ command.
$ CodeChecker server --db-upgrade-schema Default
[15:01] - Checking configuration database ...
[15:01] - Database is OK.
[15:01] - Preparing schema upgrade for Default
[15:01] - Preparing schema upgrade for 'Default'
[WARNING] [15:01] - Please note after migration only newer CodeChecker versions can be used to start the server
[WARNING] [15:01] - It is advised to make a full backup of your run databases.
[15:01] - Checking: Default
[15:01] - [Default] Database schema mismatch: migration is available.
Do you want to upgrade 'Default' to new schema? Y(es)/n(o) y
[15:01] - [Default] Schema will be upgraded...
[15:01] - ========================
[15:01] - Upgrading: Default
[15:01] - Database schema mismatch: migration is available.
Do you want to upgrade to new schema? Y(es)/n(o) y
Upgrading schema ...
Done.
Database is OK.
[15:01] - ========================
[15:02] - [Default] Upgrading...
[15:03] - [Default] Done upgrading.
```

Schema upgrade can be done for multiple products in a row if the
Expand Down
4 changes: 2 additions & 2 deletions web/client/codechecker_client/blame_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from git.exc import InvalidGitRepositoryError, GitCommandError
from typing import Dict, Iterable, Optional

from codechecker_common.compatibility.multiprocessing import Pool
from codechecker_common.logger import get_logger
from codechecker_common.multiprocesspool import MultiProcessPool

LOG = get_logger('system')

Expand Down Expand Up @@ -115,7 +115,7 @@ def assemble_blame_info(

Returns the number of collected blame information.
"""
with MultiProcessPool() as executor:
with Pool() as executor:
file_blame_info = __collect_blame_info_for_files(
file_paths, executor.map)

Expand Down
23 changes: 14 additions & 9 deletions web/client/codechecker_client/cmd/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,24 @@
from codechecker_client import product
from codechecker_common import arg, logger, cmd_config
from codechecker_common.checker_labels import CheckerLabels
from codechecker_common.compatibility.multiprocessing import Pool
from codechecker_common.source_code_comment_handler import \
SourceCodeCommentHandler
from codechecker_common.util import load_json
from codechecker_common.multiprocesspool import MultiProcessPool

from codechecker_web.shared import webserver_context, host_check
from codechecker_web.shared.env import get_default_workspace

try:
from codechecker_client.blame_info import assemble_blame_info
except ImportError:
pass
def assemble_blame_info(_, __) -> int:
    """
    Shim for cases where Git blame info is not gatherable due to
    missing libraries.

    Accepts and ignores two positional arguments and never returns.

    Raises ``NotImplementedError`` unconditionally, so the caller can detect
    that blame information could not be collected.
    """
    raise NotImplementedError(
        "Git blame information cannot be collected because the required "
        "libraries are missing.")


LOG = logger.get_logger('system')

Expand Down Expand Up @@ -371,7 +377,7 @@ def filter_source_files_with_comments(
"""
jobs = file_report_positions.items()

with MultiProcessPool() as executor:
with Pool() as executor:
return get_source_file_with_comments(jobs, executor.map)


Expand Down Expand Up @@ -447,7 +453,7 @@ def assemble_zip(inputs,

LOG.debug("Processing report files ...")

with MultiProcessPool() as executor:
with Pool() as executor:
analyzer_result_file_reports = parse_analyzer_result_files(
analyzer_result_file_paths, checker_labels, executor.map)

Expand Down Expand Up @@ -562,14 +568,13 @@ def assemble_zip(inputs,
zipf, file_paths)

if stats.num_of_blame_information:
LOG.info("Collecting blame information done.")
LOG.info("Collecting blame information... Done.")
else:
LOG.info("No blame information found for source files.")
except NameError:
except NotImplementedError:
LOG.warning(
"Collecting blame information has been failed. Make sure "
"'git' is available on your system to hide this warning "
"message.")
"Failed to collect blame information. Make sure Git is "
"installed on your system.")

zipf.writestr('content_hashes.json', json.dumps(file_to_hash))

Expand Down
8 changes: 5 additions & 3 deletions web/server/codechecker_server/api/product_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ def addProduct(self, product):
msg)

conn_str = SQLServer \
.from_cmdline_args(conn_str_args, IDENTIFIER, None, False, None) \
.from_cmdline_args(conn_str_args, product.endpoint, IDENTIFIER,
None, False, None) \
.get_connection_string()

is_rws_change_disabled = product.isReviewStatusChangeDisabled
Expand Down Expand Up @@ -534,8 +535,9 @@ def editProduct(self, product_id, new_config):
msg)

conn_str = SQLServer \
.from_cmdline_args(conn_str_args, IDENTIFIER, None,
False, None).get_connection_string()
.from_cmdline_args(conn_str_args, product.endpoint,
IDENTIFIER, None, False, None) \
.get_connection_string()

# If endpoint or database arguments change, the product
# configuration has changed so severely, that it needs
Expand Down
8 changes: 4 additions & 4 deletions web/server/codechecker_server/api/report_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3400,8 +3400,8 @@ def removeRunReports(self, run_ids, report_filter, cmp_data):
# access timestamp to file entries to delay their removal (and avoid
# removing frequently accessed files). The same comment applies to
# removeRun() function.
db_cleanup.remove_unused_comments(self._Session)
db_cleanup.remove_unused_analysis_info(self._Session)
db_cleanup.remove_unused_comments(self._product)
db_cleanup.remove_unused_analysis_info(self._product)

return True

Expand Down Expand Up @@ -3445,8 +3445,8 @@ def removeRun(self, run_id, run_filter):
# error. An alternative solution can be adding a timestamp to file
# entries to delay their removal. The same comment applies to
# removeRunReports() function.
db_cleanup.remove_unused_comments(self._Session)
db_cleanup.remove_unused_analysis_info(self._Session)
db_cleanup.remove_unused_comments(self._product)
db_cleanup.remove_unused_analysis_info(self._product)

return bool(runs)

Expand Down
Loading
Loading