From 22c172b4c4f5b96103328625d4c94b0649a4977f Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Tue, 29 Aug 2023 12:05:54 +0200 Subject: [PATCH] Properly integrate Python `logging` facility - removed `loguru` - added `databricks.labs.ucx.logger.NiceFormatter` Fix #87 --- CONTRIBUTING.md | 6 ++ pyproject.toml | 6 +- src/databricks/labs/ucx/__init__.py | 3 + .../labs/ucx/inventory/inventorizer.py | 3 +- src/databricks/labs/ucx/inventory/listing.py | 4 +- .../labs/ucx/inventory/permissions.py | 4 +- src/databricks/labs/ucx/inventory/table.py | 5 +- src/databricks/labs/ucx/logger.py | 63 +++++++++++++++++++ src/databricks/labs/ucx/managers/group.py | 4 +- src/databricks/labs/ucx/providers/client.py | 4 +- src/databricks/labs/ucx/providers/logger.py | 18 ------ src/databricks/labs/ucx/providers/spark.py | 4 +- src/databricks/labs/ucx/tacl/_internal.py | 4 +- src/databricks/labs/ucx/tacl/tables.py | 4 +- .../labs/ucx/toolkits/group_migration.py | 7 +-- src/databricks/labs/ucx/utils.py | 7 ++- tests/integration/conftest.py | 6 +- tests/integration/test_e2e.py | 4 +- tests/integration/test_jobs.py | 5 +- tests/integration/test_setup.py | 3 +- tests/integration/test_tacls.py | 4 +- tests/integration/utils.py | 4 +- tests/unit/__init__.py | 7 +++ tests/unit/conftest.py | 3 +- tests/unit/test_logger.py | 10 +++ 25 files changed, 146 insertions(+), 46 deletions(-) create mode 100644 src/databricks/labs/ucx/logger.py delete mode 100644 src/databricks/labs/ucx/providers/logger.py create mode 100644 tests/unit/test_logger.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 41955e4500..98a7e9bdc6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,6 +20,12 @@ Integration tests: - tests that require their own unique fixture setup must limit the wall clock time of fixture initialization to under one second. - each integration test must be debuggable in IntelliJ IDEA (Community Edition) with the Python plugin (community edition). +Logging: + +- get current logger via `import logging; logger = logging.getLogger(__name__)`. It can be done at the to of the module as well. +- enable debug logging via `import logging; logging.getLogger("databricks").setLevel("DEBUG")` +- enable debug logging only for UCX via `import logging; logging.getLogger("databricks.labs.ucx").setLevel("DEBUG")` + IDE setup: - The only supported IDE for developing this project is based on IntelliJ. This means that both PyCharm (commercial) and IntelliJ IDEA (Community Edition) with Python plugin (community edition) are supported. diff --git a/pyproject.toml b/pyproject.toml index 49460abea3..ca27c4a4b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,6 @@ dependencies = [ "typer[all]>=0.9.0,<0.10.0", "pyhocon>=0.3.60,<0.4.0", "pydantic>=2.0.3, <3.0.0", - "loguru>=0.7.0, <1.0.0", "PyYAML>=6.0.0,<7.0.0", "ratelimit>=2.2.1,<3.0.0", "pandas>=2.0.3,<3.0.0", @@ -89,14 +88,14 @@ dependencies = [ ] [tool.hatch.envs.lint.scripts] fmt = [ + "isort .", "black .", "ruff . --fix", - "isort ." ] verify = [ "black --check .", + "isort . --check-only", "ruff .", - "isort . --check-only" ] [tool.isort] @@ -107,7 +106,6 @@ profile = "black" [tool.pytest.ini_options] addopts = "-s -p no:warnings -vv --cache-clear" -log_cli = true filterwarnings = [ "ignore:::.*pyspark.broadcast*", "ignore:::.*pyspark.sql.pandas.utils*" diff --git a/src/databricks/labs/ucx/__init__.py b/src/databricks/labs/ucx/__init__.py index e69de29bb2..98b7d21354 100644 --- a/src/databricks/labs/ucx/__init__.py +++ b/src/databricks/labs/ucx/__init__.py @@ -0,0 +1,3 @@ +from databricks.labs.ucx.logger import _install + +_install() diff --git a/src/databricks/labs/ucx/inventory/inventorizer.py b/src/databricks/labs/ucx/inventory/inventorizer.py index 069cffea22..376354d0a2 100644 --- a/src/databricks/labs/ucx/inventory/inventorizer.py +++ b/src/databricks/labs/ucx/inventory/inventorizer.py @@ -1,4 +1,5 @@ import json +import logging from abc import ABC, abstractmethod from collections.abc import Callable, Iterator from functools import partial @@ -24,10 +25,10 @@ ) from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient from databricks.labs.ucx.providers.groups_info import GroupMigrationState -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.utils import ProgressReporter, ThreadedExecution InventoryObject = TypeVar("InventoryObject") +logger = logging.getLogger(__name__) class BaseInventorizer(ABC, Generic[InventoryObject]): diff --git a/src/databricks/labs/ucx/inventory/listing.py b/src/databricks/labs/ucx/inventory/listing.py index 946755d20e..2830fd98ef 100644 --- a/src/databricks/labs/ucx/inventory/listing.py +++ b/src/databricks/labs/ucx/inventory/listing.py @@ -1,11 +1,13 @@ import datetime as dt +import logging from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait from itertools import groupby from databricks.sdk.service.workspace import ObjectInfo, ObjectType from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger + +logger = logging.getLogger(__name__) class WorkspaceListing: diff --git a/src/databricks/labs/ucx/inventory/permissions.py b/src/databricks/labs/ucx/inventory/permissions.py index 716304e8e5..85578bb095 100644 --- a/src/databricks/labs/ucx/inventory/permissions.py +++ b/src/databricks/labs/ucx/inventory/permissions.py @@ -1,3 +1,4 @@ +import logging import random import time from copy import deepcopy @@ -20,9 +21,10 @@ ) from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient from databricks.labs.ucx.providers.groups_info import GroupMigrationState -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.utils import ThreadedExecution, safe_get_acls +logger = logging.getLogger(__name__) + @dataclass class PermissionRequestPayload: diff --git a/src/databricks/labs/ucx/inventory/table.py b/src/databricks/labs/ucx/inventory/table.py index 433bef9b06..4d12ce7a3b 100644 --- a/src/databricks/labs/ucx/inventory/table.py +++ b/src/databricks/labs/ucx/inventory/table.py @@ -1,3 +1,5 @@ +import logging + import pandas as pd from databricks.sdk import WorkspaceClient from databricks.sdk.service.iam import ObjectPermissions @@ -11,9 +13,10 @@ PermissionsInventoryItem, RequestObjectType, ) -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.providers.spark import SparkMixin +logger = logging.getLogger(__name__) + class InventoryTableManager(SparkMixin): def __init__(self, config: InventoryConfig, ws: WorkspaceClient): diff --git a/src/databricks/labs/ucx/logger.py b/src/databricks/labs/ucx/logger.py new file mode 100644 index 0000000000..46f293b7f7 --- /dev/null +++ b/src/databricks/labs/ucx/logger.py @@ -0,0 +1,63 @@ +import logging +import sys + + +class NiceFormatter(logging.Formatter): + BOLD = "\033[1m" + RESET = "\033[0m" + GREEN = "\033[32m" + BLACK = "\033[30m" + CYAN = "\033[36m" + YELLOW = "\033[33m" + RED = "\033[31m" + MAGENTA = "\033[35m" + GRAY = "\033[90m" + + def __init__(self, *, probe_tty: bool = False) -> None: + super().__init__(fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%H:%M") + self._levels = { + logging.NOTSET: self._bold(f"{self.BLACK}TRACE"), + logging.DEBUG: self._bold(f"{self.CYAN}DEBUG"), + logging.INFO: self._bold(f"{self.GREEN} INFO"), + logging.WARNING: self._bold(f"{self.YELLOW} WARN"), + logging.ERROR: self._bold(f"{self.RED}ERROR"), + logging.CRITICAL: self._bold(f"{self.MAGENTA}FATAL"), + } + # show colors in runtime, github actions, and while debugging + self.colors = sys.stdout.isatty() if probe_tty else True + + def _bold(self, text): + return f"{self.BOLD}{text}{self.RESET}" + + def format(self, record: logging.LogRecord): # noqa: A003 + if not self.colors: + return super().format(record) + ts = self.formatTime(record, datefmt="%H:%M") + level = self._levels[record.levelno] + # databricks.labs.ucx.foo -> d.l.ucx.foo + name = ".".join(part if i > 1 else part[0] for i, part in enumerate(record.name.split("."))) + msg = record.msg + if record.exc_info and not record.exc_text: + record.exc_text = self.formatException(record.exc_info) + if record.exc_text: + msg += ": " + record.exc_text + if record.stack_info: + if msg[-1:] != "\n": + msg += "\n" + msg += self.formatStack(record.stack_info) + + color_marker = self.GRAY + if record.levelno in (logging.INFO, logging.WARNING): + color_marker = self.BLACK + self.BOLD + elif record.levelno in (logging.ERROR, logging.FATAL): + color_marker = self.RED + self.BOLD + return f"{self.GRAY}{ts}{self.RESET} {level} {color_marker}[{name}] {msg}{self.RESET}" + + +def _install(): + for h in logging.root.handlers: + logging.root.removeHandler(h) + console_handler = logging.StreamHandler(sys.stderr) + console_handler.setFormatter(NiceFormatter()) + console_handler.setLevel("DEBUG") + logging.root.addHandler(console_handler) diff --git a/src/databricks/labs/ucx/managers/group.py b/src/databricks/labs/ucx/managers/group.py index 7067a3d20f..3e7e9e330f 100644 --- a/src/databricks/labs/ucx/managers/group.py +++ b/src/databricks/labs/ucx/managers/group.py @@ -1,3 +1,4 @@ +import logging import typing from functools import partial @@ -10,9 +11,10 @@ GroupMigrationState, MigrationGroupInfo, ) -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.utils import ThreadedExecution +logger = logging.getLogger(__name__) + class GroupLevel(StrEnum): WORKSPACE = "workspace" diff --git a/src/databricks/labs/ucx/providers/client.py b/src/databricks/labs/ucx/providers/client.py index 5ec2194f69..76c7da81d7 100644 --- a/src/databricks/labs/ucx/providers/client.py +++ b/src/databricks/labs/ucx/providers/client.py @@ -1,4 +1,5 @@ import json +import logging from collections.abc import Iterator from databricks.sdk import WorkspaceClient @@ -7,7 +8,8 @@ from ratelimit import limits, sleep_and_retry from databricks.labs.ucx.inventory.types import RequestObjectType -from databricks.labs.ucx.providers.logger import logger + +logger = logging.getLogger(__name__) class ImprovedWorkspaceClient(WorkspaceClient): diff --git a/src/databricks/labs/ucx/providers/logger.py b/src/databricks/labs/ucx/providers/logger.py deleted file mode 100644 index ff1ba5e112..0000000000 --- a/src/databricks/labs/ucx/providers/logger.py +++ /dev/null @@ -1,18 +0,0 @@ -import logging -from logging import LogRecord - -from loguru import logger as _loguru_logger - -# reassigning the logger to the loguru logger -# for flexibility and simple dependency injection -logger = _loguru_logger - -_sdk_logger = logging.getLogger("databricks.sdk") - - -class _LoguruHandler(logging.Handler): - def emit(self, record: LogRecord) -> None: - logger.debug(record.msg) - - -_sdk_logger.addHandler(_LoguruHandler()) diff --git a/src/databricks/labs/ucx/providers/spark.py b/src/databricks/labs/ucx/providers/spark.py index e8b8d78afe..12bc5fecff 100644 --- a/src/databricks/labs/ucx/providers/spark.py +++ b/src/databricks/labs/ucx/providers/spark.py @@ -1,7 +1,9 @@ +import logging + from databricks.sdk import WorkspaceClient from pyspark.sql import SparkSession -from databricks.labs.ucx.providers.logger import logger +logger = logging.getLogger(__name__) class SparkMixin: diff --git a/src/databricks/labs/ucx/tacl/_internal.py b/src/databricks/labs/ucx/tacl/_internal.py index 2428199bf6..77b770bf54 100644 --- a/src/databricks/labs/ucx/tacl/_internal.py +++ b/src/databricks/labs/ucx/tacl/_internal.py @@ -1,13 +1,15 @@ import dataclasses +import logging import os from abc import ABC, abstractmethod from collections.abc import Iterator from databricks.sdk import WorkspaceClient -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.providers.mixins.sql import StatementExecutionExt +logger = logging.getLogger(__name__) + class SqlBackend(ABC): @abstractmethod diff --git a/src/databricks/labs/ucx/tacl/tables.py b/src/databricks/labs/ucx/tacl/tables.py index da78b0d7fa..568afc2c44 100644 --- a/src/databricks/labs/ucx/tacl/tables.py +++ b/src/databricks/labs/ucx/tacl/tables.py @@ -1,11 +1,13 @@ +import logging from collections.abc import Iterator from dataclasses import dataclass from functools import partial -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.tacl._internal import CrawlerBase, SqlBackend from databricks.labs.ucx.utils import ThreadedExecution +logger = logging.getLogger(__name__) + @dataclass class Table: diff --git a/src/databricks/labs/ucx/toolkits/group_migration.py b/src/databricks/labs/ucx/toolkits/group_migration.py index 7463798054..299ff40a6a 100644 --- a/src/databricks/labs/ucx/toolkits/group_migration.py +++ b/src/databricks/labs/ucx/toolkits/group_migration.py @@ -1,4 +1,4 @@ -import sys +import logging from databricks.labs.ucx.config import MigrationConfig from databricks.labs.ucx.inventory.inventorizer import Inventorizers @@ -6,7 +6,6 @@ from databricks.labs.ucx.inventory.table import InventoryTableManager from databricks.labs.ucx.managers.group import GroupManager from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger class GroupMigrationToolkit: @@ -36,8 +35,8 @@ def _verify_ws_client(w: ImprovedWorkspaceClient): @staticmethod def _configure_logger(level: str): - logger.remove() # TODO: why removing loggers? - logger.add(sys.stderr, level=level) + ucx_logger = logging.getLogger("databricks.labs.ucx") + ucx_logger.setLevel(level) def prepare_environment(self): self.group_manager.prepare_groups_in_environment() diff --git a/src/databricks/labs/ucx/utils.py b/src/databricks/labs/ucx/utils.py index cc996ee3a1..de74b9ac9c 100644 --- a/src/databricks/labs/ucx/utils.py +++ b/src/databricks/labs/ucx/utils.py @@ -1,5 +1,6 @@ import concurrent import datetime as dt +import logging from collections.abc import Callable from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor from typing import Generic, TypeVar @@ -8,10 +9,10 @@ from databricks.labs.ucx.generic import StrEnum from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger ExecutableResult = TypeVar("ExecutableResult") ExecutableFunction = Callable[..., ExecutableResult] +logger = logging.getLogger(__name__) class ProgressReporter: @@ -52,7 +53,7 @@ def gather(cls, name: str, tasks: list[ExecutableFunction]) -> list[ExecutableRe return cls(tasks, num_threads=4, progress_reporter=reporter).run() def run(self) -> list[ExecutableResult]: - logger.trace(f"Starting {len(self._executables)} tasks in {self._num_threads} threads") + logger.debug(f"Starting {len(self._executables)} tasks in {self._num_threads} threads") with ThreadPoolExecutor(self._num_threads) as executor: for executable in self._executables: @@ -64,7 +65,7 @@ def run(self) -> list[ExecutableResult]: results = concurrent.futures.wait(self._futures, return_when=ALL_COMPLETED) - logger.trace("Collecting the results from threaded execution") + logger.debug("Collecting the results from threaded execution") collected = [future.result() for future in results.done] return collected diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index e6f81cb97c..35235edb6d 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -46,7 +46,6 @@ from databricks.labs.ucx.config import InventoryTable from databricks.labs.ucx.inventory.types import RequestObjectType from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.providers.mixins.sql import StatementExecutionExt from databricks.labs.ucx.utils import ThreadedExecution @@ -61,7 +60,10 @@ _set_random_permissions, ) -logging.getLogger("databricks.sdk").setLevel(logging.INFO) +logging.getLogger("tests").setLevel("DEBUG") +logging.getLogger("databricks.labs.ucx").setLevel("DEBUG") + +logger = logging.getLogger(__name__) NUM_TEST_GROUPS = int(os.environ.get("NUM_TEST_GROUPS", 5)) NUM_TEST_INSTANCE_PROFILES = int(os.environ.get("NUM_TEST_INSTANCE_PROFILES", 3)) diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py index b4ea362d91..a8d4b2dc22 100644 --- a/tests/integration/test_e2e.py +++ b/tests/integration/test_e2e.py @@ -1,3 +1,4 @@ +import logging from typing import Literal import pytest @@ -20,12 +21,13 @@ from databricks.labs.ucx.inventory.types import RequestObjectType from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient from databricks.labs.ucx.providers.groups_info import GroupMigrationState -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit from databricks.labs.ucx.utils import safe_get_acls from .utils import EnvironmentInfo, WorkspaceObjects +logger = logging.getLogger(__name__) + def _verify_group_permissions( objects: list | WorkspaceObjects | None, diff --git a/tests/integration/test_jobs.py b/tests/integration/test_jobs.py index 72093a57c2..d8bb57ea63 100644 --- a/tests/integration/test_jobs.py +++ b/tests/integration/test_jobs.py @@ -1,3 +1,5 @@ +import logging + import pytest from pyspark.errors import AnalysisException @@ -10,12 +12,13 @@ ) from databricks.labs.ucx.inventory.types import RequestObjectType from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit from .test_e2e import _verify_group_permissions, _verify_roles_and_entitlements from .utils import EnvironmentInfo +logger = logging.getLogger(__name__) + def test_jobs( env: EnvironmentInfo, diff --git a/tests/integration/test_setup.py b/tests/integration/test_setup.py index e1b6a80b3b..fee02461cc 100644 --- a/tests/integration/test_setup.py +++ b/tests/integration/test_setup.py @@ -1,12 +1,13 @@ +import logging from functools import partial import pytest from databricks.sdk import WorkspaceClient from databricks.sdk.service.iam import ComplexValue -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.utils import ThreadedExecution +logger = logging.getLogger(__name__) Threader = partial(ThreadedExecution, num_threads=40) diff --git a/tests/integration/test_tacls.py b/tests/integration/test_tacls.py index 6c8c0d938e..4f1c90e241 100644 --- a/tests/integration/test_tacls.py +++ b/tests/integration/test_tacls.py @@ -1,9 +1,11 @@ +import logging import os from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.toolkits.table_acls import TaclToolkit +logger = logging.getLogger(__name__) + def test_describe_all_tables(ws: ImprovedWorkspaceClient, make_catalog, make_schema, make_table): warehouse_id = os.environ["TEST_DEFAULT_WAREHOUSE_ID"] diff --git a/tests/integration/utils.py b/tests/integration/utils.py index 15d2379498..e9cd656786 100644 --- a/tests/integration/utils.py +++ b/tests/integration/utils.py @@ -1,3 +1,4 @@ +import logging import random from dataclasses import dataclass from functools import partial @@ -17,9 +18,10 @@ from databricks.labs.ucx.inventory.types import RequestObjectType from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient -from databricks.labs.ucx.providers.logger import logger from databricks.labs.ucx.utils import WorkspaceLevelEntitlement +logger = logging.getLogger(__name__) + @dataclass class InstanceProfile: diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index e69de29bb2..e581930fef 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -0,0 +1,7 @@ +import logging + +from databricks.labs.ucx.logger import _install + +_install() + +logging.getLogger("tests").setLevel("DEBUG") diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 52297e1a8c..27fe1310fd 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,3 +1,4 @@ +import logging import shutil import tempfile from pathlib import Path @@ -6,7 +7,7 @@ from delta import configure_spark_with_delta_pip from pyspark.sql import SparkSession -from databricks.labs.ucx.providers.logger import logger +logger = logging.getLogger(__name__) @pytest.fixture(scope="session") diff --git a/tests/unit/test_logger.py b/tests/unit/test_logger.py new file mode 100644 index 0000000000..8194896c18 --- /dev/null +++ b/tests/unit/test_logger.py @@ -0,0 +1,10 @@ +import logging + + +def test_logger(): + logger = logging.getLogger(__name__) + logger.debug("This is a debug message") + logger.info("This is an info message") + logger.warning("This is a warning message") + logger.error("This is an error message", exc_info=KeyError(123)) + logger.critical("This is a critical message")