Skip to content

Commit

Permalink
Properly integrate Python logging facility (#118)
Browse files Browse the repository at this point in the history
- removed `loguru`
- added `databricks.labs.ucx.logger.NiceFormatter`

Fix #87

<img width="733" alt="image"
src="https://github.com/databricks/ucx/assets/259697/159a84ae-938d-44e1-a986-7b241aa79f04">
  • Loading branch information
nfx committed Aug 29, 2023
1 parent 32dd2dd commit 9d5bfac
Show file tree
Hide file tree
Showing 25 changed files with 146 additions and 46 deletions.
6 changes: 6 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ Integration tests:
- tests that require their own unique fixture setup must limit the wall clock time of fixture initialization to under one second.
- each integration test must be debuggable in IntelliJ IDEA (Community Edition) with the Python plugin (community edition).

Logging:

- get current logger via `import logging; logger = logging.getLogger(__name__)`. It can be done at the to of the module as well.
- enable debug logging via `import logging; logging.getLogger("databricks").setLevel("DEBUG")`
- enable debug logging only for UCX via `import logging; logging.getLogger("databricks.labs.ucx").setLevel("DEBUG")`

IDE setup:

- The only supported IDE for developing this project is based on IntelliJ. This means that both PyCharm (commercial) and IntelliJ IDEA (Community Edition) with Python plugin (community edition) are supported.
Expand Down
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ dependencies = [
"typer[all]>=0.9.0,<0.10.0",
"pyhocon>=0.3.60,<0.4.0",
"pydantic>=2.0.3, <3.0.0",
"loguru>=0.7.0, <1.0.0",
"PyYAML>=6.0.0,<7.0.0",
"ratelimit>=2.2.1,<3.0.0",
"pandas>=2.0.3,<3.0.0",
Expand Down Expand Up @@ -89,14 +88,14 @@ dependencies = [
]
[tool.hatch.envs.lint.scripts]
fmt = [
"isort .",
"black .",
"ruff . --fix",
"isort ."
]
verify = [
"black --check .",
"isort . --check-only",
"ruff .",
"isort . --check-only"
]

[tool.isort]
Expand All @@ -107,7 +106,6 @@ profile = "black"

[tool.pytest.ini_options]
addopts = "-s -p no:warnings -vv --cache-clear"
log_cli = true
filterwarnings = [
"ignore:::.*pyspark.broadcast*",
"ignore:::.*pyspark.sql.pandas.utils*"
Expand Down
3 changes: 3 additions & 0 deletions src/databricks/labs/ucx/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from databricks.labs.ucx.logger import _install

_install()
3 changes: 2 additions & 1 deletion src/databricks/labs/ucx/inventory/inventorizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterator
from functools import partial
Expand All @@ -24,10 +25,10 @@
)
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.groups_info import GroupMigrationState
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.utils import ProgressReporter, ThreadedExecution

InventoryObject = TypeVar("InventoryObject")
logger = logging.getLogger(__name__)


class BaseInventorizer(ABC, Generic[InventoryObject]):
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/inventory/listing.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import datetime as dt
import logging
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait
from itertools import groupby

from databricks.sdk.service.workspace import ObjectInfo, ObjectType

from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.logger import logger

logger = logging.getLogger(__name__)


class WorkspaceListing:
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/inventory/permissions.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import random
import time
from copy import deepcopy
Expand All @@ -20,9 +21,10 @@
)
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.groups_info import GroupMigrationState
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.utils import ThreadedExecution, safe_get_acls

logger = logging.getLogger(__name__)


@dataclass
class PermissionRequestPayload:
Expand Down
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/inventory/table.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import pandas as pd
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.iam import ObjectPermissions
Expand All @@ -11,9 +13,10 @@
PermissionsInventoryItem,
RequestObjectType,
)
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.providers.spark import SparkMixin

logger = logging.getLogger(__name__)


class InventoryTableManager(SparkMixin):
def __init__(self, config: InventoryConfig, ws: WorkspaceClient):
Expand Down
63 changes: 63 additions & 0 deletions src/databricks/labs/ucx/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import logging
import sys


class NiceFormatter(logging.Formatter):
BOLD = "\033[1m"
RESET = "\033[0m"
GREEN = "\033[32m"
BLACK = "\033[30m"
CYAN = "\033[36m"
YELLOW = "\033[33m"
RED = "\033[31m"
MAGENTA = "\033[35m"
GRAY = "\033[90m"

def __init__(self, *, probe_tty: bool = False) -> None:
super().__init__(fmt="%(asctime)s %(levelname)s [%(name)s] %(message)s", datefmt="%H:%M")
self._levels = {
logging.NOTSET: self._bold(f"{self.BLACK}TRACE"),
logging.DEBUG: self._bold(f"{self.CYAN}DEBUG"),
logging.INFO: self._bold(f"{self.GREEN} INFO"),
logging.WARNING: self._bold(f"{self.YELLOW} WARN"),
logging.ERROR: self._bold(f"{self.RED}ERROR"),
logging.CRITICAL: self._bold(f"{self.MAGENTA}FATAL"),
}
# show colors in runtime, github actions, and while debugging
self.colors = sys.stdout.isatty() if probe_tty else True

def _bold(self, text):
return f"{self.BOLD}{text}{self.RESET}"

def format(self, record: logging.LogRecord): # noqa: A003
if not self.colors:
return super().format(record)
ts = self.formatTime(record, datefmt="%H:%M")
level = self._levels[record.levelno]
# databricks.labs.ucx.foo -> d.l.ucx.foo
name = ".".join(part if i > 1 else part[0] for i, part in enumerate(record.name.split(".")))
msg = record.msg
if record.exc_info and not record.exc_text:
record.exc_text = self.formatException(record.exc_info)
if record.exc_text:
msg += ": " + record.exc_text
if record.stack_info:
if msg[-1:] != "\n":
msg += "\n"
msg += self.formatStack(record.stack_info)

color_marker = self.GRAY
if record.levelno in (logging.INFO, logging.WARNING):
color_marker = self.BLACK + self.BOLD
elif record.levelno in (logging.ERROR, logging.FATAL):
color_marker = self.RED + self.BOLD
return f"{self.GRAY}{ts}{self.RESET} {level} {color_marker}[{name}] {msg}{self.RESET}"


def _install():
for h in logging.root.handlers:
logging.root.removeHandler(h)
console_handler = logging.StreamHandler(sys.stderr)
console_handler.setFormatter(NiceFormatter())
console_handler.setLevel("DEBUG")
logging.root.addHandler(console_handler)
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/managers/group.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import typing
from functools import partial

Expand All @@ -10,9 +11,10 @@
GroupMigrationState,
MigrationGroupInfo,
)
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.utils import ThreadedExecution

logger = logging.getLogger(__name__)


class GroupLevel(StrEnum):
WORKSPACE = "workspace"
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/providers/client.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
from collections.abc import Iterator

from databricks.sdk import WorkspaceClient
Expand All @@ -7,7 +8,8 @@
from ratelimit import limits, sleep_and_retry

from databricks.labs.ucx.inventory.types import RequestObjectType
from databricks.labs.ucx.providers.logger import logger

logger = logging.getLogger(__name__)


class ImprovedWorkspaceClient(WorkspaceClient):
Expand Down
18 changes: 0 additions & 18 deletions src/databricks/labs/ucx/providers/logger.py

This file was deleted.

4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/providers/spark.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import logging

from databricks.sdk import WorkspaceClient
from pyspark.sql import SparkSession

from databricks.labs.ucx.providers.logger import logger
logger = logging.getLogger(__name__)


class SparkMixin:
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/tacl/_internal.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import dataclasses
import logging
import os
from abc import ABC, abstractmethod
from collections.abc import Iterator

from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.providers.mixins.sql import StatementExecutionExt

logger = logging.getLogger(__name__)


class SqlBackend(ABC):
@abstractmethod
Expand Down
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/tacl/tables.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import logging
from collections.abc import Iterator
from dataclasses import dataclass
from functools import partial

from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.tacl._internal import CrawlerBase, SqlBackend
from databricks.labs.ucx.utils import ThreadedExecution

logger = logging.getLogger(__name__)


@dataclass
class Table:
Expand Down
7 changes: 3 additions & 4 deletions src/databricks/labs/ucx/toolkits/group_migration.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import sys
import logging

from databricks.labs.ucx.config import MigrationConfig
from databricks.labs.ucx.inventory.inventorizer import Inventorizers
from databricks.labs.ucx.inventory.permissions import PermissionManager
from databricks.labs.ucx.inventory.table import InventoryTableManager
from databricks.labs.ucx.managers.group import GroupManager
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.logger import logger


class GroupMigrationToolkit:
Expand Down Expand Up @@ -36,8 +35,8 @@ def _verify_ws_client(w: ImprovedWorkspaceClient):

@staticmethod
def _configure_logger(level: str):
logger.remove() # TODO: why removing loggers?
logger.add(sys.stderr, level=level)
ucx_logger = logging.getLogger("databricks.labs.ucx")
ucx_logger.setLevel(level)

def prepare_environment(self):
self.group_manager.prepare_groups_in_environment()
Expand Down
7 changes: 4 additions & 3 deletions src/databricks/labs/ucx/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import concurrent
import datetime as dt
import logging
from collections.abc import Callable
from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor
from typing import Generic, TypeVar
Expand All @@ -8,10 +9,10 @@

from databricks.labs.ucx.generic import StrEnum
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.logger import logger

ExecutableResult = TypeVar("ExecutableResult")
ExecutableFunction = Callable[..., ExecutableResult]
logger = logging.getLogger(__name__)


class ProgressReporter:
Expand Down Expand Up @@ -52,7 +53,7 @@ def gather(cls, name: str, tasks: list[ExecutableFunction]) -> list[ExecutableRe
return cls(tasks, num_threads=4, progress_reporter=reporter).run()

def run(self) -> list[ExecutableResult]:
logger.trace(f"Starting {len(self._executables)} tasks in {self._num_threads} threads")
logger.debug(f"Starting {len(self._executables)} tasks in {self._num_threads} threads")

with ThreadPoolExecutor(self._num_threads) as executor:
for executable in self._executables:
Expand All @@ -64,7 +65,7 @@ def run(self) -> list[ExecutableResult]:

results = concurrent.futures.wait(self._futures, return_when=ALL_COMPLETED)

logger.trace("Collecting the results from threaded execution")
logger.debug("Collecting the results from threaded execution")
collected = [future.result() for future in results.done]
return collected

Expand Down
6 changes: 4 additions & 2 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
from databricks.labs.ucx.config import InventoryTable
from databricks.labs.ucx.inventory.types import RequestObjectType
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.providers.mixins.sql import StatementExecutionExt
from databricks.labs.ucx.utils import ThreadedExecution

Expand All @@ -61,7 +60,10 @@
_set_random_permissions,
)

logging.getLogger("databricks.sdk").setLevel(logging.INFO)
logging.getLogger("tests").setLevel("DEBUG")
logging.getLogger("databricks.labs.ucx").setLevel("DEBUG")

logger = logging.getLogger(__name__)

NUM_TEST_GROUPS = int(os.environ.get("NUM_TEST_GROUPS", 5))
NUM_TEST_INSTANCE_PROFILES = int(os.environ.get("NUM_TEST_INSTANCE_PROFILES", 3))
Expand Down
4 changes: 3 additions & 1 deletion tests/integration/test_e2e.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from typing import Literal

import pytest
Expand All @@ -20,12 +21,13 @@
from databricks.labs.ucx.inventory.types import RequestObjectType
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.groups_info import GroupMigrationState
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit
from databricks.labs.ucx.utils import safe_get_acls

from .utils import EnvironmentInfo, WorkspaceObjects

logger = logging.getLogger(__name__)


def _verify_group_permissions(
objects: list | WorkspaceObjects | None,
Expand Down
5 changes: 4 additions & 1 deletion tests/integration/test_jobs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import logging

import pytest
from pyspark.errors import AnalysisException

Expand All @@ -10,12 +12,13 @@
)
from databricks.labs.ucx.inventory.types import RequestObjectType
from databricks.labs.ucx.providers.client import ImprovedWorkspaceClient
from databricks.labs.ucx.providers.logger import logger
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit

from .test_e2e import _verify_group_permissions, _verify_roles_and_entitlements
from .utils import EnvironmentInfo

logger = logging.getLogger(__name__)


def test_jobs(
env: EnvironmentInfo,
Expand Down
Loading

0 comments on commit 9d5bfac

Please sign in to comment.