Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Propagate source location information within the import package dependency graph #1431

Merged
merged 2 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/databricks/labs/ucx/source_code/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def as_deprecation(self) -> 'Deprecation':
def as_convention(self) -> 'Convention':
return Convention(**self.__dict__)

def as_location(self) -> 'Location':
return Location(**self.__dict__)


class Advisory(Advice):
"""A warning that does not prevent the code from running."""
Expand All @@ -69,6 +72,10 @@ class Convention(Advice):
"""A suggestion for a better way to write the code."""


class Location(Advice):
"""A location in the code, which is not yet an advice"""


class Linter:
@abstractmethod
def lint(self, code: str) -> Iterable[Advice]: ...
Expand Down
111 changes: 86 additions & 25 deletions src/databricks/labs/ucx/source_code/dependencies.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,26 @@
from __future__ import annotations

import abc
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable

from databricks.sdk.service.workspace import ObjectType, ObjectInfo, ExportFormat, Language
from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.source_code.base import Advice, Deprecation
from databricks.labs.ucx.source_code.base import Advice
from databricks.labs.ucx.source_code.python_linter import ImportSource
from databricks.labs.ucx.source_code.whitelist import Whitelist, UCCompatibility


class AbstractVisitor(ABC):
@abstractmethod
def process(self, graph) -> bool | None:
raise NotImplementedError()

@abstractmethod
def get_advices(self) -> Iterable[Advice]:
raise NotImplementedError()


class Dependency:

@staticmethod
Expand All @@ -18,9 +29,20 @@ def from_object_info(object_info: ObjectInfo):
assert object_info.path is not None
return Dependency(object_info.object_type, object_info.path)

def __init__(self, object_type: ObjectType | None, path: str):
def __init__(self, object_type: ObjectType | None, path: str, source: ImportSource | None = None):
self._type = object_type
self._path = path
self._source = source
self._compatibility: UCCompatibility | None = None

@property
def compatibility(self) -> UCCompatibility | None:
return self._compatibility

# cannot do this in construction as the resolver must decide the compatibility
@compatibility.setter
def compatibility(self, value: UCCompatibility):
self._compatibility = value

@property
def type(self) -> ObjectType | None:
Expand All @@ -30,21 +52,25 @@ def type(self) -> ObjectType | None:
def path(self) -> str:
return self._path

@property
def source(self) -> ImportSource | None:
return self._source

def __hash__(self):
return hash(self.path)

def __eq__(self, other):
return isinstance(other, Dependency) and self.path == other.path


class SourceContainer(abc.ABC):
class SourceContainer(ABC):

@property
@abc.abstractmethod
@abstractmethod
def object_type(self) -> ObjectType:
raise NotImplementedError()

@abc.abstractmethod
@abstractmethod
def build_dependency_graph(self, graph) -> None:
raise NotImplementedError()

Expand Down Expand Up @@ -108,31 +134,17 @@ def _load_source(self, object_info: ObjectInfo) -> str:
class DependencyResolver:
def __init__(self, whitelist: Whitelist | None = None):
self._whitelist = Whitelist() if whitelist is None else whitelist
self._advices: list[Advice] = []

def compatibility(self, dependency: Dependency) -> UCCompatibility | None:
return self._whitelist.compatibility(dependency.path)

def resolve(self, dependency: Dependency) -> Dependency | None:
if dependency.type == ObjectType.NOTEBOOK:
return dependency
compatibility = self._whitelist.compatibility(dependency.path)
dependency.compatibility = self.compatibility(dependency)
# TODO attach compatibility to dependency, see https://github.com/databrickslabs/ucx/issues/1382
if compatibility is not None:
if compatibility == UCCompatibility.NONE:
self._advices.append(
Deprecation(
code="dependency-check",
message=f"Use of dependency {dependency.path} is deprecated",
start_line=0,
start_col=0,
end_line=0,
end_col=0,
)
)
return None
return dependency

def get_advices(self) -> Iterable[Advice]:
yield from self._advices


class DependencyGraph:

Expand All @@ -148,6 +160,7 @@ def __init__(
self._loader = loader
self._resolver = resolver or DependencyResolver()
self._dependencies: dict[Dependency, DependencyGraph] = {}
self._processors: list[AbstractVisitor] = []

@property
def dependency(self):
Expand All @@ -157,19 +170,33 @@ def dependency(self):
def path(self):
return self._dependency.path

def register_processors(self, *processors: AbstractVisitor):
"""Register processors to be run on the graph via the process method"""
if not processors:
raise ValueError("At least one processor must be provided")
self._processors.extend(processors)

def register_dependency(self, dependency: Dependency) -> DependencyGraph | None:
resolved = self._resolver.resolve(dependency)
if resolved is None:
return None

# already registered ?
child_graph = self.locate_dependency(resolved)
if child_graph is not None:
self._dependencies[resolved] = child_graph
return child_graph

# nay, create the child graph and populate it
child_graph = DependencyGraph(resolved, self, self._loader, self._resolver)
self._dependencies[resolved] = child_graph
if resolved.compatibility:
# We do not descend into dependencies that are already known in some
# way as they will be linted or migrated.
return None

container = self._loader.load_dependency(resolved)

if not container:
return None
container.build_dependency_graph(child_graph)
Expand Down Expand Up @@ -204,16 +231,30 @@ def dependencies(self) -> set[Dependency]:
def add_to_dependencies(graph: DependencyGraph) -> bool:
if graph.dependency in dependencies:
return True

dependencies.add(graph.dependency)
return False

self.visit(add_to_dependencies)
# Do not add self as a dependency on itself
self.visit_children(add_to_dependencies)

return dependencies

@property
def paths(self) -> set[str]:
return {d.path for d in self.dependencies}

@property
def dependency_count(self) -> int:
return len(self._dependencies)

def visit_children(self, visit_node: Callable[[DependencyGraph], bool | None]) -> bool | None:
"""Use when it is important not to visit the root node"""
for dependency in self._dependencies.values():
if dependency.visit(visit_node):
return True
return False

# when visit_node returns True it interrupts the visit
def visit(self, visit_node: Callable[[DependencyGraph], bool | None]) -> bool | None:
if visit_node(self):
Expand All @@ -222,3 +263,23 @@ def visit(self, visit_node: Callable[[DependencyGraph], bool | None]) -> bool |
if dependency.visit(visit_node):
return True
return False

def walk(self, visitor: AbstractVisitor) -> bool | None:
"""
Process the graph with the given visitor, used by linters or any other processing
system after the graph is constructed
"""
if visitor.process(self):
return True
for dependency in self._dependencies.values():
if dependency.walk(visitor):
return True
return False

def process(self) -> Iterable[Advice]:
for processor in self._processors:
self.walk(processor)
yield from processor.get_advices()

def compatibility(self, dependency: Dependency) -> UCCompatibility | None:
return self._resolver.compatibility(dependency)
48 changes: 48 additions & 0 deletions src/databricks/labs/ucx/source_code/dependency_linters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from collections.abc import Iterable

from databricks.labs.ucx.source_code.base import Advice, Deprecation
from databricks.labs.ucx.source_code.dependencies import AbstractVisitor
from databricks.labs.ucx.source_code.whitelist import UCCompatibility


class ImportChecker(AbstractVisitor):

def __init__(self):
self._advices: list[Advice] = []
self._trace: list[str] = []

def _build_trace(self):
return " <- ".join(self._trace)

def process(self, graph) -> bool | None:
dependency = graph.dependency
self._trace.append(dependency.path)
compatibility = graph.compatibility(dependency)
if compatibility and compatibility == UCCompatibility.NONE:
if dependency.source is not None:
import_type = dependency.source.location.code
self._advices.append(
dependency.source.location.as_deprecation().replace(
code="dependency-check",
message="Deprecated " + import_type + ": " + self._build_trace(),
)
)

else:
self._advices.append(
Deprecation(
code="dependency-check",
message=f"Use of dependency {dependency.path} is deprecated",
start_col=0,
end_col=0,
start_line=0,
end_line=0,
)
)
if graph.dependency_count == 0 and self._trace:
self._trace.pop()
# Do not interrupt the visit
return False

def get_advices(self) -> Iterable[Advice]:
yield from self._advices
6 changes: 3 additions & 3 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ def build_dependency_graph(self, graph: DependencyGraph) -> None:
for path in notebook_paths:
graph.register_dependency(Dependency(ObjectType.NOTEBOOK, path))
# TODO https://github.com/databrickslabs/ucx/issues/1287
import_names = PythonLinter.list_import_sources(linter)
for import_name in import_names:
imports = PythonLinter.list_import_sources(linter)
for import_source in imports:
# we don't know yet if it's a file or a library
graph.register_dependency(Dependency(None, import_name))
graph.register_dependency(Dependency(None, import_source.import_string, import_source))


class WorkspaceFile(SourceFile):
Expand Down
6 changes: 3 additions & 3 deletions src/databricks/labs/ucx/source_code/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@ def build_dependency_graph(self, parent: DependencyGraph):
if isinstance(path, ast.Constant):
dependency = Dependency(ObjectType.NOTEBOOK, path.value.strip("'").strip('"'))
parent.register_dependency(dependency)
names = PythonLinter.list_import_sources(linter)
for name in names:
dependency = Dependency(None, name)
imports = PythonLinter.list_import_sources(linter)
for import_dependency in imports:
dependency = Dependency(None, import_dependency.import_string, import_dependency)
parent.register_dependency(dependency)


Expand Down
Loading
Loading