From 5860a3cc923464b2cbf9863a964562d4861dcaab Mon Sep 17 00:00:00 2001 From: Eric Vergnaud Date: Tue, 7 May 2024 22:58:12 +0200 Subject: [PATCH] Simulate loading of local files or notebooks after manipulation of `sys.path` (#1633) ## Changes Update PathLookup in sequence during building of dependency graph ### Linked issues closes #1202 Resolves #1468 ### Functionality - [ ] added relevant user documentation - [ ] added new CLI command - [ ] modified existing command: `databricks labs ucx ...` - [ ] added a new workflow - [ ] modified existing workflow: `...` - [ ] added a new table - [ ] modified existing table: `...` ### Tests - [ ] manually tested - [x] added unit tests - [ ] added integration tests - [ ] verified on staging environment (screenshot attached) --------- Co-authored-by: Serge Smertin --- pyproject.toml | 1 + .../labs/ucx/contexts/application.py | 16 +- src/databricks/labs/ucx/source_code/files.py | 8 +- src/databricks/labs/ucx/source_code/graph.py | 195 ++++++++++-------- src/databricks/labs/ucx/source_code/jobs.py | 6 +- .../labs/ucx/source_code/notebooks/cells.py | 65 ++---- .../labs/ucx/source_code/notebooks/loaders.py | 2 +- .../labs/ucx/source_code/notebooks/sources.py | 8 +- .../labs/ucx/source_code/path_lookup.py | 13 +- .../labs/ucx/source_code/python_linter.py | 124 +++++++---- .../labs/ucx/source_code/whitelist.py | 68 +++--- tests/unit/__init__.py | 22 +- .../via-sys-path/import_file_1.py | 8 + .../via-sys-path/import_file_2.py | 8 + .../via-sys-path/run_notebook_1.py | 4 + .../via-sys-path/run_notebook_2.py | 4 + .../via-sys-path/run_notebook_4.py | 8 + .../via-sys-path/some-folder/some_file.py | 2 + .../via-sys-path/some-folder/some_notebook.py | 2 + tests/unit/source_code/test_dependencies.py | 122 ++++++----- tests/unit/source_code/test_notebook.py | 12 +- tests/unit/source_code/test_path_lookup.py | 16 -- .../test_path_lookup_simulation.py | 116 ++++++++++- tests/unit/source_code/test_python_linter.py | 26 +-- tests/unit/source_code/test_s3fs.py | 15 +- tests/unit/source_code/test_whitelist.py | 2 +- 26 files changed, 500 insertions(+), 373 deletions(-) create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_1.py create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_2.py create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_1.py create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_2.py create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_4.py create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_file.py create mode 100644 tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_notebook.py diff --git a/pyproject.toml b/pyproject.toml index 1893d41ad0..7bdbdcbfdc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,7 @@ skip-string-normalization = true cache-dir = ".venv/ruff-cache" target-version = "py310" line-length = 120 +exclude = ["tests/unit/source_code/samples/*"] [tool.ruff.lint] ignore = [ diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index d0dd01338f..1f9f6b68f7 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -41,7 +41,7 @@ ) from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver from databricks.labs.ucx.source_code.path_lookup import PathLookup -from databricks.labs.ucx.source_code.graph import DependencyResolver, DependencyGraphBuilder +from databricks.labs.ucx.source_code.graph import DependencyResolver from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist from databricks.labs.ucx.source_code.site_packages import SitePackageResolver, SitePackages from databricks.labs.ucx.source_code.languages import Languages @@ -390,17 +390,9 @@ def file_resolver(self): @cached_property def dependency_resolver(self): - # TODO: link back self.site_packages_resolver and self.whitelist_resolver, - return DependencyResolver( - [ - self.notebook_resolver, - self.file_resolver, - ] - ) - - @cached_property - def dependency_graph_builder(self): - return DependencyGraphBuilder(self.dependency_resolver, self.path_lookup) + # TODO: link back self.site_packages_resolver + resolvers = [self.notebook_resolver, self.file_resolver, self.whitelist_resolver] + return DependencyResolver(resolvers, self.path_lookup) @cached_property def workflow_linter(self): diff --git a/src/databricks/labs/ucx/source_code/files.py b/src/databricks/labs/ucx/source_code/files.py index 88b8955f91..122128aacc 100644 --- a/src/databricks/labs/ucx/source_code/files.py +++ b/src/databricks/labs/ucx/source_code/files.py @@ -33,11 +33,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb if self._language is not CellLanguage.PYTHON: logger.warning(f"Unsupported language: {self._language.language}") return [] - maybe = parent.build_graph_from_python_source(self._original_code) - problems = [] - for problem in maybe.problems: - problems.append(problem.replace(source_path=self._path)) - return problems + return parent.build_graph_from_python_source(self._original_code) @property def path(self) -> Path: @@ -128,6 +124,8 @@ def resolve_local_file(self, path_lookup, path: Path) -> MaybeDependency: return super().resolve_local_file(path_lookup, path) def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + if not name: + return MaybeDependency(None, [DependencyProblem("ucx-bug", "Import name is empty")]) parts = [] # Relative imports use leading dots. A single leading dot indicates a relative import, starting with # the current package. Two or more leading dots indicate a relative import to the parent(s) of the current diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index ca271e2cf9..6fe5fefa1d 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -1,14 +1,19 @@ from __future__ import annotations import abc -import ast -import sys from dataclasses import dataclass from pathlib import Path from collections.abc import Callable from databricks.labs.ucx.source_code.base import Advisory -from databricks.labs.ucx.source_code.python_linter import ASTLinter, PythonLinter +from databricks.labs.ucx.source_code.python_linter import ( + ASTLinter, + PythonLinter, + SysPathChange, + NotebookRunCall, + ImportSource, + NodeBase, +) from databricks.labs.ucx.source_code.path_lookup import PathLookup @@ -46,17 +51,21 @@ def register_library(self, name: str) -> MaybeGraph: # TODO: https://github.com/databrickslabs/ucx/issues/1640 return MaybeGraph(None, [DependencyProblem('not-yet-implemented', f'Library dependency: {name}')]) - def register_notebook(self, path: Path) -> MaybeGraph: + def register_notebook(self, path: Path) -> list[DependencyProblem]: maybe = self._resolver.resolve_notebook(self.path_lookup, path) if not maybe.dependency: - return MaybeGraph(None, maybe.problems) - return self.register_dependency(maybe.dependency) + return maybe.problems + maybe_graph = self.register_dependency(maybe.dependency) + return maybe_graph.problems - def register_import(self, name: str) -> MaybeGraph: + def register_import(self, name: str) -> list[DependencyProblem]: + if not name: + return [DependencyProblem('import-empty', 'Empty import name')] maybe = self._resolver.resolve_import(self.path_lookup, name) if not maybe.dependency: - return MaybeGraph(None, maybe.problems) - return self.register_dependency(maybe.dependency) + return maybe.problems + maybe_graph = self.register_dependency(maybe.dependency) + return maybe_graph.problems def register_dependency(self, dependency: Dependency) -> MaybeGraph: # TODO: this has to be a private method, because we don't want to allow free-form dependencies. @@ -74,7 +83,15 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph: problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path) return MaybeGraph(child_graph, [problem]) problems = container.build_dependency_graph(child_graph) - return MaybeGraph(child_graph, problems) + return MaybeGraph( + child_graph, + [ + problem.replace( + source_path=dependency.path if problem.is_path_missing() else problem.source_path, + ) + for problem in problems + ], + ) def locate_dependency(self, path: Path) -> MaybeGraph: # need a list since unlike JS, Python won't let you assign closure variables @@ -127,7 +144,7 @@ def all_paths(self) -> set[Path]: return {d.path for d in self.all_dependencies} def all_relative_names(self) -> set[str]: - """This method is intented to simplify testing""" + """This method is intended to simplify testing""" return {d.path.relative_to(self._path_lookup.cwd).as_posix() for d in self.all_dependencies} # when visit_node returns True it interrupts the visit @@ -142,51 +159,45 @@ def visit(self, visit_node: Callable[[DependencyGraph], bool | None], visited: s return True return False - def _traverse_notebook_run(self, call: ast.Call): - notebook_path_arg = PythonLinter.get_dbutils_notebook_run_path_arg(call) - if not isinstance(notebook_path_arg, ast.Constant): - yield DependencyProblem( - code='dependency-not-constant', - message="Can't check dependency not provided as a constant", - start_line=call.lineno, - start_col=call.col_offset, - end_line=call.end_lineno or 0, - end_col=call.end_col_offset or 0, - ) - return - notebook_path = notebook_path_arg.value - maybe = self.register_notebook(Path(notebook_path)) - for problem in maybe.problems: - yield problem.replace( - start_line=call.lineno, - start_col=call.col_offset, - end_line=call.end_lineno or 0, - end_col=call.end_col_offset or 0, - ) - - def build_graph_from_python_source(self, python_code: str) -> MaybeGraph: - linter = ASTLinter.parse(python_code) + def build_graph_from_python_source(self, python_code: str) -> list[DependencyProblem]: problems: list[DependencyProblem] = [] - run_notebook_calls = PythonLinter.list_dbutils_notebook_run_calls(linter) - for call in run_notebook_calls: - assert isinstance(call, ast.Call) - for problem in self._traverse_notebook_run(call): - problems.append(problem) - for import_name, node in PythonLinter.list_import_sources(linter): - if import_name.split('.')[0] in {'databricks'} | sys.stdlib_module_names: - # TODO: redundant: see databricks.labs.ucx.source_code.whitelist.Whitelist.__init__ - # we don't need to register stdlib or databricks.* imports - continue - maybe = self.register_import(import_name) - for problem in maybe.problems: - with_lines = problem.replace( - start_line=node.lineno, - start_col=node.col_offset, - end_line=node.end_lineno or 0, - end_col=node.end_col_offset or 0, + linter = ASTLinter.parse(python_code) + syspath_changes = PythonLinter.list_sys_path_changes(linter) + run_calls = PythonLinter.list_dbutils_notebook_run_calls(linter) + import_sources = PythonLinter.list_import_sources(linter) + nodes = syspath_changes + run_calls + import_sources + # need to execute things in intertwined sequence so concat and sort + for base_node in sorted(nodes, key=lambda node: node.node.lineno * 10000 + node.node.col_offset): + for problem in self._process_node(base_node): + problem = problem.replace( + start_line=base_node.node.lineno, + start_col=base_node.node.col_offset, + end_line=base_node.node.end_lineno or 0, + end_col=base_node.node.end_col_offset or 0, ) - problems.append(with_lines) - return MaybeGraph(self, problems) + problems.append(problem) + return problems + + def _process_node(self, base_node: NodeBase): + if isinstance(base_node, SysPathChange): + self._mutate_path_lookup(base_node) + if isinstance(base_node, NotebookRunCall): + strpath = base_node.get_constant_path() + if strpath is None: + yield DependencyProblem('dependency-not-constant', "Can't check dependency not provided as a constant") + else: + yield from self.register_notebook(Path(strpath)) + if isinstance(base_node, ImportSource): + yield from self.register_import(base_node.name) + + def _mutate_path_lookup(self, change: SysPathChange): + path = Path(change.path) + if not path.is_absolute(): + path = self._path_lookup.cwd / path + if change.is_append: + self._path_lookup.append_path(path) + return + self._path_lookup.prepend_path(path) def __repr__(self): return f"" @@ -297,12 +308,13 @@ class MaybeDependency: class DependencyResolver: - def __init__(self, resolvers: list[BaseDependencyResolver]): + def __init__(self, resolvers: list[BaseDependencyResolver], path_lookup: PathLookup): previous: BaseDependencyResolver = StubResolver() for resolver in resolvers: resolver = resolver.with_next_resolver(previous) previous = resolver self._resolver: BaseDependencyResolver = previous + self._path_lookup = path_lookup def resolve_notebook(self, path_lookup: PathLookup, path: Path) -> MaybeDependency: return self._resolver.resolve_notebook(path_lookup, path) @@ -313,8 +325,45 @@ def resolve_local_file(self, path_lookup: PathLookup, path: Path) -> MaybeDepend def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._resolver.resolve_import(path_lookup, name) + def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: + """Builds a dependency graph starting from a file. This method is mainly intended for testing purposes. + In case of problems, the paths in the problems will be relative to the starting path lookup.""" + maybe = self._resolver.resolve_local_file(self._path_lookup, path) + if not maybe.dependency: + return MaybeGraph(None, self._make_relative_paths(maybe.problems, path)) + graph = DependencyGraph(maybe.dependency, None, self, self._path_lookup) + container = maybe.dependency.load(graph.path_lookup) + if container is not None: + problems = container.build_dependency_graph(graph) + if problems: + return MaybeGraph(None, self._make_relative_paths(problems, path)) + return MaybeGraph(graph, []) + + def build_notebook_dependency_graph(self, path: Path) -> MaybeGraph: + """Builds a dependency graph starting from a notebook. This method is mainly intended for testing purposes. + In case of problems, the paths in the problems will be relative to the starting path lookup.""" + maybe = self._resolver.resolve_notebook(self._path_lookup, path) + if not maybe.dependency: + return MaybeGraph(None, self._make_relative_paths(maybe.problems, path)) + graph = DependencyGraph(maybe.dependency, None, self, self._path_lookup) + container = maybe.dependency.load(graph.path_lookup) + if container is not None: + problems = container.build_dependency_graph(graph) + if problems: + return MaybeGraph(None, self._make_relative_paths(problems, path)) + return MaybeGraph(graph, []) + + def _make_relative_paths(self, problems: list[DependencyProblem], path: Path) -> list[DependencyProblem]: + adjusted_problems = [] + for problem in problems: + out_path = path if problem.is_path_missing() else problem.source_path + if out_path.is_absolute() and out_path.is_relative_to(self._path_lookup.cwd): + out_path = out_path.relative_to(self._path_lookup.cwd) + adjusted_problems.append(problem.replace(source_path=out_path)) + return adjusted_problems + def __repr__(self): - return f"" + return f"" MISSING_SOURCE_PATH = "" @@ -372,37 +421,3 @@ class MaybeGraph: @property def failed(self): return len(self.problems) > 0 - - -class DependencyGraphBuilder: - - def __init__(self, resolver: DependencyResolver, path_lookup: PathLookup): - self._resolver = resolver - self._path_lookup = path_lookup - - def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: - maybe = self._resolver.resolve_local_file(self._path_lookup, path) - if not maybe.dependency: - return MaybeGraph(None, maybe.problems) - graph = DependencyGraph(maybe.dependency, None, self._resolver, self._path_lookup) - container = maybe.dependency.load(graph.path_lookup) - if container is not None: - problems = container.build_dependency_graph(graph) - if problems: - return MaybeGraph(None, problems) - return MaybeGraph(graph, []) - - def build_notebook_dependency_graph(self, path: Path) -> MaybeGraph: - maybe = self._resolver.resolve_notebook(self._path_lookup, path) - if not maybe.dependency: - return MaybeGraph(None, maybe.problems) - graph = DependencyGraph(maybe.dependency, None, self._resolver, self._path_lookup) - container = maybe.dependency.load(graph.path_lookup) - if container is not None: - problems = container.build_dependency_graph(graph) - if problems: - return MaybeGraph(None, problems) - return MaybeGraph(graph, []) - - def __repr__(self): - return f"" diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index 1d6cb06ee7..ef161af0ad 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -109,13 +109,11 @@ def _lint_library(self, library: compute.Library, graph: DependencyGraph) -> Ite def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.notebook_task: - return + return [] notebook_path = self._task.notebook_task.notebook_path logger.info(f'Disovering {self._task.task_key} entrypoint: {notebook_path}') path = WorkspacePath(self._ws, notebook_path) - maybe = graph.register_notebook(path) - if maybe.problems: - yield from maybe.problems + return graph.register_notebook(path) def _register_spark_python_task(self, graph: DependencyGraph): # pylint: disable=unused-argument if not self._task.spark_python_task: diff --git a/src/databricks/labs/ucx/source_code/notebooks/cells.py b/src/databricks/labs/ucx/source_code/notebooks/cells.py index cbfd2015dc..970ced55f2 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/cells.py +++ b/src/databricks/labs/ucx/source_code/notebooks/cells.py @@ -8,7 +8,7 @@ from sqlglot import parse as parse_sql, ParseError as SQLParseError from databricks.sdk.service.workspace import Language -from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyProblem, MaybeGraph +from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyProblem # use a specific logger for sqlglot warnings so we can disable them selectively sqlglot_logger = logging.getLogger(f"{__name__}.sqlglot") @@ -53,9 +53,8 @@ def language(self) -> CellLanguage: def is_runnable(self) -> bool: raise NotImplementedError() - @abstractmethod - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - raise NotImplementedError() + def build_dependency_graph(self, _: DependencyGraph) -> list[DependencyProblem]: + return [] def __repr__(self): return f"{self.language.name}: {self._original_code[:20]}" @@ -74,7 +73,7 @@ def is_runnable(self) -> bool: except SyntaxError: return True - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: return parent.build_graph_from_python_source(self._original_code) @@ -87,9 +86,6 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - return MaybeGraph(None, []) - class ScalaCell(Cell): @@ -100,9 +96,6 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - return MaybeGraph(None, []) - class SQLCell(Cell): @@ -118,9 +111,6 @@ def is_runnable(self) -> bool: sqlglot_logger.warning(f"Failed to parse SQL using 'sqlglot': {self._original_code}", exc_info=e) return True - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - return MaybeGraph(None, []) - class MarkdownCell(Cell): @@ -131,9 +121,6 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - return MaybeGraph(None, []) - class RunCell(Cell): @@ -144,7 +131,7 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: + def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: command = f'{LANGUAGE_PREFIX}{self.language.magic_name}' lines = self._original_code.split('\n') for idx, line in enumerate(lines): @@ -155,14 +142,12 @@ def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: if len(path) == 0: continue notebook_path = Path(path) - maybe = parent.register_notebook(notebook_path) - if not maybe.problems: - return maybe start_line = self._original_offset + idx + 1 - problems = self._adjust_problem_source_paths_and_line_numbers(parent, start_line, line, maybe.problems) - if problems: - return MaybeGraph(None, problems) - return MaybeGraph(maybe.graph, []) + problems = parent.register_notebook(notebook_path) + return [ + problem.replace(start_line=start_line, start_col=0, end_line=start_line, end_col=len(line)) + for problem in problems + ] start_line = self._original_offset + 1 problem = DependencyProblem( 'invalid-run-cell', @@ -172,27 +157,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: end_line=start_line, end_col=len(self._original_code), ) - return MaybeGraph(None, [problem]) - - @staticmethod - def _adjust_problem_source_paths_and_line_numbers( - parent: DependencyGraph, - line_number: int, - source_code_line: str, - raw_problems: list[DependencyProblem], - ): - problems: list[DependencyProblem] = [] - for problem in raw_problems: - if problem.is_path_missing(): - problem = problem.replace(source_path=parent.dependency.path.absolute()) - problem = problem.replace( - start_line=line_number, - start_col=0, - end_line=line_number, - end_col=len(source_code_line), - ) - problems.append(problem) - return problems + return [problem] def migrate_notebook_path(self): pass @@ -207,9 +172,6 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - return MaybeGraph(None, []) - class PipCell(Cell): @@ -220,8 +182,9 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, parent: DependencyGraph) -> MaybeGraph: - return MaybeGraph(None, []) + def build_dependency_graph(self, _: DependencyGraph) -> list[DependencyProblem]: + # TODO: https://github.com/databrickslabs/ucx/issues/1642 + return [] class CellLanguage(Enum): diff --git a/src/databricks/labs/ucx/source_code/notebooks/loaders.py b/src/databricks/labs/ucx/source_code/notebooks/loaders.py index 2ac21547f7..ab23d80e7d 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/loaders.py +++ b/src/databricks/labs/ucx/source_code/notebooks/loaders.py @@ -42,7 +42,7 @@ def resolve(self, path_lookup: PathLookup, path: Path) -> Path | None: """When exported through Git, notebooks are saved with a .py extension. If the path is a notebook, return the path to the notebook. If the path is a Python file, return the path to the Python file. If the path is neither, return None.""" - for candidate in (self._adjust_path(path), path): + for candidate in (path, self._adjust_path(path)): absolute_path = path_lookup.resolve(candidate) if not absolute_path: continue diff --git a/src/databricks/labs/ucx/source_code/notebooks/sources.py b/src/databricks/labs/ucx/source_code/notebooks/sources.py index caff152c49..941bfbe449 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/sources.py +++ b/src/databricks/labs/ucx/source_code/notebooks/sources.py @@ -63,12 +63,8 @@ def to_migrated_code(self): def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: problems: list[DependencyProblem] = [] for cell in self._cells: - # create - maybe = cell.build_dependency_graph(parent) - for problem in maybe.problems: - if problem.is_path_missing(): - problem = problem.replace(source_path=parent.path) - problems.append(problem) + cell_problems = cell.build_dependency_graph(parent) + problems.extend(cell_problems) return problems def __repr__(self): diff --git a/src/databricks/labs/ucx/source_code/path_lookup.py b/src/databricks/labs/ucx/source_code/path_lookup.py index 90ba996816..f81474d8e5 100644 --- a/src/databricks/labs/ucx/source_code/path_lookup.py +++ b/src/databricks/labs/ucx/source_code/path_lookup.py @@ -37,20 +37,21 @@ def resolve(self, path: Path) -> Path | None: logger.warning(f"Permission denied to access {absolute_path}") return None - def push_path(self, path: Path): + def has_path(self, path: Path): + return next(p for p in self._sys_paths if path == p) is not None + + def prepend_path(self, path: Path): self._sys_paths.insert(0, path) def insert_path(self, index: int, path: Path): self._sys_paths.insert(index, path) + def append_path(self, path: Path): + self._sys_paths.append(path) + def remove_path(self, index: int): del self._sys_paths[index] - def pop_path(self) -> Path: - result = self._sys_paths[0] - del self._sys_paths[0] - return result - @property def paths(self) -> list[Path]: return [self.cwd] + self._sys_paths diff --git a/src/databricks/labs/ucx/source_code/python_linter.py b/src/databricks/labs/ucx/source_code/python_linter.py index bddca8b58c..50e73d3988 100644 --- a/src/databricks/labs/ucx/source_code/python_linter.py +++ b/src/databricks/labs/ucx/source_code/python_linter.py @@ -4,7 +4,7 @@ import ast import logging from collections.abc import Iterable -from typing import TypeVar, Generic +from typing import TypeVar, Generic, cast from databricks.labs.ucx.source_code.base import Linter, Advice, Advisory @@ -63,23 +63,46 @@ def _matches(self, node: ast.AST, depth: int): return self._matches(next_node, depth + 1) -class SysPath(abc.ABC): +class NodeBase(abc.ABC): - def __init__(self, path: str): + def __init__(self, node: ast.AST): + self._node = node + + @property + def node(self): + return self._node + + def __repr__(self): + return f"<{self.__class__.__name__}: {ast.unparse(self._node)}>" + + +class SysPathChange(NodeBase, abc.ABC): + + def __init__(self, node: ast.AST, path: str, is_append: bool): + super().__init__(node) self._path = path + self._is_append = is_append + + @property + def node(self): + return self._node @property def path(self): return self._path + @property + def is_append(self): + return self._is_append + # path directly added to sys.path -class AbsolutePath(SysPath): +class AbsolutePath(SysPathChange): pass # path added to sys.path using os.path.abspath -class RelativePath(SysPath): +class RelativePath(SysPathChange): pass @@ -87,11 +110,11 @@ class SysPathVisitor(ast.NodeVisitor): def __init__(self): self._aliases: dict[str, str] = {} - self._appended_paths: list[SysPath] = [] + self._syspath_changes: list[SysPathChange] = [] @property - def appended_paths(self): - return self._appended_paths + def syspath_changes(self): + return self._syspath_changes def visit_Import(self, node: ast.Import): for alias in node.names: @@ -109,14 +132,18 @@ def visit_ImportFrom(self, node: ast.ImportFrom): break def visit_Call(self, node: ast.Call): + func = cast(ast.Attribute, node.func) # check for 'sys.path.append' - if not self._match_aliases(node.func, ["sys", "path", "append"]): + if not ( + self._match_aliases(func, ["sys", "path", "append"]) or self._match_aliases(func, ["sys", "path", "insert"]) + ): return - appended = node.args[0] - if isinstance(appended, ast.Constant): - self._appended_paths.append(AbsolutePath(appended.value)) - elif isinstance(appended, ast.Call): - self._append_relative_path(appended) + is_append = func.attr == "append" + changed = node.args[0] if is_append else node.args[1] + if isinstance(changed, ast.Constant): + self._syspath_changes.append(AbsolutePath(node, changed.value, is_append)) + elif isinstance(changed, ast.Call): + self._visit_relative_path(changed, is_append) def _match_aliases(self, node: ast.AST, names: list[str]): if isinstance(node, ast.Attribute): @@ -131,13 +158,13 @@ def _match_aliases(self, node: ast.AST, names: list[str]): return node.id == alias return False - def _append_relative_path(self, node: ast.Call): + def _visit_relative_path(self, node: ast.Call, is_append: bool): # check for 'os.path.abspath' if not self._match_aliases(node.func, ["os", "path", "abspath"]): return - appended = node.args[0] - if isinstance(appended, ast.Constant): - self._appended_paths.append(RelativePath(appended.value)) + changed = node.args[0] + if isinstance(changed, ast.Constant): + self._syspath_changes.append(RelativePath(changed, changed.value, is_append)) T = TypeVar("T", bound=ast.AST) @@ -159,10 +186,10 @@ def locate(self, node_type: type[T], match_nodes: list[tuple[str, type]]) -> lis visitor.visit(self._root) return visitor.matched_nodes - def collect_appended_sys_paths(self): + def collect_sys_paths_changes(self): visitor = SysPathVisitor() visitor.visit(self._root) - return visitor.appended_paths + return visitor.syspath_changes def extract_callchain(self) -> ast.Call | None: """If 'node' is an assignment or expression, extract its full call-chain (if it has one)""" @@ -224,12 +251,31 @@ def __repr__(self): return f"" +class ImportSource(NodeBase): + + def __init__(self, node: ast.AST, name: str): + super().__init__(node) + self.name = name + + +class NotebookRunCall(NodeBase): + + def __init__(self, node: ast.Call): + super().__init__(node) + + def get_constant_path(self) -> str | None: + path = PythonLinter.get_dbutils_notebook_run_path_arg(cast(ast.Call, self.node)) + if isinstance(path, ast.Constant): + return path.value.strip().strip("'").strip('"') + return None + + class PythonLinter(Linter): def lint(self, code: str) -> Iterable[Advice]: linter = ASTLinter.parse(code) nodes = self.list_dbutils_notebook_run_calls(linter) - return [self._convert_dbutils_notebook_run_to_advice(node) for node in nodes] + return [self._convert_dbutils_notebook_run_to_advice(node.node) for node in nodes] @classmethod def _convert_dbutils_notebook_run_to_advice(cls, node: ast.AST) -> Advisory: @@ -261,23 +307,27 @@ def get_dbutils_notebook_run_path_arg(node: ast.Call): return arg.value if arg is not None else None @staticmethod - def list_dbutils_notebook_run_calls(linter: ASTLinter) -> list[ast.Call]: - return linter.locate(ast.Call, [("run", ast.Attribute), ("notebook", ast.Attribute), ("dbutils", ast.Name)]) + def list_dbutils_notebook_run_calls(linter: ASTLinter) -> list[NotebookRunCall]: + calls = linter.locate(ast.Call, [("run", ast.Attribute), ("notebook", ast.Attribute), ("dbutils", ast.Name)]) + return [NotebookRunCall(call) for call in calls] @staticmethod - def list_import_sources(linter: ASTLinter) -> list[tuple[str, ast.AST]]: - nodes = linter.locate(ast.Import, []) - tuples = [(alias.name, node) for node in nodes for alias in node.names] - for node in linter.locate(ast.ImportFrom, []): - leading_dots = "." * node.level - import_name = f"{leading_dots}{node.module}" if node.module else leading_dots - tuples.append((import_name, node)) - nodes = linter.locate(ast.Call, [("import_module", ast.Attribute), ("importlib", ast.Name)]) - tuples.extend((node.args[0].value, node) for node in nodes) - nodes = linter.locate(ast.Call, [("__import__", ast.Attribute), ("importlib", ast.Name)]) - tuples.extend((node.args[0].value, node) for node in nodes) - return tuples + def list_import_sources(linter: ASTLinter) -> list[ImportSource]: + # TODO: make this code more robust, because it fails detecting imports on UCX codebase + try: # pylint: disable=too-many-try-statements + nodes = linter.locate(ast.Import, []) + sources = [ImportSource(node, alias.name) for node in nodes for alias in node.names] + nodes = linter.locate(ast.ImportFrom, []) + sources.extend(ImportSource(node, node.module) for node in nodes) + nodes = linter.locate(ast.Call, [("import_module", ast.Attribute), ("importlib", ast.Name)]) + sources.extend(ImportSource(node, node.args[0].value) for node in nodes) + nodes = linter.locate(ast.Call, [("__import__", ast.Attribute), ("importlib", ast.Name)]) + sources.extend(ImportSource(node, node.args[0].value) for node in nodes) + return sources + except Exception as e: # pylint: disable=broad-except + logger.warning(f"{linter} imports: {e}") + return [] @staticmethod - def list_appended_sys_paths(linter: ASTLinter) -> list[SysPath]: - return linter.collect_appended_sys_paths() + def list_sys_path_changes(linter: ASTLinter) -> list[SysPathChange]: + return linter.collect_sys_paths_changes() diff --git a/src/databricks/labs/ucx/source_code/whitelist.py b/src/databricks/labs/ucx/source_code/whitelist.py index 080605d8c7..8612777fbc 100644 --- a/src/databricks/labs/ucx/source_code/whitelist.py +++ b/src/databricks/labs/ucx/source_code/whitelist.py @@ -1,18 +1,17 @@ from __future__ import annotations import abc +import logging import sys from dataclasses import dataclass, field from enum import Enum -from pathlib import Path from collections.abc import Iterable + from yaml import load_all as load_yaml, Loader from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.graph import ( - Dependency, - WrappingLoader, SourceContainer, DependencyGraph, BaseDependencyResolver, @@ -20,6 +19,8 @@ MaybeDependency, ) +logger = logging.getLogger(__name__) + class WhitelistResolver(BaseDependencyResolver): @@ -30,32 +31,19 @@ def __init__(self, whitelist: Whitelist, next_resolver: BaseDependencyResolver | def with_next_resolver(self, resolver: BaseDependencyResolver) -> BaseDependencyResolver: return WhitelistResolver(self._whitelist, resolver) - # TODO problem_collector is tactical, pending https://github.com/databrickslabs/ucx/issues/1559 def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: - problem, is_known = self._is_whitelisted(name) - if problem is not None: - return MaybeDependency(None, [problem]) - if is_known: - container = StubContainer() - dependency = Dependency(WrappingLoader(container), Path(name)) - return MaybeDependency(dependency, []) - return super().resolve_import(path_lookup, name) - - def _is_whitelisted(self, name: str) -> tuple[DependencyProblem | None, bool]: - compatibility = self._whitelist.compatibility(name) # TODO attach compatibility to dependency, see https://github.com/databrickslabs/ucx/issues/1382 - if compatibility is None: - return None, False + compatibility = self._whitelist.compatibility(name) + if compatibility == UCCompatibility.FULL: + return MaybeDependency(None, []) if compatibility == UCCompatibility.NONE: # TODO move to linter, see https://github.com/databrickslabs/ucx/issues/1527 - return ( - DependencyProblem( - code="dependency-check", - message=f"Use of dependency {name} is deprecated", - ), - True, - ) - return None, True + problem = DependencyProblem("dependency-check", f"Use of dependency {name} is deprecated") + return MaybeDependency(None, [problem]) + if compatibility == UCCompatibility.PARTIAL: + problem = DependencyProblem("dependency-check", f"Package {name} is only partially supported by UC") + return MaybeDependency(None, [problem]) + return super().resolve_import(path_lookup, name) class StubContainer(SourceContainer): @@ -65,6 +53,7 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb class UCCompatibility(Enum): + UNKNOWN = "unknown" NONE = "none" PARTIAL = "partial" FULL = "full" @@ -118,6 +107,16 @@ def __post_init__(self): class PipPackage(KnownPackage): packages: dict[str, PythonPackage] + @classmethod + def compatible(cls, name: str): + return cls( + Identifier(name=name), + name, + { + name: PythonPackage(name=name, compatibility=UCCompatibility.FULL), + }, + ) + def compatibility_of(self, name: str) -> UCCompatibility: while len(name) > 0: package = self.packages.get(name, None) @@ -151,6 +150,19 @@ def __init__(self, pips: Iterable[PipPackage] | None = None): ] if pips is not None: known_packages.extend(pips) + known_packages.extend( + [ + PipPackage.compatible("click"), + PipPackage.compatible("databricks"), + PipPackage.compatible("google"), + PipPackage.compatible("pandas"), + PipPackage.compatible("pytest"), + PipPackage.compatible("requests"), + PipPackage.compatible("sqlglot"), + PipPackage.compatible("urllib3"), + PipPackage.compatible("yaml"), + ] + ) self._known_packages: dict[str, list[KnownPackage]] = {} for known in known_packages: top_levels: list[str] = known.top_level if isinstance(known.top_level, list) else [known.top_level] @@ -161,11 +173,13 @@ def __init__(self, pips: Iterable[PipPackage] | None = None): self._known_packages[top_level] = packs packs.append(known) - def compatibility(self, name: str) -> UCCompatibility | None: + def compatibility(self, name: str) -> UCCompatibility: + if not name: + return UCCompatibility.UNKNOWN root = name.split('.')[0] packages = self._known_packages.get(root, None) if packages is None: - return None + return UCCompatibility.UNKNOWN # TODO ignore versioning for now, see https://github.com/databrickslabs/ucx/issues/1382 known_package = packages[0] return known_package.compatibility_of(name) diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index 6504deae65..1a567ed4dd 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -20,7 +20,6 @@ from databricks.labs.ucx.hive_metastore.mapping import TableMapping, TableToMigrate from databricks.labs.ucx.source_code.graph import SourceContainer from databricks.labs.ucx.source_code.path_lookup import PathLookup -from databricks.labs.ucx.source_code.whitelist import Whitelist logging.getLogger("tests").setLevel("DEBUG") @@ -161,17 +160,14 @@ def change_directory(self, new_working_directory: Path) -> 'MockPathLookup': return MockPathLookup(new_working_directory, self._sys_paths) def resolve(self, path: pathlib.Path) -> pathlib.Path | None: - if path.is_absolute() and path.exists(): - return path - filename = path.as_posix() - candidates = [filename] - if not filename.endswith('.txt'): - candidates.append(f'{filename}.txt') + candidates = [path] + if not path.name.endswith('.txt'): + candidates.append(Path(f"{path}.txt")) for candidate in candidates: - some_file = self._cwd / candidate - if not some_file.exists(): + absolute_path = super().resolve(candidate) + if not absolute_path: continue - return some_file + return absolute_path return None def __repr__(self): @@ -210,12 +206,6 @@ def table_mapping_mock(tables: list[str] | None = None): return table_mapping -def whitelist_mock(): - wls = create_autospec(Whitelist) - wls.compatibility.return_value = None - return wls - - def locate_site_packages() -> pathlib.Path: project_path = pathlib.Path(os.path.dirname(__file__)).parent.parent python_lib_path = pathlib.Path(project_path, ".venv", "lib") diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_1.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_1.py new file mode 100644 index 0000000000..f2105f6441 --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_1.py @@ -0,0 +1,8 @@ +import sys + + +def func(): + sys.path.append("./some-folder") + from some_file import stuff + + stuff() diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_2.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_2.py new file mode 100644 index 0000000000..1f0f8e963f --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/import_file_2.py @@ -0,0 +1,8 @@ +import sys + + +def func(): + sys.path.insert(0, "./some-folder") + from some_file import stuff + + stuff() diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_1.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_1.py new file mode 100644 index 0000000000..8f3de8ed53 --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_1.py @@ -0,0 +1,4 @@ +# Databricks notebook source +import sys + +sys.path.append("./some-folder") diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_2.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_2.py new file mode 100644 index 0000000000..097c8a9478 --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_2.py @@ -0,0 +1,4 @@ +# Databricks notebook source +import sys + +sys.path.insert(0, "./some-folder") diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_4.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_4.py new file mode 100644 index 0000000000..579ad7b3c3 --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/run_notebook_4.py @@ -0,0 +1,8 @@ +# Databricks notebook source +import sys + +sys.path.append("./some-folder") + +from some_file import stuff + +stuff() diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_file.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_file.py new file mode 100644 index 0000000000..da902935b1 --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_file.py @@ -0,0 +1,2 @@ +def stuff(): + print("stuff() was called from some_file.py") diff --git a/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_notebook.py b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_notebook.py new file mode 100644 index 0000000000..491230d9a4 --- /dev/null +++ b/tests/unit/source_code/samples/simulate-sys-path/via-sys-path/some-folder/some_notebook.py @@ -0,0 +1,2 @@ +# Databricks notebook source +whatever = 12 diff --git a/tests/unit/source_code/test_dependencies.py b/tests/unit/source_code/test_dependencies.py index 71697661ea..190fd49680 100644 --- a/tests/unit/source_code/test_dependencies.py +++ b/tests/unit/source_code/test_dependencies.py @@ -5,7 +5,6 @@ SourceContainer, DependencyResolver, DependencyProblem, - DependencyGraphBuilder, ) from databricks.labs.ucx.source_code.notebooks.loaders import ( NotebookResolver, @@ -13,10 +12,9 @@ ) from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver from databricks.labs.ucx.source_code.path_lookup import PathLookup -from databricks.labs.ucx.source_code.whitelist import WhitelistResolver +from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist from databricks.labs.ucx.source_code.site_packages import SitePackageResolver, SitePackages from tests.unit import ( - whitelist_mock, locate_site_packages, _samples_path, MockPathLookup, @@ -26,9 +24,8 @@ def test_dependency_graph_builder_visits_workspace_notebook_dependencies(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path("root3.run.py")) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(Path("root3.run.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root3.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"} @@ -36,15 +33,14 @@ def test_dependency_graph_builder_visits_workspace_notebook_dependencies(): def test_dependency_graph_builder_visits_local_notebook_dependencies(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path("root4.py.txt")) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root4.py.txt", "leaf3.py.txt"} def test_dependency_graph_builder_visits_workspace_file_dependencies(): - whi = whitelist_mock() + whi = Whitelist() site_packages = SitePackages.parse(locate_site_packages()) file_loader = FileLoader() lookup = MockPathLookup() @@ -55,16 +51,16 @@ def test_dependency_graph_builder_visits_workspace_file_dependencies(): SitePackageResolver(site_packages, file_loader, lookup), WhitelistResolver(whi), LocalFileResolver(file_loader), - ] + ], + lookup, ) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path('./root8.py')) + maybe = dependency_resolver.build_local_file_dependency_graph(Path('./root8.py')) assert not maybe.failed assert maybe.graph.all_relative_names() == {'leaf1.py.txt', 'leaf2.py.txt', 'root8.py.txt'} def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook_dependency(): - whi = whitelist_mock() + whi = Whitelist() site_packages = SitePackages.parse(locate_site_packages()) file_loader = FileLoader() lookup = MockPathLookup() @@ -75,15 +71,15 @@ def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook SitePackageResolver(site_packages, file_loader, lookup), WhitelistResolver(whi), LocalFileResolver(file_loader), - ] + ], + lookup, ) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path("./root1-no-leaf.run.py")) + maybe = dependency_resolver.build_notebook_dependency_graph(Path("root1-no-leaf.run.py")) assert list(maybe.problems) == [ DependencyProblem( 'notebook-not-found', 'Notebook not found: __NOT_FOUND__', - lookup.cwd / 'root1-no-leaf.run.py.txt', + Path('root1-no-leaf.run.py'), 19, 0, 19, @@ -95,12 +91,11 @@ def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook def test_dependency_graph_builder_raises_problem_with_unfound_local_notebook_dependency(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path("./root4-no-leaf.py")) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4-no-leaf.py")) assert list(maybe.problems) == [ DependencyProblem( - 'notebook-not-found', 'Notebook not found: __NO_LEAF__', lookup.cwd / 'root4-no-leaf.py.txt', 1, 0, 1, 37 + 'notebook-not-found', 'Notebook not found: __NO_LEAF__', Path('root4-no-leaf.py'), 1, 0, 1, 37 ) ] @@ -108,14 +103,13 @@ def test_dependency_graph_builder_raises_problem_with_unfound_local_notebook_dep def test_dependency_graph_builder_raises_problem_with_non_constant_local_notebook_dependency(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path('./root10.py.txt')) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(Path('root10.py.txt')) assert list(maybe.problems) == [ DependencyProblem( 'dependency-not-constant', "Can't check dependency not provided as a constant", - lookup.cwd / 'root10.py.txt', + Path('root10.py.txt'), 2, 0, 2, @@ -127,22 +121,20 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_local_noteboo def test_dependency_graph_builder_raises_problem_with_invalid_run_cell(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path('leaf6.py.txt')) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(Path('leaf6.py.txt')) assert list(maybe.problems) == [ - DependencyProblem( - 'invalid-run-cell', 'Missing notebook path in %run command', lookup.cwd / 'leaf6.py.txt', 5, 0, 5, 4 - ) + DependencyProblem('invalid-run-cell', 'Missing notebook path in %run command', Path('leaf6.py.txt'), 5, 0, 5, 4) ] def test_dependency_graph_builder_visits_recursive_file_dependencies(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path("root6.py.txt")) + dependency_resolver = DependencyResolver( + [NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())], lookup + ) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("root6.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root6.py.txt", "root5.py.txt", "leaf4.py.txt"} @@ -150,12 +142,13 @@ def test_dependency_graph_builder_visits_recursive_file_dependencies(): def test_dependency_graph_builder_raises_problem_with_unresolved_import(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path('root7.py.txt')) + dependency_resolver = DependencyResolver( + [NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())], lookup + ) + maybe = dependency_resolver.build_local_file_dependency_graph(Path('root7.py.txt')) assert list(maybe.problems) == [ DependencyProblem( - 'import-not-found', 'Could not locate import: some_library', lookup.cwd / "root7.py.txt", 1, 0, 1, 19 + 'import-not-found', 'Could not locate import: some_library', Path("root7.py.txt"), 1, 0, 1, 19 ) ] @@ -163,14 +156,14 @@ def test_dependency_graph_builder_raises_problem_with_unresolved_import(): def test_dependency_graph_builder_raises_problem_with_non_constant_notebook_argument(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path("run_notebooks.py.txt")) + resolvers = [NotebookResolver(notebook_loader), WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] + dependency_resolver = DependencyResolver(resolvers, lookup) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("run_notebooks.py.txt")) assert list(maybe.problems) == [ DependencyProblem( 'dependency-not-constant', "Can't check dependency not provided as a constant", - lookup.cwd / "run_notebooks.py.txt", + Path("run_notebooks.py.txt"), 14, 13, 14, @@ -182,9 +175,10 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_notebook_argu def test_dependency_graph_builder_visits_file_dependencies(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path("root5.py.txt")) + dependency_resolver = DependencyResolver( + [NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())], lookup + ) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("root5.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root5.py.txt", "leaf4.py.txt"} @@ -192,9 +186,9 @@ def test_dependency_graph_builder_visits_file_dependencies(): def test_dependency_graph_builder_skips_builtin_dependencies(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt")) + resolvers = [NotebookResolver(notebook_loader), WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] + dependency_resolver = DependencyResolver(resolvers, lookup) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert not maybe.failed graph = maybe.graph maybe = graph.locate_dependency(Path("os")) @@ -206,9 +200,9 @@ def test_dependency_graph_builder_skips_builtin_dependencies(): def test_dependency_graph_builder_ignores_known_dependencies(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader), LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path("python_builtins.py.txt")) + resolvers = [NotebookResolver(notebook_loader), WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] + dependency_resolver = DependencyResolver(resolvers, lookup) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert maybe.graph graph = maybe.graph maybe_graph = graph.locate_dependency(Path("databricks")) @@ -223,12 +217,12 @@ def test_dependency_graph_builder_visits_site_packages(empty_index): notebook_loader = NotebookLoader() resolvers = [ NotebookResolver(notebook_loader), + WhitelistResolver(Whitelist()), SitePackageResolver(site_packages, file_loader, provider), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(resolvers) - builder = DependencyGraphBuilder(dependency_resolver, provider) - maybe = builder.build_local_file_dependency_graph(Path("import-site-package.py.txt")) + dependency_resolver = DependencyResolver(resolvers, provider) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-site-package.py.txt")) assert not maybe.failed graph = maybe.graph maybe = graph.locate_dependency(Path(site_packages_path, "certifi/core.py")) @@ -239,16 +233,18 @@ def test_dependency_graph_builder_visits_site_packages(empty_index): def test_dependency_graph_builder_raises_problem_with_unfound_root_file(empty_index): lookup = MockPathLookup() - dependency_resolver = DependencyResolver([LocalFileResolver(FileLoader())]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(Path("non-existing.py.txt")) - assert list(maybe.problems) == [DependencyProblem('file-not-found', 'File not found: non-existing.py.txt')] + dependency_resolver = DependencyResolver([LocalFileResolver(FileLoader())], lookup) + maybe = dependency_resolver.build_local_file_dependency_graph(Path("non-existing.py.txt")) + assert list(maybe.problems) == [ + DependencyProblem('file-not-found', 'File not found: non-existing.py.txt', Path("non-existing.py.txt")) + ] def test_dependency_graph_builder_raises_problem_with_unfound_root_notebook(empty_index): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(Path("unknown_notebook")) - assert list(maybe.problems) == [DependencyProblem('notebook-not-found', 'Notebook not found: unknown_notebook')] + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(Path("unknown_notebook")) + assert list(maybe.problems) == [ + DependencyProblem('notebook-not-found', 'Notebook not found: unknown_notebook', Path("unknown_notebook")) + ] diff --git a/tests/unit/source_code/test_notebook.py b/tests/unit/source_code/test_notebook.py index b03f7c2427..b39bd8c523 100644 --- a/tests/unit/source_code/test_notebook.py +++ b/tests/unit/source_code/test_notebook.py @@ -128,7 +128,7 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str def test_notebook_builds_leaf_dependency_graph(): lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) maybe = dependency_resolver.resolve_notebook(lookup, Path("leaf1.py.txt")) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, lookup) container = maybe.dependency.load(lookup) @@ -146,7 +146,7 @@ def test_notebook_builds_depth1_dependency_graph(): paths = ["root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) maybe = dependency_resolver.resolve_notebook(lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, lookup) container = maybe.dependency.load(lookup) @@ -159,7 +159,7 @@ def test_notebook_builds_depth2_dependency_graph(): paths = ["root2.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) maybe = dependency_resolver.resolve_notebook(lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, lookup) container = maybe.dependency.load(lookup) @@ -172,7 +172,7 @@ def test_notebook_builds_dependency_graph_avoiding_duplicates(): paths = ["root3.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) maybe = dependency_resolver.resolve_notebook(lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, lookup) container = maybe.dependency.load(lookup) @@ -186,7 +186,7 @@ def test_notebook_builds_cyclical_dependency_graph(): paths = ["cyclical1.run.py.txt", "cyclical2.run.py.txt"] lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) maybe = dependency_resolver.resolve_notebook(lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, lookup) container = maybe.dependency.load(lookup) @@ -199,7 +199,7 @@ def test_notebook_builds_python_dependency_graph(): paths = ["root4.py.txt", "leaf3.py.txt"] lookup = MockPathLookup() notebook_loader = NotebookLoader() - dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)]) + dependency_resolver = DependencyResolver([NotebookResolver(notebook_loader)], lookup) maybe = dependency_resolver.resolve_notebook(lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, lookup) container = maybe.dependency.load(lookup) diff --git a/tests/unit/source_code/test_path_lookup.py b/tests/unit/source_code/test_path_lookup.py index 3116860edb..920dfd682b 100644 --- a/tests/unit/source_code/test_path_lookup.py +++ b/tests/unit/source_code/test_path_lookup.py @@ -18,14 +18,6 @@ def test_lookup_is_initialized_with_handmade_string(): assert ["what", "on", "earth"] == [path.as_posix() for path in paths] -def test_lookup_pushes_path(): - provider = PathLookup.from_pathlike_string(Path.cwd(), "what:on:earth") - provider.push_path(Path("is")) - provider.push_path(Path("this")) - paths = list(provider.paths)[1:] - assert [path.as_posix() for path in paths] == ["this", "is", "what", "on", "earth"] - - def test_lookup_inserts_path(): provider = PathLookup.from_pathlike_string(Path.cwd(), "what:on:earth") provider.insert_path(1, Path("is")) @@ -38,11 +30,3 @@ def test_lookup_removes_path(): provider.remove_path(1) paths = list(provider.paths)[1:] assert [path.as_posix() for path in paths] == ["what", "on", "earth"] - - -def test_lookup_pops_path(): - provider = PathLookup.from_pathlike_string(Path.cwd(), "what:on:earth") - popped = provider.pop_path() - assert popped.as_posix() == "what" - paths = list(provider.paths)[1:] - assert [path.as_posix() for path in paths] == ["on", "earth"] diff --git a/tests/unit/source_code/test_path_lookup_simulation.py b/tests/unit/source_code/test_path_lookup_simulation.py index 745a943026..f0b741ca9f 100644 --- a/tests/unit/source_code/test_path_lookup_simulation.py +++ b/tests/unit/source_code/test_path_lookup_simulation.py @@ -1,15 +1,15 @@ from pathlib import Path +from tempfile import TemporaryDirectory import pytest from databricks.labs.ucx.source_code.files import LocalFileResolver, FileLoader from databricks.labs.ucx.source_code.path_lookup import PathLookup -from databricks.labs.ucx.source_code.graph import SourceContainer, DependencyGraphBuilder, DependencyResolver +from databricks.labs.ucx.source_code.graph import SourceContainer, DependencyResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.site_packages import SitePackages, SitePackageResolver -from databricks.labs.ucx.source_code.whitelist import WhitelistResolver +from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist from tests.unit import ( _samples_path, - whitelist_mock, locate_site_packages, MockPathLookup, ) @@ -32,6 +32,9 @@ ], 3, ), + (["simulate-sys-path", "via-sys-path", "run_notebook_1.py"], 1), + (["simulate-sys-path", "via-sys-path", "run_notebook_2.py"], 1), + (["simulate-sys-path", "via-sys-path", "run_notebook_4.py"], 2), ], ) def test_locates_notebooks(source: list[str], expected: int): @@ -44,23 +47,30 @@ def test_locates_notebooks(source: list[str], expected: int): site_packages = SitePackages.parse(locate_site_packages()) resolvers = [ NotebookResolver(notebook_loader), + WhitelistResolver(Whitelist()), SitePackageResolver(site_packages, file_loader, lookup), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(resolvers) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_notebook_dependency_graph(notebook_path) + dependency_resolver = DependencyResolver(resolvers, lookup) + maybe = dependency_resolver.build_notebook_dependency_graph(notebook_path) assert not maybe.problems assert maybe.graph is not None assert len(maybe.graph.all_paths) == expected -@pytest.mark.parametrize("source, expected", [(["simulate-sys-path", "siblings", "sibling1_file.py"], 2)]) +@pytest.mark.parametrize( + "source, expected", + [ + (["simulate-sys-path", "siblings", "sibling1_file.py"], 2), + (["simulate-sys-path", "via-sys-path", "import_file_1.py"], 2), + (["simulate-sys-path", "via-sys-path", "import_file_2.py"], 2), + ], +) def test_locates_files(source: list[str], expected: int): elems = [_samples_path(SourceContainer)] elems.extend(source) file_path = Path(*elems) - whitelist = whitelist_mock() + whitelist = Whitelist() provider = PathLookup.from_sys_path(Path.cwd()) file_loader = FileLoader() notebook_loader = NotebookLoader() @@ -71,8 +81,94 @@ def test_locates_files(source: list[str], expected: int): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - builder = DependencyGraphBuilder(DependencyResolver(resolvers), provider) - maybe = builder.build_local_file_dependency_graph(file_path) + resolver = DependencyResolver(resolvers, provider) + maybe = resolver.build_local_file_dependency_graph(file_path) assert not maybe.problems assert maybe.graph is not None assert len(maybe.graph.all_dependencies) == expected + + +def test_locates_notebooks_with_absolute_path(): + with TemporaryDirectory() as parent_dir: + parent_dir_path = Path(parent_dir) + child_dir_path = Path(parent_dir_path, "some_folder") + child_dir_path.mkdir() + child_file_path = Path(child_dir_path, "some_notebook.py") + child_file_path.write_text( + """# Databricks notebook source_code +whatever = 12 +""", + "utf-8", + ) + parent_file_path = Path(child_dir_path, "run_notebook.py") + parent_file_path.write_text( + f"""# Databricks notebook source_code +import sys + +sys.path.append('{child_dir_path.as_posix()}') + +# COMMAND ---------- + +# MAGIC %run some_notebook +""", + "utf-8", + ) + whitelist = Whitelist() + provider = PathLookup.from_sys_path(Path.cwd()) + file_loader = FileLoader() + notebook_loader = NotebookLoader() + site_packages = SitePackages.parse(locate_site_packages()) + resolvers = [ + NotebookResolver(notebook_loader), + SitePackageResolver(site_packages, file_loader, provider), + WhitelistResolver(whitelist), + LocalFileResolver(file_loader), + ] + resolver = DependencyResolver(resolvers, provider) + maybe = resolver.build_notebook_dependency_graph(parent_file_path) + assert not maybe.problems + assert maybe.graph is not None + assert len(maybe.graph.all_paths) == 2 + + +def test_locates_files_with_absolute_path(): + with TemporaryDirectory() as parent_dir: + parent_dir_path = Path(parent_dir) + child_dir_path = Path(parent_dir_path, "some_folder") + child_dir_path.mkdir() + child_file_path = Path(child_dir_path, "some_file.py") + child_file_path.write_text( + """def stuff(): + pass +""", + "utf-8", + ) + parent_file_path = Path(child_dir_path, "import_file.py") + parent_file_path.write_text( + f"""# Databricks notebook source + +import sys + +def func(): + sys.path.append("{child_file_path.as_posix()}") + from some_file import stuff + stuff() +""", + "utf-8", + ) + whitelist = Whitelist() + provider = PathLookup.from_sys_path(Path.cwd()) + file_loader = FileLoader() + notebook_loader = NotebookLoader() + site_packages = SitePackages.parse(locate_site_packages()) + resolvers = [ + NotebookResolver(notebook_loader), + SitePackageResolver(site_packages, file_loader, provider), + WhitelistResolver(whitelist), + LocalFileResolver(file_loader), + ] + resolver = DependencyResolver(resolvers, provider) + maybe = resolver.build_notebook_dependency_graph(parent_file_path) + assert not maybe.problems + assert maybe.graph is not None + assert maybe.graph.all_relative_names() == {"some_file.py", "import_file.py"} diff --git a/tests/unit/source_code/test_python_linter.py b/tests/unit/source_code/test_python_linter.py index 587314b448..b1694e85c9 100644 --- a/tests/unit/source_code/test_python_linter.py +++ b/tests/unit/source_code/test_python_linter.py @@ -19,7 +19,7 @@ def test_linter_returns_list_of_dbutils_notebook_run_calls(): """ linter = ASTLinter.parse(code) calls = PythonLinter.list_dbutils_notebook_run_calls(linter) - assert {"toto", "stuff"} == {str(call.args[0].value) for call in calls} + assert {"toto", "stuff"} == {str(call.node.args[0].value) for call in calls} def test_linter_returns_empty_list_of_imports(): @@ -29,22 +29,22 @@ def test_linter_returns_empty_list_of_imports(): def test_linter_returns_import(): linter = ASTLinter.parse('import x') - assert ["x"] == [pair[0] for pair in PythonLinter.list_import_sources(linter)] + assert ["x"] == [node.name for node in PythonLinter.list_import_sources(linter)] def test_linter_returns_import_from(): linter = ASTLinter.parse('from x import z') - assert ["x"] == [pair[0] for pair in PythonLinter.list_import_sources(linter)] + assert ["x"] == [node.name for node in PythonLinter.list_import_sources(linter)] def test_linter_returns_import_module(): linter = ASTLinter.parse('importlib.import_module("x")') - assert ["x"] == [pair[0] for pair in PythonLinter.list_import_sources(linter)] + assert ["x"] == [node.name for node in PythonLinter.list_import_sources(linter)] def test_linter_returns__import__(): linter = ASTLinter.parse('importlib.__import__("x")') - assert ["x"] == [pair[0] for pair in PythonLinter.list_import_sources(linter)] + assert ["x"] == [node.name for node in PythonLinter.list_import_sources(linter)] def test_linter_returns_appended_absolute_paths(): @@ -54,7 +54,7 @@ def test_linter_returns_appended_absolute_paths(): sys.path.append("absolute_path_2") """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert ["absolute_path_1", "absolute_path_2"] == [p.path for p in appended] @@ -65,7 +65,7 @@ def test_linter_returns_appended_absolute_paths_with_sys_alias(): stuff.path.append("absolute_path_2") """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert ["absolute_path_1", "absolute_path_2"] == [p.path for p in appended] @@ -75,7 +75,7 @@ def test_linter_returns_appended_absolute_paths_with_sys_path_alias(): stuff.append("absolute_path") """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert "absolute_path" in [p.path for p in appended] @@ -86,7 +86,7 @@ def test_linter_returns_appended_relative_paths(): sys.path.append(os.path.abspath("relative_path")) """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert "relative_path" in [p.path for p in appended] @@ -97,7 +97,7 @@ def test_linter_returns_appended_relative_paths_with_os_alias(): sys.path.append(stuff.path.abspath("relative_path")) """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert "relative_path" in [p.path for p in appended] @@ -108,7 +108,7 @@ def test_linter_returns_appended_relative_paths_with_os_path_alias(): sys.path.append(stuff.abspath("relative_path")) """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert "relative_path" in [p.path for p in appended] @@ -119,7 +119,7 @@ def test_linter_returns_appended_relative_paths_with_os_path_abspath_import(): sys.path.append(abspath("relative_path")) """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert "relative_path" in [p.path for p in appended] @@ -130,7 +130,7 @@ def test_linter_returns_appended_relative_paths_with_os_path_abspath_alias(): sys.path.append(stuff("relative_path")) """ linter = ASTLinter.parse(code) - appended = PythonLinter.list_appended_sys_paths(linter) + appended = PythonLinter.list_sys_path_changes(linter) assert "relative_path" in [p.path for p in appended] diff --git a/tests/unit/source_code/test_s3fs.py b/tests/unit/source_code/test_s3fs.py index 325b6e99a9..8567fecce9 100644 --- a/tests/unit/source_code/test_s3fs.py +++ b/tests/unit/source_code/test_s3fs.py @@ -5,7 +5,6 @@ from databricks.labs.ucx.source_code.graph import ( DependencyResolver, DependencyProblem, - DependencyGraphBuilder, ) from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, NotebookResolver @@ -120,9 +119,8 @@ def test_detect_s3fs_import(empty_index, source: str, expected: list[DependencyP notebook_loader = NotebookLoader() file_loader = FileLoader() resolvers = [NotebookResolver(notebook_loader), LocalFileResolver(file_loader), WhitelistResolver(whitelist)] - dependency_resolver = DependencyResolver(resolvers) - builder = DependencyGraphBuilder(dependency_resolver, lookup) - maybe = builder.build_local_file_dependency_graph(sample) + dependency_resolver = DependencyResolver(resolvers, lookup) + maybe = dependency_resolver.build_local_file_dependency_graph(sample) assert maybe.problems == [_.replace(source_path=sample) for _ in expected] @@ -133,7 +131,7 @@ def test_detect_s3fs_import(empty_index, source: str, expected: list[DependencyP DependencyProblem( code='dependency-check', message='Use of dependency s3fs is deprecated', - source_path=Path('root9.py.txt'), + source_path=Path('leaf9.py.txt'), start_line=1, start_col=0, end_line=1, @@ -148,8 +146,7 @@ def test_detect_s3fs_import_in_dependencies(empty_index, expected: list[Dependen file_loader = FileLoader() whitelist = Whitelist.parse(yml.read_text()) resolvers = [LocalFileResolver(file_loader), WhitelistResolver(whitelist)] - dependency_resolver = DependencyResolver(resolvers) - builder = DependencyGraphBuilder(dependency_resolver, lookup) + dependency_resolver = DependencyResolver(resolvers, lookup) sample = lookup.cwd / "root9.py.txt" - maybe = builder.build_local_file_dependency_graph(sample) - assert maybe.problems == [_.replace(source_path=sample) for _ in expected] + maybe = dependency_resolver.build_local_file_dependency_graph(sample) + assert maybe.problems == expected diff --git a/tests/unit/source_code/test_whitelist.py b/tests/unit/source_code/test_whitelist.py index 7cb151c987..4a7a399724 100644 --- a/tests/unit/source_code/test_whitelist.py +++ b/tests/unit/source_code/test_whitelist.py @@ -6,7 +6,7 @@ def test_reads_whitelist(): datas = _load_sources(SourceContainer, "sample-python-compatibility-catalog.yml") whitelist = Whitelist.parse(datas[0]) - assert not whitelist.compatibility("spark.sql") + assert whitelist.compatibility("spark.sql") == UCCompatibility.UNKNOWN assert whitelist.compatibility("databricks.sdk.service.compute") == UCCompatibility.FULL assert whitelist.compatibility("sys") == UCCompatibility.FULL assert whitelist.compatibility("os.path") == UCCompatibility.FULL