Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added check for circular view dependency #1502

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion src/databricks/labs/ucx/hive_metastore/view_migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def _next_batch(self, views: set[ViewToMigrate]) -> set[ViewToMigrate]:
result: set[ViewToMigrate] = set()
for view in views:
view_deps = set(view.dependencies)
self._check_circular_dependency(view, views)
if len(view_deps) == 0:
result.add(view)
else:
Expand All @@ -128,5 +129,29 @@ def _next_batch(self, views: set[ViewToMigrate]) -> set[ViewToMigrate]:
result.add(view)
# prevent infinite loop
if len(result) == 0 and len(views) > 0:
raise ValueError(f"Circular view references are preventing migration: {views}")
raise ValueError(f"Invalid table references are preventing migration: {views}")
return result

def _check_circular_dependency(self, initial_view, views):
queue = []
queue.extend(dep for dep in initial_view.dependencies)
while queue:
current_view = self._get_view_instance(queue.pop(0).key, views)
if not current_view:
continue
if current_view == initial_view:
raise ValueError(
f"Circular dependency detected between {initial_view.src.name} and {current_view.src.name} "
)
queue.extend(dep for dep in current_view.dependencies)

def _get_view_instance(self, key: str, views: set[ViewToMigrate]) -> ViewToMigrate | None:
# This method acts as a mapper between TableView and ViewToMigrate. We check if the key passed matches with
# any of the views in the list of views. This means the circular dependency will be identified only
# if the dependencies are present in the list of views passed to _next_batch() or the _result_view_list
# ToDo: see if a mapper between TableView and ViewToMigrate can be implemented
all_views = list(views) + self._result_view_list
for view in all_views:
if view.src.key == key:
return view
return None
5 changes: 5 additions & 0 deletions tests/unit/hive_metastore/tables/tables_and_views.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@
"db": "db1",
"table": "v11",
"view_text": "select * from db1.v10"
},
{
"db": "db1",
"table": "v12",
"view_text": "select * from db1.v11"
}


Expand Down
20 changes: 17 additions & 3 deletions tests/unit/hive_metastore/test_views_sequencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,10 @@ def test_migrate_invalid_sql_tables_raises_value_error() -> None:
batches = sequencer.sequence_batches()
sequence = list(flatten(batches))
assert sequence is None # should never get there
assert "Circular view references are preventing migration:" in str(error)
assert "Invalid table references are preventing migration:" in str(error)


def test_migrate_circular_vues_raises_value_error() -> None:
def test_migrate_circular_views_raises_value_error() -> None:
with pytest.raises(ValueError) as error:
samples = Samples.load("db1.v10", "db1.v11")
sql_backend = mock_backend(samples, "db1")
Expand All @@ -141,7 +141,21 @@ def test_migrate_circular_vues_raises_value_error() -> None:
batches = sequencer.sequence_batches()
sequence = list(flatten(batches))
assert sequence is None # should never get there
assert "Circular view references are preventing migration:" in str(error)
assert "Circular dependency detected between" in str(error)


def test_migrate_circular_view_chain_raises_value_error() -> None:
with pytest.raises(ValueError) as error:
samples = Samples.load("db1.v10", "db1.v11", "db1.v12")
sql_backend = mock_backend(samples, "db1")
crawler = TablesCrawler(sql_backend, SCHEMA_NAME, ["db1"])
tables = [TableToMigrate(table, create_autospec(Rule)) for table in crawler.snapshot()]
migration_index = MigrationIndex([])
sequencer = ViewsMigrationSequencer(tables, migration_index)
batches = sequencer.sequence_batches()
sequence = list(flatten(batches))
assert sequence is None # should never get there
assert "Circular dependency detected between" in str(error)


def mock_backend(samples: list[dict], *dbnames: str) -> SqlBackend:
Expand Down
Loading