Use LTS Databricks runtime version (#3459)
## Changes
Use the LTS Databricks runtime version, because the convert-to-external-table
migration strategy fails on the latest runtime.

> Note: This postpones the problem rather than solving it. Postponing is
preferred for the next release, because users can experience this problem
with the latest UCX release.
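
The change boils down to asking the Databricks SDK for the latest LTS runtime instead of the latest runtime overall. A minimal sketch of the two calls, assuming an authenticated `WorkspaceClient` (the client construction and variable names here are illustrative, not part of this change):

```python
from databricks.sdk import WorkspaceClient

ws = WorkspaceClient()  # assumes workspace authentication is already configured

# Latest runtime overall (what the installer used before this change).
latest = ws.clusters.select_spark_version(latest=True)

# Latest LTS runtime (what the cluster policy pins after this change).
latest_lts = ws.clusters.select_spark_version(latest=True, long_term_support=True)
```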

### Linked issues

Resolves #3453 (for now)
Resolves #3461

### Functionality

- [x] modified existing workflow: `migrate-tables`

### Tests

- [x] reused integration tests
JCZuurmond authored Dec 20, 2024
1 parent 3f6da0d commit 546ffcf
Showing 4 changed files with 15 additions and 4 deletions.
7 changes: 6 additions & 1 deletion src/databricks/labs/ucx/hive_metastore/table_migrate.py
@@ -293,7 +293,12 @@ def _catalog_type(self):
def _catalog_table(self):
return self._spark._jvm.org.apache.spark.sql.catalyst.catalog.CatalogTable # pylint: disable=protected-access

def _convert_hms_table_to_external(self, src_table: Table):
def _convert_hms_table_to_external(self, src_table: Table) -> bool:
"""Converts a Hive metastore table to external using Spark JVM methods.
TODO:
This method fails for Databricks runtime 16.0, probably due to the JDK update (https://docs.databricks.com/en/release-notes/runtime/16.0.html#breaking-change-jdk-17-is-now-the-default).
"""
logger.info(f"Changing HMS managed table {src_table.name} to External Table type.")
inventory_table = self._tables_crawler.full_name
try:
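
The TODO added in the docstring above points at Databricks Runtime 16.0. As an illustration only (not part of this change), a guard that skips the JVM-based conversion on affected runtimes could read the `DATABRICKS_RUNTIME_VERSION` environment variable that Databricks sets on cluster nodes; the helper name and the `>= 16` cut-off are assumptions:

```python
import os


def _runtime_supports_jvm_table_conversion() -> bool:
    """Hypothetical guard: False on runtimes where the conversion is known to fail.

    DBR 16.0 is treated as the cut-off because the convert-to-external strategy
    fails there (see the TODO above); later runtimes are assumed affected too.
    """
    raw = os.environ.get("DATABRICKS_RUNTIME_VERSION", "")
    try:
        major = int(raw.split(".", maxsplit=1)[0])
    except ValueError:
        return True  # unknown or non-numeric version string: let the conversion try
    return major < 16
```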
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/installer/policy.py
@@ -106,7 +106,7 @@ def _get_instance_pool_id(self) -> str | None:
return None

def _definition(self, conf: dict, instance_profile: str | None, instance_pool_id: str | None) -> str:
latest_lts_dbr = self._ws.clusters.select_spark_version(latest=True)
latest_lts_dbr = self._ws.clusters.select_spark_version(latest=True, long_term_support=True)
node_type_id = self._ws.clusters.select_node_type(local_disk=True, min_memory_gb=32, min_cores=4)
policy_definition = {
"spark_version": self._policy_config(latest_lts_dbr),
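
The selected LTS version ends up in the cluster policy as a fixed value. Databricks cluster policy elements pin a setting with a `fixed`-type entry; an illustrative fragment of such a definition (the concrete version string is an example, not taken from this diff):

```python
# Illustrative fragment of a cluster policy definition pinning the Spark version.
policy_definition = {
    "spark_version": {
        "type": "fixed",
        "value": "15.4.x-scala2.12",  # example LTS runtime string
    },
}
```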
6 changes: 6 additions & 0 deletions tests/integration/hive_metastore/test_workflows.py
@@ -71,6 +71,12 @@ def test_table_migration_job_refreshes_migration_status(


def test_table_migration_convert_manged_to_external(installation_ctx, make_table_migration_context) -> None:
"""Convert managed tables to external before migrating.
Note:
This test fails from Databricks runtime 16.0 (https://docs.databricks.com/en/release-notes/runtime/16.0.html),
probably due to the JDK update (https://docs.databricks.com/en/release-notes/runtime/16.0.html#breaking-change-jdk-17-is-now-the-default).
"""
tables, dst_schema = make_table_migration_context("managed", installation_ctx)
ctx = installation_ctx.replace(
config_transform=lambda wc: dataclasses.replace(
4 changes: 2 additions & 2 deletions tests/integration/install/test_installation.py
@@ -108,15 +108,15 @@ def test_job_failure_propagates_correct_error_message_and_logs(ws, sql_backend,


@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=3))
def test_job_cluster_policy(ws, installation_ctx):
def test_job_cluster_policy(ws, installation_ctx) -> None:
installation_ctx.workspace_installation.run()
user_name = ws.current_user.me().user_name
cluster_policy = ws.cluster_policies.get(policy_id=installation_ctx.config.policy_id)
policy_definition = json.loads(cluster_policy.definition)

assert cluster_policy.name == f"Unity Catalog Migration ({installation_ctx.inventory_database}) ({user_name})"

spark_version = ws.clusters.select_spark_version(latest=True)
spark_version = ws.clusters.select_spark_version(latest=True, long_term_support=True)
assert policy_definition["spark_version"]["value"] == spark_version
assert policy_definition["node_type_id"]["value"] == ws.clusters.select_node_type(local_disk=True, min_memory_gb=32)
if ws.config.is_azure:
