Refactored workspace_client_mock to use installation.load() #1056

Merged · 1 commit · Mar 15, 2024
18 changes: 0 additions & 18 deletions src/databricks/labs/ucx/hive_metastore/mapping.py

@@ -2,7 +2,6 @@
 import re
 from dataclasses import dataclass
 from functools import partial
-from typing import Any
 
 from databricks.labs.blueprint.installation import Installation
 from databricks.labs.blueprint.parallel import Threads
@@ -28,18 +27,6 @@ class Rule:
     src_table: str
     dst_table: str
 
-    @classmethod
-    def from_dict(cls, data: dict[str, Any]):
-        """Deserializes the Rule from a dictionary."""
-        return cls(
-            workspace_name=data["workspace_name"],
-            catalog_name=data["catalog_name"],
-            src_schema=data["src_schema"],
-            dst_schema=data["dst_schema"],
-            src_table=data["src_table"],
-            dst_table=data["dst_table"],
-        )
-
     @classmethod
     def initial(cls, workspace_name: str, catalog_name: str, table: Table) -> "Rule":
         return cls(
@@ -65,11 +52,6 @@ class TableToMigrate:
     src: Table
     rule: Rule
 
-    @classmethod
-    def from_dict(cls, data: dict[str, Any]):
-        """Deserializes the TableToMigrate from a dictionary."""
-        return cls(Table.from_dict(data["table"]), Rule.from_dict(data["rule"]))
-
 
 class TableMapping:
     UCX_SKIP_PROPERTY = "databricks.labs.ucx.skip"
14 changes: 0 additions & 14 deletions src/databricks/labs/ucx/hive_metastore/tables.py

@@ -56,20 +56,6 @@ class Table:
 
     UPGRADED_FROM_WS_PARAM: typing.ClassVar[str] = "upgraded_from_workspace_id"
 
-    @classmethod
-    def from_dict(cls, data: dict[str, typing.Any]):
-        return cls(
-            catalog=data.get("catalog", "UNKNOWN"),
-            database=data.get("database", "UNKNOWN"),
-            name=data.get("name", "UNKNOWN"),
-            object_type=data.get("object_type", "UNKNOWN"),
-            table_format=data.get("table_format", "UNKNOWN"),
-            location=data.get("location", None),
-            view_text=data.get("view_text", None),
-            upgraded_to=data.get("upgraded_to", None),
-            storage_properties=data.get("storage_properties", None),
-        )
-
     @property
     def is_delta(self) -> bool:
         if self.table_format is None:
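
Both deletions rely on the generic deserializer that ships with databricks-labs-blueprint: Installation.load reconstructs a dataclass by matching dictionary keys to field names, which makes the hand-rolled from_dict helpers redundant. A minimal sketch of the replacement pattern, mirroring the test helper changed below; the fixture payload here is invented for illustration:

from databricks.labs.blueprint.installation import MockInstallation

from databricks.labs.ucx.hive_metastore.tables import Table

# Hypothetical payload; the keys must match Table's field names exactly.
installation = MockInstallation({
    "0": {
        "catalog": "hive_metastore",
        "database": "db1_src",
        "name": "managed_dbfs",
        "object_type": "MANAGED",
        "table_format": "DELTA",
        "location": "dbfs:/some_location",
    }
})
table = installation.load(Table, filename="0")
assert table.object_type == "MANAGED"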
23 changes: 12 additions & 11 deletions tests/unit/__init__.py

@@ -4,6 +4,7 @@
 import pathlib
 from unittest.mock import create_autospec
 
+from databricks.labs.blueprint.installation import MockInstallation
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.errors import NotFound
 from databricks.sdk.service.compute import ClusterDetails, Policy
@@ -59,30 +60,32 @@ def _load_fixture(filename: str):
     PipelineStateInfo: 'assessment/pipelines',
     Policy: 'assessment/policies',
     TableToMigrate: 'hive_metastore/tables',
+    EndpointConfPair: 'assessment/warehouses',
 }
 
 
-def _load_list(cls: type, filename: str, ids=None):
-    if not ids:  # TODO: remove
-        return [cls.from_dict(_) for _ in _load_fixture(filename)]  # type: ignore[attr-defined]
-    return _id_list(cls, ids)
+def _load_list(cls: type, filename: str):
+    fixtures = _load_fixture(f'{_FOLDERS[cls]}/{filename}.json')
+    installation = MockInstallation(DEFAULT_CONFIG | {str(num): fixture for num, fixture in enumerate(fixtures)})
+    return [installation.load(cls, filename=str(num)) for num in range(len(fixtures))]
 
 
 def _id_list(cls: type, ids=None):
     if not ids:
         return []
-    return [cls.from_dict(_load_fixture(f'{_FOLDERS[cls]}/{_}.json')) for _ in ids]  # type: ignore[attr-defined]
+    installation = MockInstallation(DEFAULT_CONFIG | {_: _load_fixture(f'{_FOLDERS[cls]}/{_}.json') for _ in ids})
+    return [installation.load(cls, filename=_) for _ in ids]
 
 
 def _cluster_policy(policy_id: str):
-    fixture = _load_fixture(f"assessment/policies/{policy_id}.json")
+    fixture = _load_fixture(f"{_FOLDERS[Policy]}/{policy_id}.json")
     definition = json.dumps(fixture["definition"])
     overrides = json.dumps(fixture["policy_family_definition_overrides"])
     return Policy(description=definition, policy_family_definition_overrides=overrides)
 
 
 def _pipeline(pipeline_id: str):
-    fixture = _load_fixture(f"assessment/pipelines/{pipeline_id}.json")
+    fixture = _load_fixture(f"{_FOLDERS[PipelineStateInfo]}/{pipeline_id}.json")
     return GetPipelineResponse.from_dict(fixture)
 
 
@@ -97,7 +100,7 @@ def workspace_client_mock(
     job_ids: list[str] | None = None,
     jobruns_ids: list[str] | None = None,
     policy_ids: list[str] | None = None,
-    warehouse_config="single-config.json",
+    warehouse_config="single-config",
     secret_exists=True,
 ):
     ws = create_autospec(WorkspaceClient)
@@ -108,9 +111,7 @@
     ws.pipelines.get = _pipeline
     ws.jobs.list.return_value = _id_list(BaseJob, job_ids)
     ws.jobs.list_runs.return_value = _id_list(BaseRun, jobruns_ids)
-    ws.warehouses.get_workspace_warehouse_config().data_access_config = _load_list(
-        EndpointConfPair, f"assessment/warehouses/{warehouse_config}"
-    )
+    ws.warehouses.get_workspace_warehouse_config().data_access_config = _load_list(EndpointConfPair, warehouse_config)
     ws.workspace.export = _workspace_export
     if secret_exists:
         ws.secrets.get_secret.return_value = GetSecretResponse(key="username", value="SGVsbG8sIFdvcmxkIQ==")
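
With this change every fixture list goes through MockInstallation, so the unit tests exercise the same typed deserialization path the installer uses at runtime instead of each class's hand-written from_dict. A usage sketch of the refactored helper; the argument values reference fixture JSON files that ship under tests/unit/assessment/, as in the tests below:

from tests.unit import workspace_client_mock

ws = workspace_client_mock(
    cluster_ids=['simplest-autoscale'],
    pipeline_ids=['empty-spec'],
    warehouse_config='spn-config',  # bare fixture name; _load_list now appends .json itself
)
data_access = ws.warehouses.get_workspace_warehouse_config().data_access_config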
@@ -3,7 +3,7 @@
     "max_workers": 6,
     "min_workers": 1
   },
-  "cluster_source": "ClusterSource.UI",
+  "cluster_source": "UI",
   "spark_context_id": 5134472582179565315,
   "spark_version": "9.3.x-cpu-ml-scala2.12",
   "cluster_id": "0810-225833-atlanta69",
@@ -3,7 +3,7 @@
     "max_workers": 6,
     "min_workers": 1
   },
-  "cluster_source": "ClusterSource.UI",
+  "cluster_source": "UI",
   "spark_context_id": 5134472582179565315,
   "spark_version": "9.3.x-cpu-ml-scala2.12",
   "cluster_id": "0810-225833-atlanta69",
1 change: 1 addition & 0 deletions tests/unit/assessment/jobruns/gitsource_task.json

@@ -4,6 +4,7 @@
   "start_time": 1704085200000,
   "timeout_seconds": 86400,
   "git_source": {
+    "git_provider": "gitHub",
     "git_url": "https://foo/bar"
   },
 
4 changes: 4 additions & 0 deletions tests/unit/assessment/jobruns/sql_tasks.json

@@ -8,6 +8,7 @@
       "task_key": "path",
       "existing_cluster_id": "outdated-autoscale",
       "sql_task": {
+        "warehouse_id": "outdated-autoscale",
         "file": {
           "path": "/foo/bar"
         }
@@ -17,6 +18,7 @@
       "task_key": "alert",
       "existing_cluster_id": "outdated-autoscale",
       "sql_task": {
+        "warehouse_id": "outdated-autoscale",
         "alert": {
           "alert_id": "a123"
         }
@@ -26,6 +28,7 @@
       "task_key": "dashboard",
       "existing_cluster_id": "outdated-autoscale",
       "sql_task": {
+        "warehouse_id": "outdated-autoscale",
         "dashboard": {
           "dashboard_id": "d123"
         }
@@ -35,6 +38,7 @@
       "task_key": "query",
       "existing_cluster_id": "outdated-autoscale",
       "sql_task": {
+        "warehouse_id": "outdated-autoscale",
         "dashboard": {
           "dashboard_id": "q123"
         }
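
The fixture edits above go hand in hand with the stricter loading path: installation.load unmarshals against the SDK dataclass definitions, so values the permissive from_dict route used to tolerate — a stringified enum repr like "ClusterSource.UI", or a sql_task/git_source block missing a field — now have to be well-formed. The enum half of that is plain SDK behavior and easy to verify; the strictness rationale is my reading of the change, not something stated in the PR:

from databricks.sdk.service.compute import ClusterSource

# "UI" is the wire value; "ClusterSource.UI" is only the Python repr,
# so it is not a valid value for the enum.
assert ClusterSource("UI") is ClusterSource.UI
assert str(ClusterSource.UI) == "ClusterSource.UI"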
14 changes: 7 additions & 7 deletions tests/unit/assessment/test_azure.py

@@ -10,7 +10,7 @@ def test_azure_service_principal_info_crawl():
         cluster_ids=['azure-spn-secret', 'simplest-autoscale'],
         pipeline_ids=['spec-with-spn'],
         job_ids=['some-spn'],
-        warehouse_config="spn-config.json",
+        warehouse_config="spn-config",
         secret_exists=True,
     )
     spn_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
@@ -23,7 +23,7 @@ def test_azure_service_principal_info_spark_conf_crawl():
         cluster_ids=['simplest-autoscale'],
         pipeline_ids=['empty-spec'],
         job_ids=['some-spn'],
-        warehouse_config="spn-config.json",
+        warehouse_config="spn-config",
     )
 
     spn_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
@@ -36,7 +36,7 @@ def test_azure_service_principal_info_no_spark_conf_crawl():
         cluster_ids=['simplest-autoscale'],
         pipeline_ids=['empty-spec'],
         job_ids=['single-job'],
-        warehouse_config="single-config.json",
+        warehouse_config="single-config",
     )
 
     spn_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
@@ -70,7 +70,7 @@ def test_list_all_cluster_with_spn_in_spark_conf_with_secret():
 
 def test_list_all_wh_config_with_spn_no_secret():
     ws = workspace_client_mock(
-        cluster_ids=['simplest-autoscale'], pipeline_ids=['empty-spec'], warehouse_config="spn-config.json"
+        cluster_ids=['simplest-autoscale'], pipeline_ids=['empty-spec'], warehouse_config="spn-config"
     )
     result_set = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
 
@@ -84,7 +84,7 @@ def test_list_all_wh_config_with_spn_and_secret():
     ws = workspace_client_mock(
         cluster_ids=['simplest-autoscale'],
         pipeline_ids=['empty-spec'],
-        warehouse_config="spn-secret-config.json",
+        warehouse_config="spn-secret-config",
         secret_exists=True,
     )
     result_set = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
@@ -107,7 +107,7 @@ def test_azure_service_principal_info_policy_conf():
         cluster_ids=['policy-single-user-with-spn', 'policy-azure-oauth'],
         pipeline_ids=['spec-with-spn'],
         job_ids=['policy-single-job-with-spn'],
-        warehouse_config="spn-config.json",
+        warehouse_config="spn-config",
         secret_exists=True,
     )
     spn_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
@@ -120,7 +120,7 @@ def test_azure_service_principal_info_dedupe():
         cluster_ids=['policy-single-user-with-spn'],
         pipeline_ids=['spec-with-spn'],
         job_ids=['policy-single-job-with-spn'],
-        warehouse_config="dupe-spn-config.json",
+        warehouse_config="dupe-spn-config",
         secret_exists=True,
     )
     spn_crawler = AzureServicePrincipalCrawler(ws, MockBackend(), "ucx").snapshot()
24 changes: 12 additions & 12 deletions tests/unit/assessment/test_jobs.py

@@ -64,38 +64,38 @@ def test_job_crawler_with_no_owner_should_have_empty_creator_name():
         (
             ['notebook_task'],
             ['outdated-autoscale'],
-            '["123"]',
+            '[123]',
             '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]',
         ),
         (
             ['notebook_task', 'notebook_dupe_task'],
             ['outdated-autoscale'],
-            '["123", "124"]',
+            '[123, 124]',
             '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]',
         ),
         (
             ['sql_tasks'],
             ['outdated-autoscale'],
-            '["123"]',
+            '[123]',
             '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]',
         ),
-        (['gitsource_task'], ['outdated-autoscale'], '["123"]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
-        (['dbt_task'], ['outdated-autoscale'], '["123"]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
-        (['jar_task'], ['outdated-autoscale'], '["123"]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
-        (['python_wheel_task'], ['outdated-autoscale'], '["123"]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
-        (['run_condition_task'], ['outdated-autoscale'], '["123"]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
-        (['notebook_no_failure_task'], ['simplest-autoscale'], '["123"]', '[]'),
+        (['gitsource_task'], ['outdated-autoscale'], '[123]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
+        (['dbt_task'], ['outdated-autoscale'], '[123]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
+        (['jar_task'], ['outdated-autoscale'], '[123]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
+        (['python_wheel_task'], ['outdated-autoscale'], '[123]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
+        (['run_condition_task'], ['outdated-autoscale'], '[123]', '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]'),
+        (['notebook_no_failure_task'], ['simplest-autoscale'], '[123]', '[]'),
         (
             ['notebook_no_sec_no_comp_task'],
             ['simplest-autoscale'],
-            '["123"]',
+            '[123]',
             '["not supported DBR: 9.3.x-cpu-ml-scala2.12"]',
         ),
-        (['notebook_no_sec_comp_task'], ['simplest-autoscale'], '["123"]', '["no data security mode specified"]'),
+        (['notebook_no_sec_comp_task'], ['simplest-autoscale'], '[123]', '["no data security mode specified"]'),
         (
             ['notebook_spark_conf_task'],
             ['simplest-autoscale'],
-            '["123"]',
+            '[123]',
             '["unsupported config: spark.databricks.passthrough.enabled"]',
         ),
     ],
4 changes: 2 additions & 2 deletions tests/unit/hive_metastore/tables/external_src.json

@@ -1,9 +1,9 @@
 {
-  "table": {
+  "src": {
     "catalog": "hive_metastore",
     "database": "db1_src",
     "name": "external_src",
-    "type": "EXTERNAL",
+    "object_type": "EXTERNAL",
     "table_format": "DELTA"
   },
   "rule": {
@@ -1,9 +1,9 @@
 {
-  "table": {
+  "src": {
     "catalog": "hive_metastore",
     "database": "db1_src",
     "name": "external_src",
-    "type": "EXTERNAL",
+    "object_type": "EXTERNAL",
     "table_format": "UNSUPPORTED_FORMAT"
   },
   "rule": {
4 changes: 2 additions & 2 deletions tests/unit/hive_metastore/tables/managed_dbfs.json

@@ -1,9 +1,9 @@
 {
-  "table": {
+  "src": {
     "catalog": "hive_metastore",
     "database": "db1_src",
     "name": "managed_dbfs",
-    "type": "MANAGED",
+    "object_type": "MANAGED",
     "table_format": "DELTA",
     "location": "dbfs:/some_location"
   },
4 changes: 2 additions & 2 deletions tests/unit/hive_metastore/tables/managed_mnt.json

@@ -1,9 +1,9 @@
 {
-  "table": {
+  "src": {
     "catalog": "hive_metastore",
     "database": "db1_src",
     "name": "managed_mnt",
-    "type": "MANAGED",
+    "object_type": "MANAGED",
     "table_format": "DELTA",
     "location": "s3:/mnt/location"
   },
4 changes: 2 additions & 2 deletions tests/unit/hive_metastore/tables/managed_other.json

@@ -1,9 +1,9 @@
 {
-  "table": {
+  "src": {
     "catalog": "hive_metastore",
     "database": "db1_src",
     "name": "managed_other",
-    "type": "MANAGED",
+    "object_type": "MANAGED",
     "table_format": "DELTA",
     "location": "s3:/location"
   },
6 changes: 3 additions & 3 deletions tests/unit/hive_metastore/tables/view.json

@@ -1,10 +1,10 @@
 {
-  "table": {
+  "src": {
     "catalog": "hive_metastore",
     "database": "db1_src",
     "name": "view_src",
-    "type": "VIEW",
-    "format": "VIEW",
+    "object_type": "VIEW",
+    "table_format": "VIEW",
     "view_text": "SELECT * FROM table"
   },
   "rule": {
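
The key renames in these table fixtures follow directly from dropping the custom from_dict methods: installation.load matches JSON keys to dataclass field names with no translation layer, so the files must say "src" (the TableToMigrate field) rather than "table", and "object_type"/"table_format" (the Table fields) rather than "type"/"format". A minimal round-trip sketch in the shape of view.json; the "rule" values are hypothetical, since this view does not show that part of the fixture:

from databricks.labs.blueprint.installation import MockInstallation

from databricks.labs.ucx.hive_metastore.mapping import TableToMigrate

fixture = {
    "src": {
        "catalog": "hive_metastore",
        "database": "db1_src",
        "name": "view_src",
        "object_type": "VIEW",
        "table_format": "VIEW",
        "view_text": "SELECT * FROM table",
    },
    "rule": {  # hypothetical values
        "workspace_name": "workspace",
        "catalog_name": "catalog",
        "src_schema": "db1_src",
        "dst_schema": "db1_dst",
        "src_table": "view_src",
        "dst_table": "view_dst",
    },
}
installation = MockInstallation({"0": fixture})
table_to_migrate = installation.load(TableToMigrate, filename="0")
assert table_to_migrate.src.object_type == "VIEW"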