Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added databricks labs ucx cluster-remap command to remap legacy cluster configurations to UC-compatible #994

Merged
merged 34 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
321f09a
Adding command to Remap the cluster to UC
prajin-29 Mar 1, 2024
878ac55
Adding command to Remap the cluster to UC
prajin-29 Mar 1, 2024
6dfa642
Adding command to Remap the cluster to UC
prajin-29 Mar 1, 2024
89dc542
Adding command to Remap the cluster to UC
prajin-29 Mar 1, 2024
54ca4db
Adding command to Remap the cluster to UC
prajin-29 Mar 1, 2024
421c31f
Adding command to Remap the cluster to UC
prajin-29 Mar 4, 2024
7f685e3
Adding command to Remap the cluster to UC
prajin-29 Mar 4, 2024
a84421e
Adding command to Remap the cluster to UC
prajin-29 Mar 4, 2024
41ac6b6
writing unit test
prajin-29 Mar 4, 2024
881d54a
writing unit test
prajin-29 Mar 4, 2024
81a2af0
writing unit test
prajin-29 Mar 4, 2024
d62ae3f
Merge branch 'main' into feature/cluster_remap_command
prajin-29 Mar 4, 2024
8a4f46b
writing unit test
prajin-29 Mar 4, 2024
509f5bf
writing unit test
prajin-29 Mar 4, 2024
08a8072
Adding Integration Testing
prajin-29 Mar 4, 2024
b8b96bb
Merge branch 'main' into feature/cluster_remap_command
prajin-29 Mar 15, 2024
8724726
Creating revert cluster remap command
prajin-29 Mar 15, 2024
40b132f
Creating revert cluster remap command
prajin-29 Mar 18, 2024
9aea0f6
Creating revert cluster remap command
prajin-29 Mar 18, 2024
5d2a388
creating Unit Test cases
prajin-29 Mar 18, 2024
4e858f8
creating Unit Test cases
prajin-29 Mar 18, 2024
126cd0a
Changing the logic for iterating to all the clusters
prajin-29 Mar 19, 2024
cc417fa
Changing the logic for cluster remap
prajin-29 Mar 19, 2024
2738e90
Changing the logic for cluster remap
prajin-29 Mar 19, 2024
e283696
Updating the Unit test
prajin-29 Mar 19, 2024
57c841b
Merge branch 'main' into feature/cluster_remap_command
prajin-29 Mar 19, 2024
05101b1
Increasing the test coverage
prajin-29 Mar 20, 2024
51d45dd
Applying the review comments
prajin-29 Mar 20, 2024
53c5bcb
Applying the review comments
prajin-29 Mar 20, 2024
8b61365
Applying the review comments
prajin-29 Mar 20, 2024
6e82ac8
Applying the review comments
prajin-29 Mar 20, 2024
b65d1a5
Using the API only once to fetch the ids and details
prajin-29 Mar 20, 2024
9df440a
Modifying the code based on the comments provided
prajin-29 Mar 20, 2024
23a1b68
Merge branch 'main' into feature/cluster_remap_command
nfx Mar 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,9 @@ commands:
- name: create-catalogs-schemas
description: Create UC external catalogs and schemas based on the destinations created from create_table_mapping command.
This command is supposed to be run before migrating tables to UC.

- name: cluster-remap
  description: Remap the selected clusters to a Unity Catalog compatible data security mode

- name: revert-cluster-remap
  description: Revert remapped clusters to their original configuration using the saved backup
42 changes: 42 additions & 0 deletions src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrate
from databricks.labs.ucx.hive_metastore.table_move import TableMove
from databricks.labs.ucx.install import WorkspaceInstallation
from databricks.labs.ucx.workspace_access.clusters import ClusterAccess
from databricks.labs.ucx.workspace_access.groups import GroupManager

ucx = App(__file__)
Expand Down Expand Up @@ -504,5 +505,46 @@ def create_catalogs_schemas(w: WorkspaceClient, prompts: Prompts):
catalog_schema.create_catalog_schema()


@ucx.command
def cluster_remap(w: WorkspaceClient, prompts: Prompts):
    """Re-mapping the cluster to UC"""
    logger.info("Remapping the Clusters to UC")
    installation = Installation.current(w, 'ucx')
    cluster = ClusterAccess(installation, w, prompts)
    # Only non-job clusters are candidates for remapping.
    cluster_list = cluster.list_cluster()
    if not cluster_list:
        logger.info("No cluster information present in the workspace")
        return
    print(f"{'Cluster Name':<50}\t{'Cluster Id':<50}")
    for cluster_details in cluster_list:
        # cluster_name (and, defensively, cluster_id) can be None in the API
        # response; formatting None with a width spec raises TypeError.
        print(f"{cluster_details.cluster_name or '':<50}\t{cluster_details.cluster_id or '':<50}")
    # "<ALL>" (the default) selects every listed cluster.
    cluster_ids = prompts.question(
        "Please provide the cluster id's as comma separated value from the above list", default="<ALL>"
    )
    cluster.map_cluster_to_uc(cluster_ids, cluster_list)


@ucx.command
def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
    """Reverting Re-mapping of clusters from UC"""
    logger.info("Reverting the Remapping of the Clusters from UC")
    installation = Installation.current(w, 'ucx')
    # Collect the cluster ids that have a JSON backup saved under
    # backup/clusters/ in the installation folder (file stem == cluster id).
    backed_up_ids = []
    for file in installation.files():
        path = file.path
        if path is None or path.find("backup/clusters") <= 0:
            continue
        backed_up_ids.append(path.split("/")[-1].split(".")[0])
    if not backed_up_ids:
        logger.info("There is no cluster files in the backup folder. Skipping the reverting process")
        return
    for backed_up_id in backed_up_ids:
        logger.info(backed_up_id)
    # "<ALL>" (the default) reverts every backed-up cluster.
    selected_ids = prompts.question(
        "Please provide the cluster id's as comma separated value from the above list", default="<ALL>"
    )
    cluster_access = ClusterAccess(installation, w, prompts)
    cluster_access.revert_cluster_remap(selected_ids, backed_up_ids)


# Script entry point: dispatch the `databricks labs ucx ...` CLI command.
if __name__ == "__main__":
    ucx()
114 changes: 114 additions & 0 deletions src/databricks/labs/ucx/workspace_access/clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import logging

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import InvalidParameterValue
from databricks.sdk.service.compute import ClusterDetails, DataSecurityMode

logger = logging.getLogger(__name__)


class ClusterAccess:
    """Remaps interactive clusters to Unity Catalog data security modes.

    Before a cluster is edited, its current configuration is saved as JSON
    under ``backup/clusters/<cluster_id>.json`` in the installation folder so
    the change can be undone later with :meth:`revert_cluster_remap`.
    """

    def __init__(self, installation: Installation, ws: WorkspaceClient, prompts: Prompts):
        self._ws = ws
        self._prompts = prompts
        self._installation = installation

    def list_cluster(self) -> list[ClusterDetails]:
        """Return every cluster in the workspace that was not created by a job."""
        return [
            cluster
            for cluster in self._ws.clusters.list()
            if cluster.cluster_source is not None and cluster.cluster_source.name != "JOB"
        ]

    def _get_access_mode(self, access_mode: str) -> DataSecurityMode:
        """Map a legacy data-security-mode name to its UC-compatible equivalent.

        Single-user style modes stay SINGLE_USER; every other mode becomes
        shared (USER_ISOLATION).
        """
        if access_mode in {"LEGACY_SINGLE_USER", "SINGLE_USER"}:
            return DataSecurityMode.SINGLE_USER
        return DataSecurityMode.USER_ISOLATION

    @staticmethod
    def _edit_config(cluster: ClusterDetails) -> dict:
        """Configuration passed through unchanged on a ``clusters.edit`` call.

        NOTE(review): the clusters API grows new fields over time; any field
        not listed here is dropped by the edit — keep this list in sync with
        the SDK's ``clusters.edit`` signature.
        """
        return {
            "cluster_id": cluster.cluster_id,
            "cluster_name": cluster.cluster_name,
            "num_workers": cluster.num_workers,
            "spark_conf": cluster.spark_conf,
            "spark_env_vars": cluster.spark_env_vars,
            "data_security_mode": cluster.data_security_mode,
            "node_type_id": cluster.node_type_id,
            "autoscale": cluster.autoscale,
            "policy_id": cluster.policy_id,
            "autotermination_minutes": cluster.autotermination_minutes,
            "custom_tags": cluster.custom_tags,
            "init_scripts": cluster.init_scripts,
            "cluster_log_conf": cluster.cluster_log_conf,
            "aws_attributes": cluster.aws_attributes,
            "ssh_public_keys": cluster.ssh_public_keys,
            "enable_elastic_disk": cluster.enable_elastic_disk,
            "cluster_source": cluster.cluster_source,
            "instance_pool_id": cluster.instance_pool_id,
            "enable_local_disk_encryption": cluster.enable_local_disk_encryption,
            "driver_instance_pool_id": cluster.driver_instance_pool_id,
        }

    def map_cluster_to_uc(self, cluster_id: str, cluster_details: list[ClusterDetails]):
        """Edit the selected clusters to a UC-compatible data security mode.

        :param cluster_id: comma-separated cluster ids, or ``"<ALL>"`` to take
            every cluster in *cluster_details*.
        :param cluster_details: candidate clusters, typically the result of
            :meth:`list_cluster`.

        Clusters that cannot be remapped (missing id or data security mode)
        are skipped with a warning rather than aborting the whole run.
        """
        if cluster_id != "<ALL>":
            requested_ids = {requested.strip() for requested in cluster_id.split(",")}
            selected = [cluster for cluster in cluster_details if cluster.cluster_id in requested_ids]
        else:
            selected = cluster_details
        spark_version = self._ws.clusters.select_spark_version(latest=True, long_term_support=True)
        for cluster in selected:
            try:
                # Explicit check instead of `assert`: validation must survive
                # python -O and follow the same skip-and-warn path as below.
                if cluster.cluster_id is None:
                    raise InvalidParameterValue("Cluster id is not present in the cluster details")
                if cluster.data_security_mode is None:
                    raise InvalidParameterValue(f"Data security Mode is None for the cluster {cluster.cluster_id}")
                access_mode = self._get_access_mode(cluster.data_security_mode.name)
                # Backup first, so the original configuration is recoverable
                # even if the edit below fails.
                self._installation.save(cluster, filename=f'backup/clusters/{cluster.cluster_id}.json')
                logger.info(f"Editing the cluster of cluster: {cluster.cluster_id} with access_mode as {access_mode}")
                config = self._edit_config(cluster)
                # Remapped clusters move to the latest LTS runtime and the
                # UC-compatible security mode; everything else passes through.
                config["spark_version"] = spark_version
                config["data_security_mode"] = access_mode
                self._ws.clusters.edit(**config)
            except InvalidParameterValue as e:
                logger.warning(f"skipping cluster remapping: {e}")

    def revert_cluster_remap(self, cluster_ids: str, total_cluster_ids: list):
        """Restore clusters from their saved JSON backups.

        :param cluster_ids: comma-separated cluster ids, or ``"<ALL>"`` to
            revert every id in *total_cluster_ids*.
        :param total_cluster_ids: ids for which a backup file exists.

        Backups that are unusable (missing spark version or cluster id) are
        skipped with a warning.
        """
        if cluster_ids != "<ALL>":
            cluster_list = [requested.strip() for requested in cluster_ids.split(",")]
        else:
            cluster_list = total_cluster_ids
        logger.info(f"Reverting the configurations for the cluster {cluster_list}")
        for cluster in cluster_list:
            try:
                cluster_details = self._installation.load(ClusterDetails, filename=f"backup/clusters/{cluster}.json")
                # NOTE(review): spark_version may legitimately be absent when
                # the cluster is governed by a policy — consider relaxing.
                if cluster_details.spark_version is None:
                    raise InvalidParameterValue(
                        f"Spark Version is not present in the config file for the cluster:{cluster}"
                    )
                if cluster_details.cluster_id is None:
                    raise InvalidParameterValue(
                        f"cluster Id is not present in the config file for the cluster:{cluster}"
                    )
                config = self._edit_config(cluster_details)
                config["spark_version"] = cluster_details.spark_version
                # The edit API needs an explicit worker count; default to 0.
                config["num_workers"] = cluster_details.num_workers if cluster_details.num_workers else 0
                self._ws.clusters.edit(**config)
            except InvalidParameterValue as e:
                logger.warning(f"skipping cluster remapping: {e}")
44 changes: 44 additions & 0 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,20 @@

import pytest
import yaml
from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.tui import MockPrompts
from databricks.sdk import AccountClient, WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service import iam, sql
from databricks.sdk.service.compute import ClusterDetails, ClusterSource
from databricks.sdk.service.workspace import ObjectInfo

from databricks.labs.ucx.assessment.aws import AWSResources
from databricks.labs.ucx.aws.access import AWSResourcePermissions
from databricks.labs.ucx.azure.access import AzureResourcePermissions
from databricks.labs.ucx.cli import (
alias,
cluster_remap,
create_account_groups,
create_catalogs_schemas,
create_table_mapping,
Expand All @@ -28,6 +32,7 @@
open_remote_config,
principal_prefix_access,
repair_run,
revert_cluster_remap,
revert_migrated_tables,
skip,
sync_workspace_info,
Expand Down Expand Up @@ -439,3 +444,42 @@ def test_create_catalogs_schemas(ws):
prompts = MockPrompts({'.*': 's3://test'})
create_catalogs_schemas(ws, prompts)
ws.catalogs.list.assert_called_once()


def test_cluster_remap(ws, caplog):
    # A fresh autospec client shadows the fixture so no real workspace is hit.
    ws = create_autospec(WorkspaceClient)
    ws.clusters.get.return_value = ClusterDetails(cluster_id="123", cluster_name="test_cluster")
    ws.clusters.list.return_value = [
        ClusterDetails(cluster_id="123", cluster_name="test_cluster", cluster_source=ClusterSource.UI),
        ClusterDetails(cluster_id="1234", cluster_name="test_cluster1", cluster_source=ClusterSource.JOB),
    ]
    mock_installation = create_autospec(Installation)
    mock_installation.save.return_value = "a/b/c"
    answers = MockPrompts({"Please provide the cluster id's as comma separated value from the above list.*": "1"})
    cluster_remap(ws, answers)
    assert "Remapping the Clusters to UC" in caplog.messages


def test_cluster_remap_error(ws, caplog):
    # No clusters in the workspace: the command should log and exit early.
    ws = create_autospec(WorkspaceClient)
    ws.clusters.list.return_value = []
    mock_installation = create_autospec(Installation)
    mock_installation.save.return_value = "a/b/c"
    answers = MockPrompts({"Please provide the cluster id's as comma separated value from the above list.*": "1"})
    cluster_remap(ws, answers)
    assert "No cluster information present in the workspace" in caplog.messages


def test_revert_cluster_remap(ws, caplog, mocker):
    ws = create_autospec(WorkspaceClient)
    ws.workspace.list.return_value = [ObjectInfo(path='/ucx/backup/clusters/123.json')]
    answers = MockPrompts({"Please provide the cluster id's as comma separated value from the above list.*": "1"})
    # NOTE(review): only asserts that the command blows up with TypeError
    # under these mocks; the exact failure point is not pinned — confirm.
    with pytest.raises(TypeError):
        revert_cluster_remap(ws, answers)


def test_revert_cluster_remap_empty(ws, caplog):
    # Expect the early-exit path; the asserted log message proves it was taken.
    ws = create_autospec(WorkspaceClient)
    answers = MockPrompts({"Please provide the cluster id's as comma separated value from the above list.*": "1"})
    revert_cluster_remap(ws, answers)
    assert "There is no cluster files in the backup folder. Skipping the reverting process" in caplog.messages
124 changes: 124 additions & 0 deletions tests/unit/workspace_access/test_clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
from unittest.mock import create_autospec

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.tui import MockPrompts
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import (
ClusterDetails,
ClusterSource,
DataSecurityMode,
)

from databricks.labs.ucx.workspace_access.clusters import ClusterAccess


nfx marked this conversation as resolved.
Show resolved Hide resolved
def test_map_cluster_to_uc(caplog):
    workspace = create_autospec(WorkspaceClient)
    mock_installation = create_autospec(Installation)
    mock_installation.save.return_value = "a/b/c"
    legacy_cluster = ClusterDetails(
        cluster_id="123", cluster_name="test_cluster", data_security_mode=DataSecurityMode.LEGACY_SINGLE_USER
    )
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    with caplog.at_level('INFO'):
        access.map_cluster_to_uc(cluster_id="123", cluster_details=[legacy_cluster])
    # LEGACY_SINGLE_USER is expected to be remapped to SINGLE_USER.
    assert 'Editing the cluster of cluster: 123 with access_mode as DataSecurityMode.SINGLE_USER' in caplog.messages


def test_map_cluster_to_uc_shared(caplog):
    workspace = create_autospec(WorkspaceClient)
    ui_cluster = ClusterDetails(
        cluster_id="123",
        cluster_name="test_cluster",
        cluster_source=ClusterSource.UI,
        data_security_mode=DataSecurityMode.LEGACY_TABLE_ACL,
    )
    job_cluster = ClusterDetails(cluster_id="1234", cluster_name="test_cluster", cluster_source=ClusterSource.JOB)
    workspace.clusters.list.return_value = [ui_cluster, job_cluster]
    mock_installation = create_autospec(Installation)
    mock_installation.save.return_value = "a/b/c"
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    with caplog.at_level('INFO'):
        access.map_cluster_to_uc(cluster_id="<ALL>", cluster_details=[ui_cluster, job_cluster])
    # LEGACY_TABLE_ACL is expected to be remapped to the shared mode.
    assert (
        'Editing the cluster of cluster: 123 with access_mode as DataSecurityMode.USER_ISOLATION' in caplog.messages
    )


def test_list_clusters():
    workspace = create_autospec(WorkspaceClient)
    workspace.clusters.list.return_value = [
        ClusterDetails(cluster_id="123", cluster_name="test_cluster", cluster_source=ClusterSource.UI),
        ClusterDetails(cluster_id="1234", cluster_name="test_cluster1", cluster_source=ClusterSource.JOB),
    ]
    mock_installation = create_autospec(Installation)
    mock_installation.save.return_value = "a/b/c"
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    listed = access.list_cluster()
    # The JOB-sourced cluster is filtered out, leaving only the UI cluster.
    assert len(listed) == 1
    assert listed[0].cluster_id == "123"


def test_map_cluster_to_uc_error(caplog):
    workspace = create_autospec(WorkspaceClient)
    mock_installation = create_autospec(Installation)
    mock_installation.save.return_value = "a/b/c"
    # No data_security_mode set: the cluster should be skipped with a warning.
    no_mode_cluster = ClusterDetails(cluster_id="123", cluster_name="test_cluster")
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    with caplog.at_level('INFO'):
        access.map_cluster_to_uc(cluster_id="123", cluster_details=[no_mode_cluster])
    assert 'skipping cluster remapping: Data security Mode is None for the cluster 123' in caplog.messages


def test_revert_map_cluster_to_uc(caplog):
    workspace = create_autospec(WorkspaceClient)
    mock_installation = create_autospec(Installation)
    mock_installation.load.return_value = ClusterDetails(
        cluster_id="123", cluster_name="test_cluster", spark_version="13.3.x-cpu-ml-scala2.12"
    )
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    # A single backed-up cluster is restored without raising.
    access.revert_cluster_remap(cluster_ids="123", total_cluster_ids=["123"])


def test_revert_all_cluster_to_uc(caplog):
    workspace = create_autospec(WorkspaceClient)
    mock_installation = create_autospec(Installation)
    mock_installation.load.return_value = ClusterDetails(cluster_id="123", cluster_name="test_cluster")
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    with caplog.at_level('INFO'):
        # "<ALL>" reverts every id that has a backup.
        access.revert_cluster_remap(cluster_ids="<ALL>", total_cluster_ids=["123", "234"])
    assert "Reverting the configurations for the cluster ['123', '234']" in caplog.messages


def test_revert_cluster_to_uc_empty_cluster(caplog):
    workspace = create_autospec(WorkspaceClient)
    mock_installation = create_autospec(Installation)
    # Backup without a cluster_id: reverting should skip it with a warning.
    mock_installation.load.return_value = ClusterDetails(
        cluster_name="test_cluster", spark_version="13.3.x-cpu-ml-scala2.12"
    )
    access = ClusterAccess(mock_installation, workspace, MockPrompts({}))
    with caplog.at_level('INFO'):
        access.revert_cluster_remap(cluster_ids="123", total_cluster_ids=["123"])
    expected = 'skipping cluster remapping: cluster Id is not present in the config file for the cluster:123'
    assert expected in caplog.messages
Loading