Skip to content

Commit

Permalink
Adding upload_dependencies flag to include dependencies upload in air-gapped workspaces
Browse files Browse the repository at this point in the history
  • Loading branch information
aminmovahed-db committed May 22, 2024
1 parent 63bf0d4 commit 6334fd0
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ classifiers = [

dependencies = ["databricks-sdk~=0.27.0",
"databricks-labs-lsql~=0.4.0",
"databricks-labs-blueprint>=0.4.3,<0.7.0",
"databricks-labs-blueprint>=0.6.0",
"PyYAML>=6.0.0,<7.0.0",
"sqlglot>=23.9,<23.18"]

Expand Down
3 changes: 3 additions & 0 deletions src/databricks/labs/ucx/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes
# List of workspace ids ucx is installed on, only applied to account-level installation
installed_workspace_ids: list[int] | None = None

# Whether to also upload the wheels of dependent libraries to the workspace (for workspaces that block Internet access)
upload_dependencies: bool = False

# [INTERNAL ONLY] Whether the assessment should capture only specific object permissions.
include_object_permissions: list[str] | None = None

Expand Down
2 changes: 2 additions & 0 deletions src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig:
configure_groups = ConfigureGroups(self.prompts)
configure_groups.run()
include_databases = self._select_databases()
upload_dependencies = self.prompts.confirm("Does given workspace block Internet access?")
trigger_job = self.prompts.confirm("Do you want to trigger assessment job after installation?")
return WorkspaceConfig(
inventory_database=inventory_database,
Expand All @@ -226,6 +227,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig:
num_threads=num_threads,
include_databases=include_databases,
trigger_job=trigger_job,
upload_dependencies=upload_dependencies,
)

def _compare_remote_local_versions(self):
Expand Down
27 changes: 17 additions & 10 deletions src/databricks/labs/ucx/installer/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
# COMMAND ----------
# MAGIC %pip install /Workspace{remote_wheel}
# MAGIC %pip install {remote_wheel}
dbutils.library.restartPython()
# COMMAND ----------
Expand All @@ -91,7 +91,7 @@
"""

TEST_RUNNER_NOTEBOOK = """# Databricks notebook source
# MAGIC %pip install /Workspace{remote_wheel}
# MAGIC %pip install {remote_wheel}
dbutils.library.restartPython()
# COMMAND ----------
Expand Down Expand Up @@ -525,12 +525,17 @@ def _deploy_workflow(self, step_name: str, settings):
return None

def _upload_wheel(self) -> list[str]:
    """Upload the product wheel, optionally with dependency wheels, and return the remote paths.

    When ``self._config.upload_dependencies`` is true, the wheels matching the
    ``databricks_sdk`` and ``sqlglot`` prefixes are uploaded first through
    ``upload_wheel_dependencies`` and its result becomes the start of the
    list. The product wheel is always uploaded with ``upload_to_wsfs`` and its
    path, prefixed with ``/Workspace``, is appended as the final entry.

    Returns:
        The remote wheel paths; the product wheel is always the last element.
    """
    wheel_paths = []
    with self._wheels:
        # No early `return self._wheels.upload_to_wsfs()` here: that would skip
        # the dependency upload and hand callers a bare string instead of a list.
        if self._config.upload_dependencies:
            wheel_paths = self._wheels.upload_wheel_dependencies(["databricks_sdk", "sqlglot"])
        wheel_paths.append(f"/Workspace{self._wheels.upload_to_wsfs()}")
        return wheel_paths

def _upload_wheel_runner(self, remote_wheel: list[str]):
    """Render the test-runner notebook for the given wheels and upload it.

    The wheel paths are substituted into the ``{remote_wheel}`` placeholder of
    ``TEST_RUNNER_NOTEBOOK`` (a ``%pip install`` line), together with
    ``self._config_file`` for ``{config_file}``. The rendered notebook is
    UTF-8 encoded and uploaded under a version-specific
    ``wheels/wheel-test-runner-<version>.py`` name.

    Args:
        remote_wheel: Remote wheel paths to install. A single path given as a
            plain string is still accepted and treated as a one-element list.

    Returns:
        Whatever ``self._installation.upload`` returns for the uploaded notebook.
    """
    # TODO: we have to be doing this workaround until ES-897453 is solved in the platform
    if isinstance(remote_wheel, str):
        # Joining a bare string would split it into single characters.
        remote_wheel = [remote_wheel]
    # `%pip install` takes whitespace-separated requirements; a ", " separator
    # would leave a trailing comma glued to every path but the last.
    remote_wheels_str = " ".join(remote_wheel)
    code = TEST_RUNNER_NOTEBOOK.format(remote_wheel=remote_wheels_str, config_file=self._config_file).encode("utf8")
    return self._installation.upload(f"wheels/wheel-test-runner-{self._product_info.version()}.py", code)

@staticmethod
Expand All @@ -554,7 +559,7 @@ def _apply_cluster_overrides(
job_task.notebook_task = jobs.NotebookTask(notebook_path=wheel_runner, base_parameters=widget_values)
return settings

def _job_settings(self, step_name: str, remote_wheel: str):
def _job_settings(self, step_name: str, remote_wheel: list[str]):
email_notifications = None
if not self._config.override_clusters and "@" in self._my_username:
# set email notifications only if we're running the real
Expand Down Expand Up @@ -582,7 +587,7 @@ def _job_settings(self, step_name: str, remote_wheel: str):
"tasks": job_tasks,
}

def _job_task(self, task: Task, remote_wheel: str) -> jobs.Task:
def _job_task(self, task: Task, remote_wheel: list[str]) -> jobs.Task:
jobs_task = jobs.Task(
task_key=task.name,
job_cluster_key=task.job_cluster,
Expand Down Expand Up @@ -627,8 +632,10 @@ def _job_notebook_task(self, jobs_task: jobs.Task, task: Task) -> jobs.Task:
),
)

def _job_wheel_task(self, jobs_task: jobs.Task, workflow: str, remote_wheel: str) -> jobs.Task:
libraries = [compute.Library(whl=f"/Workspace{remote_wheel}")]
def _job_wheel_task(self, jobs_task: jobs.Task, workflow: str, remote_wheel: list[str]) -> jobs.Task:
libraries = []
for wheel in remote_wheel:
libraries.append(compute.Library(whl=wheel))
named_parameters = {
"config": f"/Workspace{self._config_file}",
"workflow": workflow,
Expand Down Expand Up @@ -689,7 +696,7 @@ def _job_clusters(self, names: set[str]):
)
return clusters

def _job_parse_logs_task(self, job_tasks: list[jobs.Task], workflow: str, remote_wheel: str) -> jobs.Task:
def _job_parse_logs_task(self, job_tasks: list[jobs.Task], workflow: str, remote_wheel: list[str]) -> jobs.Task:
jobs_task = jobs.Task(
task_key="parse_logs",
job_cluster_key=Task.job_cluster,
Expand Down
22 changes: 22 additions & 0 deletions tests/unit/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -1886,3 +1886,25 @@ def test_save_config_ext_hms(ws, mock_installation):
'num_days_submit_runs_history': 30,
},
)


def test_upload_dependencies(ws, mock_installation):
    """Answering "Yes" to the blocked-Internet prompt uploads dependency wheels and the main wheel once each."""
    wheels_mock = create_autospec(WheelsV2)
    answers = {
        r".*": "",
        r"Choose how to map the workspace groups.*": "0",
        r".*PRO or SERVERLESS SQL warehouse.*": "1",
        r".*Does given workspace block Internet access.*": "Yes",
    }
    installer = WorkspaceInstaller(ws).replace(
        prompts=MockPrompts(answers),
        installation=mock_installation,
        product_info=PRODUCT_INFO,
        sql_backend=MockBackend(),
        wheels=wheels_mock,
    )

    installer.run()

    # Both the dependency upload and the product wheel upload must happen exactly once.
    wheels_mock.upload_wheel_dependencies.assert_called_once()
    wheels_mock.upload_to_wsfs.assert_called_once()

0 comments on commit 6334fd0

Please sign in to comment.