Adding upload_dependencies flag to include dependencies upload in air-gapped workspaces
aminmovahed-db committed May 17, 2024
1 parent ee8fffc commit 23e09bb
Showing 4 changed files with 27 additions and 11 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -46,7 +46,7 @@ classifiers = [

dependencies = ["databricks-sdk~=0.27.0",
"databricks-labs-lsql~=0.4.0",
-"databricks-labs-blueprint>=0.4.3,<0.6.0",
+"databricks-labs-blueprint>=0.6.0",
"PyYAML>=6.0.0,<7.0.0",
"sqlglot>=23.9,<23.16"]

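The blueprint floor moves from 0.4.3 to 0.6.0; judging by the new upload_wheel_dependencies call in workflows.py below, that release is what introduces the method. A minimal sanity check, assuming WheelsV2 is still importable from databricks.labs.blueprint.wheels:

from databricks.labs.blueprint.wheels import WheelsV2

# Expected to fail on blueprint < 0.6.0, where the method is absent (assumption).
assert hasattr(WheelsV2, "upload_wheel_dependencies")
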
3 changes: 3 additions & 0 deletions src/databricks/labs/ucx/config.py
@@ -58,6 +58,9 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes
# List of workspace ids ucx is installed on, only applied to account-level installation
installed_workspace_ids: list[int] | None = None

+# Whether to upload dependent libraries to the workspace
+upload_dependencies: bool = False

# [INTERNAL ONLY] Whether the assessment should capture only specific object permissions.
include_object_permissions: list[str] | None = None

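Because the new field defaults to False, existing config.yml files without the key keep the current behavior. A small illustration ("ucx" is a placeholder inventory database name):

from databricks.labs.ucx.config import WorkspaceConfig

config = WorkspaceConfig(inventory_database="ucx", upload_dependencies=True)
assert config.upload_dependencies is True
# Omitting the key falls back to the default:
assert WorkspaceConfig(inventory_database="ucx").upload_dependencies is False
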
4 changes: 4 additions & 0 deletions src/databricks/labs/ucx/install.py
@@ -213,6 +213,9 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig:
configure_groups = ConfigureGroups(self.prompts)
configure_groups.run()
include_databases = self._select_databases()
+upload_dependencies = self.prompts.confirm(
+    "If there is no internet access in the target workspace, do you want to upload all the dependencies?"
+)
trigger_job = self.prompts.confirm("Do you want to trigger assessment job after installation?")
return WorkspaceConfig(
inventory_database=inventory_database,
@@ -226,6 +229,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig:
num_threads=num_threads,
include_databases=include_databases,
trigger_job=trigger_job,
+upload_dependencies=upload_dependencies,
)

def _compare_remote_local_versions(self):
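The installer now asks one extra yes/no question before assembling the config. A sketch of the interaction using blueprint's MockPrompts, assuming it still maps question regexes to canned answers:

from databricks.labs.blueprint.tui import MockPrompts

prompts = MockPrompts({
    r"If there is no internet access.*": "yes",
    r".*": "",  # catch-all for any other prompt
})
# confirm() returns True because the canned answer is "yes"
assert prompts.confirm(
    "If there is no internet access in the target workspace, "
    "do you want to upload all the dependencies?"
)
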
29 changes: 19 additions & 10 deletions src/databricks/labs/ucx/installer/workflows.py
@@ -91,7 +91,7 @@
"""

TEST_RUNNER_NOTEBOOK = """# Databricks notebook source
-# MAGIC %pip install /Workspace{remote_wheel}
+# MAGIC %pip install {remote_wheel}
dbutils.library.restartPython()
# COMMAND ----------
Expand Down Expand Up @@ -525,12 +525,19 @@ def _deploy_workflow(self, step_name: str, settings):
return None

def _upload_wheel(self):
+    wheel_paths = []
    with self._wheels:
-        return self._wheels.upload_to_wsfs()
+        if self._config.upload_dependencies:
+            wheel_paths = self._wheels.upload_wheel_dependencies(
+                ["databricks_sdk", "sqlglot"]
+            )  # TODO: add more dependencies if required
+        wheel_paths.append(self._wheels.upload_to_wsfs())
+    return wheel_paths

-def _upload_wheel_runner(self, remote_wheel: str):
+def _upload_wheel_runner(self, remote_wheel: list[str]):
    # TODO: we have to be doing this workaround until ES-897453 is solved in the platform
-    code = TEST_RUNNER_NOTEBOOK.format(remote_wheel=remote_wheel, config_file=self._config_file).encode("utf8")
+    remote_wheels_str = ", ".join([f"/Workspace{wheel}" for wheel in remote_wheel])
+    code = TEST_RUNNER_NOTEBOOK.format(remote_wheel=remote_wheels_str, config_file=self._config_file).encode("utf8")
return self._installation.upload(f"wheels/wheel-test-runner-{self._product_info.version()}.py", code)

@staticmethod
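
Since {remote_wheel} is now pre-joined into a single string, the /Workspace prefix moves out of the TEST_RUNNER_NOTEBOOK template and into the join in _upload_wheel_runner. How the rendered %pip line would look, with hypothetical wheel paths:

remote_wheel = [
    "/Wheels/databricks_sdk-0.27.0-py3-none-any.whl",  # hypothetical path
    "/Wheels/ucx-0.24.0-py3-none-any.whl",  # hypothetical path
]
remote_wheels_str = ", ".join([f"/Workspace{wheel}" for wheel in remote_wheel])
print(f"# MAGIC %pip install {remote_wheels_str}")
# -> # MAGIC %pip install /Workspace/Wheels/databricks_sdk-0.27.0-py3-none-any.whl, /Workspace/Wheels/ucx-0.24.0-py3-none-any.whl
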
@@ -554,7 +561,7 @@ def _apply_cluster_overrides(
job_task.notebook_task = jobs.NotebookTask(notebook_path=wheel_runner, base_parameters=widget_values)
return settings

-def _job_settings(self, step_name: str, remote_wheel: str):
+def _job_settings(self, step_name: str, remote_wheel: list[str]):
email_notifications = None
if not self._config.override_clusters and "@" in self._my_username:
# set email notifications only if we're running the real
@@ -582,7 +589,7 @@ def _job_settings(self, step_name: str, remote_wheel: str):
"tasks": job_tasks,
}

-def _job_task(self, task: Task, remote_wheel: str) -> jobs.Task:
+def _job_task(self, task: Task, remote_wheel: list[str]) -> jobs.Task:
jobs_task = jobs.Task(
task_key=task.name,
job_cluster_key=task.job_cluster,
@@ -627,8 +634,10 @@ def _job_notebook_task(self, jobs_task: jobs.Task, task: Task) -> jobs.Task:
),
)

-def _job_wheel_task(self, jobs_task: jobs.Task, workflow: str, remote_wheel: str) -> jobs.Task:
-    libraries = [compute.Library(whl=f"/Workspace{remote_wheel}")]
+def _job_wheel_task(self, jobs_task: jobs.Task, workflow: str, remote_wheel: list[str]) -> jobs.Task:
+    libraries = []
+    for wheel in remote_wheel:
+        libraries.append(compute.Library(whl=f"/Workspace{wheel}"))
named_parameters = {
"config": f"/Workspace{self._config_file}",
"workflow": workflow,
@@ -689,7 +698,7 @@ def _job_clusters(self, names: set[str]):
)
return clusters

-def _job_parse_logs_task(self, job_tasks: list[jobs.Task], workflow: str, remote_wheel: str) -> jobs.Task:
+def _job_parse_logs_task(self, job_tasks: list[jobs.Task], workflow: str, remote_wheel: list[str]) -> jobs.Task:
jobs_task = jobs.Task(
task_key="parse_logs",
job_cluster_key=Task.job_cluster,
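In _job_wheel_task above, the loop attaches one compute.Library per uploaded wheel, so the job cluster installs every wheel from the workspace file system rather than resolving anything from PyPI. The same construction in isolation (paths are hypothetical):

from databricks.sdk.service import compute

remote_wheel = [
    "/Wheels/sqlglot-23.9-py3-none-any.whl",  # hypothetical dependency wheel
    "/Wheels/ucx-0.24.0-py3-none-any.whl",  # hypothetical main wheel
]
libraries = [compute.Library(whl=f"/Workspace{wheel}") for wheel in remote_wheel]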
