Skip to content

Commit

Permalink
Download wheel dependency locally to register it to the dependency gr…
Browse files Browse the repository at this point in the history
…aph (#1704)
  • Loading branch information
JCZuurmond authored May 31, 2024
1 parent a8e823f commit 0eb0d46
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 9 deletions.
8 changes: 5 additions & 3 deletions src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,11 @@ def _register_library(self, graph: DependencyGraph, library: compute.Library) ->
local_file.write_bytes(remote_file.read())
yield from graph.register_library(local_file.as_posix())
if library.whl:
# TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path
# TODO: https://github.com/databrickslabs/ucx/issues/1640
yield DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented")
with self._ws.workspace.download(library.whl, format=ExportFormat.AUTO) as remote_file:
with tempfile.TemporaryDirectory() as directory:
local_file = Path(directory) / Path(library.whl).name
local_file.write_bytes(remote_file.read())
yield from graph.register_library(local_file.as_posix())
if library.requirements: # https://pip.pypa.io/en/stable/reference/requirements-file-format/
logger.info(f"Registering libraries from {library.requirements}")
with self._ws.workspace.download(library.requirements, format=ExportFormat.AUTO) as remote_file:
Expand Down
65 changes: 62 additions & 3 deletions tests/integration/source_code/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@
from datetime import timedelta
from io import StringIO
from pathlib import Path
from unittest.mock import create_autospec

from databricks.labs.blueprint.tui import Prompts
from databricks.sdk.errors import NotFound
from databricks.sdk.retries import retried
from databricks.sdk.service.compute import Library, PythonPyPiLibrary
from databricks.sdk.service.workspace import ImportFormat

from databricks.labs.blueprint.tui import Prompts

from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
from databricks.labs.ucx.source_code.known import UNKNOWN, Whitelist
from databricks.labs.ucx.source_code.linters.files import LocalCodeLinter
from databricks.labs.ucx.source_code.linters.context import LinterContext
from databricks.labs.ucx.source_code.path_lookup import PathLookup
Expand All @@ -21,7 +22,12 @@


@retried(on=[NotFound], timeout=timedelta(minutes=2))
def test_running_real_workflow_linter_job(installation_ctx):
def test_running_real_workflow_linter_job(installation_ctx, make_notebook, make_directory, make_job):
# Deprecated file system path in call to: /mnt/things/e/f/g
lint_problem = b"display(spark.read.csv('/mnt/things/e/f/g'))"
notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=lint_problem)
make_job(notebook_path=notebook)

ctx = installation_ctx
ctx.workspace_installation.run()
ctx.deployed_workflows.run_workflow("experimental-workflow-linter")
Expand Down Expand Up @@ -239,3 +245,56 @@ def test_workflow_linter_lints_job_with_egg_dependency(
problems = simple_ctx.workflow_linter.lint_job(job_with_egg_dependency.job_id)

assert len([problem for problem in problems if problem.message == expected_problem_message]) == 0


def test_workflow_linter_lints_job_with_missing_library(
simple_ctx,
ws,
make_job,
make_notebook,
make_random,
make_directory,
):
expected_problem_message = "Could not locate import: databricks.labs.ucx"
whitelist = create_autospec(Whitelist) # databricks is in default list
whitelist.module_compatibility.return_value = UNKNOWN

simple_ctx = simple_ctx.replace(
whitelist=whitelist,
path_lookup=PathLookup(Path("/non/existing/path"), []), # Avoid finding current project
)

notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=b"import databricks.labs.ucx")
job_without_ucx_library = make_job(notebook_path=notebook)

problems = simple_ctx.workflow_linter.lint_job(job_without_ucx_library.job_id)

assert len([problem for problem in problems if problem.message == expected_problem_message]) > 0
whitelist.module_compatibility.assert_called_once_with("databricks.labs.ucx")


def test_workflow_linter_lints_job_with_wheel_dependency(
simple_ctx,
ws,
make_job,
make_notebook,
make_random,
make_directory,
):
expected_problem_message = "Could not locate import: databricks.labs.ucx"

simple_ctx = simple_ctx.replace(
whitelist=Whitelist(), # databricks is in default list
path_lookup=PathLookup(Path("/non/existing/path"), []), # Avoid finding current project
)

simple_ctx.workspace_installation.run() # Creates ucx wheel
wheels = [file for file in simple_ctx.installation.files() if file.path.endswith(".whl")]
library = compute.Library(whl=wheels[0].path)

notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=b"import databricks.labs.ucx")
job_with_ucx_library = make_job(notebook_path=notebook, libraries=[library])

problems = simple_ctx.workflow_linter.lint_job(job_with_ucx_library.job_id)

assert len([problem for problem in problems if problem.message == expected_problem_message]) == 0
23 changes: 20 additions & 3 deletions tests/unit/source_code/test_jobs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import io
import logging
from pathlib import Path
from unittest.mock import create_autospec

Expand Down Expand Up @@ -49,13 +49,13 @@ def graph(mock_path_lookup, dependency_resolver) -> DependencyGraph:
def test_workflow_task_container_builds_dependency_graph_not_yet_implemented(mock_path_lookup, graph):
# Goal of test is to raise test coverage, remove after implementing
ws = create_autospec(WorkspaceClient)
library = compute.Library(jar="library.jar", whl="library.whl")
library = compute.Library(jar="library.jar")
task = jobs.Task(task_key="test", libraries=[library], existing_cluster_id="id")

workflow_task_container = WorkflowTaskContainer(ws, task)
problems = workflow_task_container.build_dependency_graph(graph)

assert len(problems) == 2
assert len(problems) == 1
assert all(problem.code == "not-yet-implemented" for problem in problems)
ws.assert_not_called()

Expand Down Expand Up @@ -99,6 +99,23 @@ def test_workflow_task_container_builds_dependency_graph_unknown_pypi_library(mo
ws.assert_not_called()


def test_workflow_task_container_builds_dependency_graph_for_python_wheel(mock_path_lookup, graph):
ws = create_autospec(WorkspaceClient)
ws.workspace.download.return_value = io.BytesIO(b"test")

libraries = [compute.Library(whl="test.whl")]
task = jobs.Task(task_key="test", libraries=libraries)

workflow_task_container = WorkflowTaskContainer(ws, task)
problems = workflow_task_container.build_dependency_graph(graph)

assert len(problems) == 1
assert problems[0].code == "library-install-failed"
assert problems[0].message.startswith("Failed to install")
assert mock_path_lookup.resolve(Path("test")) is None
ws.assert_not_called()


def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_lookup, empty_index, caplog):
expected_message = "Found job problems:\nUNKNOWN:-1 [library-install-failed] Failed to install unknown-library"

Expand Down

0 comments on commit 0eb0d46

Please sign in to comment.