diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index 4adbddda56..5031a14a95 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -101,9 +101,11 @@ def _register_library(self, graph: DependencyGraph, library: compute.Library) -> local_file.write_bytes(remote_file.read()) yield from graph.register_library(local_file.as_posix()) if library.whl: - # TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path - # TODO: https://github.com/databrickslabs/ucx/issues/1640 - yield DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented") + with self._ws.workspace.download(library.whl, format=ExportFormat.AUTO) as remote_file: + with tempfile.TemporaryDirectory() as directory: + local_file = Path(directory) / Path(library.whl).name + local_file.write_bytes(remote_file.read()) + yield from graph.register_library(local_file.as_posix()) if library.requirements: # https://pip.pypa.io/en/stable/reference/requirements-file-format/ logger.info(f"Registering libraries from {library.requirements}") with self._ws.workspace.download(library.requirements, format=ExportFormat.AUTO) as remote_file: diff --git a/tests/integration/source_code/test_jobs.py b/tests/integration/source_code/test_jobs.py index a5a4bd2312..39479bb2f5 100644 --- a/tests/integration/source_code/test_jobs.py +++ b/tests/integration/source_code/test_jobs.py @@ -4,15 +4,16 @@ from datetime import timedelta from io import StringIO from pathlib import Path +from unittest.mock import create_autospec +from databricks.labs.blueprint.tui import Prompts from databricks.sdk.errors import NotFound from databricks.sdk.retries import retried from databricks.sdk.service.compute import Library, PythonPyPiLibrary from databricks.sdk.service.workspace import ImportFormat -from databricks.labs.blueprint.tui import Prompts - from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex +from databricks.labs.ucx.source_code.known import UNKNOWN, Whitelist from databricks.labs.ucx.source_code.linters.files import LocalCodeLinter from databricks.labs.ucx.source_code.linters.context import LinterContext from databricks.labs.ucx.source_code.path_lookup import PathLookup @@ -21,7 +22,12 @@ @retried(on=[NotFound], timeout=timedelta(minutes=2)) -def test_running_real_workflow_linter_job(installation_ctx): +def test_running_real_workflow_linter_job(installation_ctx, make_notebook, make_directory, make_job): + # Deprecated file system path in call to: /mnt/things/e/f/g + lint_problem = b"display(spark.read.csv('/mnt/things/e/f/g'))" + notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=lint_problem) + make_job(notebook_path=notebook) + ctx = installation_ctx ctx.workspace_installation.run() ctx.deployed_workflows.run_workflow("experimental-workflow-linter") @@ -239,3 +245,56 @@ def test_workflow_linter_lints_job_with_egg_dependency( problems = simple_ctx.workflow_linter.lint_job(job_with_egg_dependency.job_id) assert len([problem for problem in problems if problem.message == expected_problem_message]) == 0 + + +def test_workflow_linter_lints_job_with_missing_library( + simple_ctx, + ws, + make_job, + make_notebook, + make_random, + make_directory, +): + expected_problem_message = "Could not locate import: databricks.labs.ucx" + whitelist = create_autospec(Whitelist) # databricks is in default list + whitelist.module_compatibility.return_value = UNKNOWN + + simple_ctx = simple_ctx.replace( + whitelist=whitelist, + path_lookup=PathLookup(Path("/non/existing/path"), []), # Avoid finding current project + ) + + notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=b"import databricks.labs.ucx") + job_without_ucx_library = make_job(notebook_path=notebook) + + problems = simple_ctx.workflow_linter.lint_job(job_without_ucx_library.job_id) + + assert len([problem for problem in problems if problem.message == expected_problem_message]) > 0 + whitelist.module_compatibility.assert_called_once_with("databricks.labs.ucx") + + +def test_workflow_linter_lints_job_with_wheel_dependency( + simple_ctx, + ws, + make_job, + make_notebook, + make_random, + make_directory, +): + expected_problem_message = "Could not locate import: databricks.labs.ucx" + + simple_ctx = simple_ctx.replace( + whitelist=Whitelist(), # databricks is in default list + path_lookup=PathLookup(Path("/non/existing/path"), []), # Avoid finding current project + ) + + simple_ctx.workspace_installation.run() # Creates ucx wheel + wheels = [file for file in simple_ctx.installation.files() if file.path.endswith(".whl")] + library = compute.Library(whl=wheels[0].path) + + notebook = make_notebook(path=f"{make_directory()}/notebook.ipynb", content=b"import databricks.labs.ucx") + job_with_ucx_library = make_job(notebook_path=notebook, libraries=[library]) + + problems = simple_ctx.workflow_linter.lint_job(job_with_ucx_library.job_id) + + assert len([problem for problem in problems if problem.message == expected_problem_message]) == 0 diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index f1fcd4726f..c97ecc5cec 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -1,5 +1,5 @@ -import logging import io +import logging from pathlib import Path from unittest.mock import create_autospec @@ -49,13 +49,13 @@ def graph(mock_path_lookup, dependency_resolver) -> DependencyGraph: def test_workflow_task_container_builds_dependency_graph_not_yet_implemented(mock_path_lookup, graph): # Goal of test is to raise test coverage, remove after implementing ws = create_autospec(WorkspaceClient) - library = compute.Library(jar="library.jar", whl="library.whl") + library = compute.Library(jar="library.jar") task = jobs.Task(task_key="test", libraries=[library], existing_cluster_id="id") workflow_task_container = WorkflowTaskContainer(ws, task) problems = workflow_task_container.build_dependency_graph(graph) - assert len(problems) == 2 + assert len(problems) == 1 assert all(problem.code == "not-yet-implemented" for problem in problems) ws.assert_not_called() @@ -99,6 +99,23 @@ def test_workflow_task_container_builds_dependency_graph_unknown_pypi_library(mo ws.assert_not_called() +def test_workflow_task_container_builds_dependency_graph_for_python_wheel(mock_path_lookup, graph): + ws = create_autospec(WorkspaceClient) + ws.workspace.download.return_value = io.BytesIO(b"test") + + libraries = [compute.Library(whl="test.whl")] + task = jobs.Task(task_key="test", libraries=libraries) + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message.startswith("Failed to install") + assert mock_path_lookup.resolve(Path("test")) is None + ws.assert_not_called() + + def test_workflow_linter_lint_job_logs_problems(dependency_resolver, mock_path_lookup, empty_index, caplog): expected_message = "Found job problems:\nUNKNOWN:-1 [library-install-failed] Failed to install unknown-library"