From b14c6ca80dcea39a6ab77c5fc2374b0a5affcc17 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Fri, 10 May 2024 22:12:50 +1000 Subject: [PATCH] Updating _build_wheel, upload_to_wsfs and upload_to_dbfs (dependecies download) Resolves #573 --- src/databricks/labs/blueprint/wheels.py | 63 ++++++++----------------- tests/unit/test_wheels.py | 6 +-- 2 files changed, 22 insertions(+), 47 deletions(-) diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index a2abb01..a87a87f 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -215,43 +215,24 @@ class WheelsV2(AbstractContextManager): __version: str | None = None - def __init__( - self, installation: Installation, product_info: ProductInfo, *, verbose: bool = False, no_deps: bool = True - ): + def __init__(self, installation: Installation, product_info: ProductInfo, *, verbose: bool = False): self._installation = installation self._product_info = product_info self._verbose = verbose - self._no_deps = no_deps def upload_to_dbfs(self) -> str: """Uploads the wheel to DBFS location of installation and returns the remote path.""" - main_wheel = [] - for wheel in self._local_wheel: - if self._product_info.product_name() in wheel.name: - main_wheel.append(wheel) - if len(main_wheel) == 1: - with main_wheel[0].open("rb") as f: - return self._installation.upload_dbfs(f"wheels/{main_wheel[0].name}", f) - raise ValueError( - f"Expected one wheel containing product name {self._product_info.product_name()}, got {main_wheel}" - ) + with self._local_wheel.open("rb") as f: + return self._installation.upload_dbfs(f"wheels/{self._local_wheel.name}", f) def upload_to_wsfs(self) -> str: """Uploads the wheel to WSFS location of installation and returns the remote path.""" - # TODO: This function should be modified as there is no guarantee that the main wheel.name contains product name in it - anyway the input is a list of wheels - main_wheel = [] - for wheel in self._local_wheel: - if self._product_info.product_name() in wheel.name: - main_wheel.append(wheel) - if len(main_wheel) == 1: - remote_wheel = self._installation.upload(f"wheels/{main_wheel[0].name}", main_wheel[0].read_bytes()) + with self._local_wheel.open("rb") as f: + remote_wheel = self._installation.upload(f"wheels/{self._local_wheel.name}", f.read()) self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) return remote_wheel - raise ValueError( - f"Expected one wheel containing product name {self._product_info.product_name()}, got {main_wheel}" - ) - def upload_upstream_dependencies_to_wsfs(self, prefixes: list[str] | None = None) -> list[str]: + def upload_wheel_dependencies(self, prefixes: list[str] | None = None) -> list[str]: """Uploads the wheel dependencies to WSFS location of installation and returns the remote paths. :param prefixes (optional): A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. If None, all wheels are uploaded. Defaults to None. """ @@ -279,14 +260,14 @@ def _now_iso(): def __enter__(self) -> "WheelsV2": """Builds the wheel and returns the instance. Use it as a context manager.""" self._tmp_dir = tempfile.TemporaryDirectory() - self._local_wheel = self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=self._no_deps) + self._local_wheel = next(self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=True)) return self def __exit__(self, __exc_type, __exc_value, __traceback): """Cleans up the temporary directory. Use it as a context manager.""" self._tmp_dir.cleanup() - def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True) -> list[Path]: + def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True): """Helper to build the wheel package :param tmp_dir: str: @@ -305,24 +286,18 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = T checkout_root = self._copy_root_to(tmp_dir) # and override the version file self._override_version_to_unreleased(checkout_root) + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") + args = [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()] if no_deps: - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir} without dependencies") - subprocess.run( - [sys.executable, "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir, checkout_root.as_posix()], - check=True, - stdout=stdout, - stderr=stderr, - ) - else: - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir} with dependencies") - subprocess.run( - [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()], - check=True, - stdout=stdout, - stderr=stderr, - ) - # get all the wheel names in the temp directory - return list(Path(tmp_dir).glob("*.whl")) + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") + args.append("--no-deps") + subprocess.run( + args, + check=True, + stdout=stdout, + stderr=stderr, + ) + return Path(tmp_dir).glob("*.whl") def _override_version_to_unreleased(self, tmp_dir_path: Path): """Overrides the version file to unreleased version.""" diff --git a/tests/unit/test_wheels.py b/tests/unit/test_wheels.py index f7c9251..cf5b4b0 100644 --- a/tests/unit/test_wheels.py +++ b/tests/unit/test_wheels.py @@ -15,13 +15,13 @@ ) -def test_build_and_upload_wheel(): # TODO: modify this test to consider different scenarios (change to this test is inevitable if we go ahead with _build_wheel -> list[Path]) +def test_build_and_upload_wheel(): installation = MockInstallation() product_info = ProductInfo.from_class(MockInstallation) wheels = WheelsV2(installation, product_info) with wheels: - assert os.path.exists(wheels._local_wheel[0]) + assert os.path.exists(wheels._local_wheel) remote_on_wsfs = wheels.upload_to_wsfs() installation.assert_file_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) @@ -36,7 +36,7 @@ def test_build_and_upload_wheel(): # TODO: modify this test to consider differe wheels.upload_to_dbfs() installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) - assert not os.path.exists(wheels._local_wheel[0]) + assert not os.path.exists(wheels._local_wheel) def test_unreleased_version(tmp_path):