From a67c202c4549193fd631728b5ee36a77d07548c8 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Sat, 4 May 2024 20:22:39 +1000 Subject: [PATCH 1/7] Updating _build_wheel, upload_to_wsfs and upload_to_dbfs (dependecies download) Resolves #573 The output of the build wheel function will be a list of all the dependent wheel packages. Upload functions will get a new flag as input to either include the dependencies in the download list or just limit the download to the main wheel package. --- src/databricks/labs/blueprint/wheels.py | 31 ++++++++++++++++--------- tests/unit/test_wheels.py | 31 +++++++++++++++++++++---- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index 5bed53d..d9670e0 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -220,17 +220,26 @@ def __init__(self, installation: Installation, product_info: ProductInfo, *, ver self._product_info = product_info self._verbose = verbose - def upload_to_dbfs(self) -> str: + def upload_to_dbfs(self, force_dependencies: bool = False) -> str: """Uploads the wheel to DBFS location of installation and returns the remote path.""" - with self._local_wheel.open("rb") as f: - return self._installation.upload_dbfs(f"wheels/{self._local_wheel.name}", f) - - def upload_to_wsfs(self) -> str: + for wheel in self._local_wheel: + if force_dependencies or self._product_info.product_name() in wheel.name: + with wheel.open("rb") as f: + remote_wheel = self._installation.upload_dbfs(f"wheels/{wheel.name}", f) + if self._product_info.product_name() in wheel.name: + main_wheel_remote_path = remote_wheel + return main_wheel_remote_path + + def upload_to_wsfs(self, force_dependencies: bool = False) -> str: """Uploads the wheel to WSFS location of installation and returns the remote path.""" - with self._local_wheel.open("rb") as f: - remote_wheel = self._installation.upload(f"wheels/{self._local_wheel.name}", f.read()) - self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) - return remote_wheel + for wheel in self._local_wheel: + if force_dependencies or self._product_info.product_name() in wheel.name: + with wheel.open("rb") as f: + remote_wheel = self._installation.upload(f"wheels/{wheel.name}", f.read()) + if self._product_info.product_name() in wheel.name: + self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) + main_wheel_remote_path = remote_wheel + return main_wheel_remote_path @staticmethod def _now_iso(): @@ -268,13 +277,13 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False): self._override_version_to_unreleased(checkout_root) logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") subprocess.run( - [sys.executable, "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir, checkout_root.as_posix()], + [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()], check=True, stdout=stdout, stderr=stderr, ) # get wheel name as first file in the temp directory - return next(Path(tmp_dir).glob("*.whl")) + return list(Path(tmp_dir).glob("*.whl")) def _override_version_to_unreleased(self, tmp_dir_path: Path): """Overrides the version file to unreleased version.""" diff --git a/tests/unit/test_wheels.py b/tests/unit/test_wheels.py index cf5b4b0..eb7b439 100644 --- a/tests/unit/test_wheels.py +++ b/tests/unit/test_wheels.py @@ -21,9 +21,10 @@ def test_build_and_upload_wheel(): wheels = WheelsV2(installation, product_info) with wheels: - assert os.path.exists(wheels._local_wheel) + for wheel in wheels._local_wheel: + assert os.path.exists(wheel) - remote_on_wsfs = wheels.upload_to_wsfs() + remote_on_wsfs = wheels.upload_to_wsfs(force_dependencies=False) installation.assert_file_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) installation.assert_file_written( "version.json", @@ -34,9 +35,31 @@ def test_build_and_upload_wheel(): }, ) - wheels.upload_to_dbfs() + wheels.upload_to_dbfs(force_dependencies=False) + installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) + for wheel in wheels._local_wheel: + assert not os.path.exists(wheel) + + +def test_build_and_upload_wheel_with_no_dependency(): + installation = MockInstallation() + product_info = ProductInfo.from_class(MockInstallation) + + wheels = WheelsV2(installation, product_info) + with wheels: + remote_on_wsfs = wheels.upload_to_wsfs(force_dependencies=True) + installation.assert_file_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) + installation.assert_file_written( + "version.json", + { + "version": product_info.version(), + "wheel": remote_on_wsfs, + "date": ..., + }, + ) + + wheels.upload_to_dbfs(force_dependencies=True) installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) - assert not os.path.exists(wheels._local_wheel) def test_unreleased_version(tmp_path): From 9e6f5724d57b234e85be51ef717282e76a96bebf Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Wed, 8 May 2024 16:47:17 +1000 Subject: [PATCH 2/7] Updating _build_wheel, upload_to_wsfs and upload_to_dbfs (dependecies download) Resolves #573 --- src/databricks/labs/blueprint/wheels.py | 95 +++++++++++++++++-------- tests/unit/test_wheels.py | 33 ++------- 2 files changed, 72 insertions(+), 56 deletions(-) diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index d9670e0..a2abb01 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -215,31 +215,61 @@ class WheelsV2(AbstractContextManager): __version: str | None = None - def __init__(self, installation: Installation, product_info: ProductInfo, *, verbose: bool = False): + def __init__( + self, installation: Installation, product_info: ProductInfo, *, verbose: bool = False, no_deps: bool = True + ): self._installation = installation self._product_info = product_info self._verbose = verbose + self._no_deps = no_deps - def upload_to_dbfs(self, force_dependencies: bool = False) -> str: + def upload_to_dbfs(self) -> str: """Uploads the wheel to DBFS location of installation and returns the remote path.""" + main_wheel = [] for wheel in self._local_wheel: - if force_dependencies or self._product_info.product_name() in wheel.name: - with wheel.open("rb") as f: - remote_wheel = self._installation.upload_dbfs(f"wheels/{wheel.name}", f) - if self._product_info.product_name() in wheel.name: - main_wheel_remote_path = remote_wheel - return main_wheel_remote_path - - def upload_to_wsfs(self, force_dependencies: bool = False) -> str: + if self._product_info.product_name() in wheel.name: + main_wheel.append(wheel) + if len(main_wheel) == 1: + with main_wheel[0].open("rb") as f: + return self._installation.upload_dbfs(f"wheels/{main_wheel[0].name}", f) + raise ValueError( + f"Expected one wheel containing product name {self._product_info.product_name()}, got {main_wheel}" + ) + + def upload_to_wsfs(self) -> str: """Uploads the wheel to WSFS location of installation and returns the remote path.""" + # TODO: This function should be modified as there is no guarantee that the main wheel.name contains product name in it - anyway the input is a list of wheels + main_wheel = [] + for wheel in self._local_wheel: + if self._product_info.product_name() in wheel.name: + main_wheel.append(wheel) + if len(main_wheel) == 1: + remote_wheel = self._installation.upload(f"wheels/{main_wheel[0].name}", main_wheel[0].read_bytes()) + self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) + return remote_wheel + raise ValueError( + f"Expected one wheel containing product name {self._product_info.product_name()}, got {main_wheel}" + ) + + def upload_upstream_dependencies_to_wsfs(self, prefixes: list[str] | None = None) -> list[str]: + """Uploads the wheel dependencies to WSFS location of installation and returns the remote paths. + :param prefixes (optional): A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. If None, all wheels are uploaded. Defaults to None. + """ + if prefixes is None: + prefixes = [wheel.name for wheel in self._local_wheel] + remote_paths = [] for wheel in self._local_wheel: - if force_dependencies or self._product_info.product_name() in wheel.name: - with wheel.open("rb") as f: - remote_wheel = self._installation.upload(f"wheels/{wheel.name}", f.read()) - if self._product_info.product_name() in wheel.name: - self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) - main_wheel_remote_path = remote_wheel - return main_wheel_remote_path + for prefix in prefixes: + # TODO: This function should be modified as there is no guarantee that the main wheel.name contains product name in it + # TODO: In the future the need of passing explicit prefixes should be removed + if ( + prefix in wheel.name + and wheel.name.endswith("-none-any.whl") + and self._product_info.product_name() not in wheel.name + ): + remote_wheel = self._installation.upload(f"wheels/{wheel.name}", wheel.read_bytes()) + remote_paths.append(remote_wheel) + return remote_paths @staticmethod def _now_iso(): @@ -249,20 +279,20 @@ def _now_iso(): def __enter__(self) -> "WheelsV2": """Builds the wheel and returns the instance. Use it as a context manager.""" self._tmp_dir = tempfile.TemporaryDirectory() - self._local_wheel = self._build_wheel(self._tmp_dir.name, verbose=self._verbose) + self._local_wheel = self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=self._no_deps) return self def __exit__(self, __exc_type, __exc_value, __traceback): """Cleans up the temporary directory. Use it as a context manager.""" self._tmp_dir.cleanup() - def _build_wheel(self, tmp_dir: str, *, verbose: bool = False): + def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True) -> list[Path]: """Helper to build the wheel package :param tmp_dir: str: :param *: :param verbose: bool: (Default value = False) - + :param no_deps: bool: (Default value = True) """ stdout = subprocess.STDOUT stderr = subprocess.STDOUT @@ -275,14 +305,23 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False): checkout_root = self._copy_root_to(tmp_dir) # and override the version file self._override_version_to_unreleased(checkout_root) - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") - subprocess.run( - [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()], - check=True, - stdout=stdout, - stderr=stderr, - ) - # get wheel name as first file in the temp directory + if no_deps: + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir} without dependencies") + subprocess.run( + [sys.executable, "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir, checkout_root.as_posix()], + check=True, + stdout=stdout, + stderr=stderr, + ) + else: + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir} with dependencies") + subprocess.run( + [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()], + check=True, + stdout=stdout, + stderr=stderr, + ) + # get all the wheel names in the temp directory return list(Path(tmp_dir).glob("*.whl")) def _override_version_to_unreleased(self, tmp_dir_path: Path): diff --git a/tests/unit/test_wheels.py b/tests/unit/test_wheels.py index eb7b439..f7c9251 100644 --- a/tests/unit/test_wheels.py +++ b/tests/unit/test_wheels.py @@ -15,16 +15,15 @@ ) -def test_build_and_upload_wheel(): +def test_build_and_upload_wheel(): # TODO: modify this test to consider different scenarios (change to this test is inevitable if we go ahead with _build_wheel -> list[Path]) installation = MockInstallation() product_info = ProductInfo.from_class(MockInstallation) wheels = WheelsV2(installation, product_info) with wheels: - for wheel in wheels._local_wheel: - assert os.path.exists(wheel) + assert os.path.exists(wheels._local_wheel[0]) - remote_on_wsfs = wheels.upload_to_wsfs(force_dependencies=False) + remote_on_wsfs = wheels.upload_to_wsfs() installation.assert_file_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) installation.assert_file_written( "version.json", @@ -35,31 +34,9 @@ def test_build_and_upload_wheel(): }, ) - wheels.upload_to_dbfs(force_dependencies=False) - installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) - for wheel in wheels._local_wheel: - assert not os.path.exists(wheel) - - -def test_build_and_upload_wheel_with_no_dependency(): - installation = MockInstallation() - product_info = ProductInfo.from_class(MockInstallation) - - wheels = WheelsV2(installation, product_info) - with wheels: - remote_on_wsfs = wheels.upload_to_wsfs(force_dependencies=True) - installation.assert_file_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) - installation.assert_file_written( - "version.json", - { - "version": product_info.version(), - "wheel": remote_on_wsfs, - "date": ..., - }, - ) - - wheels.upload_to_dbfs(force_dependencies=True) + wheels.upload_to_dbfs() installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) + assert not os.path.exists(wheels._local_wheel[0]) def test_unreleased_version(tmp_path): From b14c6ca80dcea39a6ab77c5fc2374b0a5affcc17 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Fri, 10 May 2024 22:12:50 +1000 Subject: [PATCH 3/7] Updating _build_wheel, upload_to_wsfs and upload_to_dbfs (dependecies download) Resolves #573 --- src/databricks/labs/blueprint/wheels.py | 63 ++++++++----------------- tests/unit/test_wheels.py | 6 +-- 2 files changed, 22 insertions(+), 47 deletions(-) diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index a2abb01..a87a87f 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -215,43 +215,24 @@ class WheelsV2(AbstractContextManager): __version: str | None = None - def __init__( - self, installation: Installation, product_info: ProductInfo, *, verbose: bool = False, no_deps: bool = True - ): + def __init__(self, installation: Installation, product_info: ProductInfo, *, verbose: bool = False): self._installation = installation self._product_info = product_info self._verbose = verbose - self._no_deps = no_deps def upload_to_dbfs(self) -> str: """Uploads the wheel to DBFS location of installation and returns the remote path.""" - main_wheel = [] - for wheel in self._local_wheel: - if self._product_info.product_name() in wheel.name: - main_wheel.append(wheel) - if len(main_wheel) == 1: - with main_wheel[0].open("rb") as f: - return self._installation.upload_dbfs(f"wheels/{main_wheel[0].name}", f) - raise ValueError( - f"Expected one wheel containing product name {self._product_info.product_name()}, got {main_wheel}" - ) + with self._local_wheel.open("rb") as f: + return self._installation.upload_dbfs(f"wheels/{self._local_wheel.name}", f) def upload_to_wsfs(self) -> str: """Uploads the wheel to WSFS location of installation and returns the remote path.""" - # TODO: This function should be modified as there is no guarantee that the main wheel.name contains product name in it - anyway the input is a list of wheels - main_wheel = [] - for wheel in self._local_wheel: - if self._product_info.product_name() in wheel.name: - main_wheel.append(wheel) - if len(main_wheel) == 1: - remote_wheel = self._installation.upload(f"wheels/{main_wheel[0].name}", main_wheel[0].read_bytes()) + with self._local_wheel.open("rb") as f: + remote_wheel = self._installation.upload(f"wheels/{self._local_wheel.name}", f.read()) self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) return remote_wheel - raise ValueError( - f"Expected one wheel containing product name {self._product_info.product_name()}, got {main_wheel}" - ) - def upload_upstream_dependencies_to_wsfs(self, prefixes: list[str] | None = None) -> list[str]: + def upload_wheel_dependencies(self, prefixes: list[str] | None = None) -> list[str]: """Uploads the wheel dependencies to WSFS location of installation and returns the remote paths. :param prefixes (optional): A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. If None, all wheels are uploaded. Defaults to None. """ @@ -279,14 +260,14 @@ def _now_iso(): def __enter__(self) -> "WheelsV2": """Builds the wheel and returns the instance. Use it as a context manager.""" self._tmp_dir = tempfile.TemporaryDirectory() - self._local_wheel = self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=self._no_deps) + self._local_wheel = next(self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=True)) return self def __exit__(self, __exc_type, __exc_value, __traceback): """Cleans up the temporary directory. Use it as a context manager.""" self._tmp_dir.cleanup() - def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True) -> list[Path]: + def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True): """Helper to build the wheel package :param tmp_dir: str: @@ -305,24 +286,18 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = T checkout_root = self._copy_root_to(tmp_dir) # and override the version file self._override_version_to_unreleased(checkout_root) + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") + args = [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()] if no_deps: - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir} without dependencies") - subprocess.run( - [sys.executable, "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir, checkout_root.as_posix()], - check=True, - stdout=stdout, - stderr=stderr, - ) - else: - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir} with dependencies") - subprocess.run( - [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()], - check=True, - stdout=stdout, - stderr=stderr, - ) - # get all the wheel names in the temp directory - return list(Path(tmp_dir).glob("*.whl")) + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") + args.append("--no-deps") + subprocess.run( + args, + check=True, + stdout=stdout, + stderr=stderr, + ) + return Path(tmp_dir).glob("*.whl") def _override_version_to_unreleased(self, tmp_dir_path: Path): """Overrides the version file to unreleased version.""" diff --git a/tests/unit/test_wheels.py b/tests/unit/test_wheels.py index f7c9251..cf5b4b0 100644 --- a/tests/unit/test_wheels.py +++ b/tests/unit/test_wheels.py @@ -15,13 +15,13 @@ ) -def test_build_and_upload_wheel(): # TODO: modify this test to consider different scenarios (change to this test is inevitable if we go ahead with _build_wheel -> list[Path]) +def test_build_and_upload_wheel(): installation = MockInstallation() product_info = ProductInfo.from_class(MockInstallation) wheels = WheelsV2(installation, product_info) with wheels: - assert os.path.exists(wheels._local_wheel[0]) + assert os.path.exists(wheels._local_wheel) remote_on_wsfs = wheels.upload_to_wsfs() installation.assert_file_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) @@ -36,7 +36,7 @@ def test_build_and_upload_wheel(): # TODO: modify this test to consider differe wheels.upload_to_dbfs() installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) - assert not os.path.exists(wheels._local_wheel[0]) + assert not os.path.exists(wheels._local_wheel) def test_unreleased_version(tmp_path): From 1808a90634d216ae82ca27e365d592a27957cda4 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Sat, 11 May 2024 12:15:38 +1000 Subject: [PATCH 4/7] Updating _build_wheel, upload_to_wsfs and upload_to_dbfs (dependecies download) Resolves #573 --- src/databricks/labs/blueprint/wheels.py | 33 ++++++++++++++----------- tests/unit/test_wheels.py | 10 ++++++++ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index a87a87f..539bc8c 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -232,24 +232,26 @@ def upload_to_wsfs(self) -> str: self._installation.save(Version(self._product_info.version(), remote_wheel, self._now_iso())) return remote_wheel - def upload_wheel_dependencies(self, prefixes: list[str] | None = None) -> list[str]: + def upload_wheel_dependencies(self, prefixes: list[str]) -> list[str]: """Uploads the wheel dependencies to WSFS location of installation and returns the remote paths. - :param prefixes (optional): A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. If None, all wheels are uploaded. Defaults to None. + :param prefixes : A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. """ - if prefixes is None: - prefixes = [wheel.name for wheel in self._local_wheel] + # _tmp_dir_dependencies needs to be created here as _tmp_dir already exists and causes error when running _build_wheel + _tmp_dir_dependencies = tempfile.TemporaryDirectory() remote_paths = [] - for wheel in self._local_wheel: + for wheel in list(self._build_wheel(_tmp_dir_dependencies.name, verbose=self._verbose, no_deps=False)): + if not wheel.name.endswith("-none-any.whl"): + continue + # main wheel is uploaded with upload_to_wsfs() method. + if wheel.name == self._local_wheel.name: + continue for prefix in prefixes: - # TODO: This function should be modified as there is no guarantee that the main wheel.name contains product name in it - # TODO: In the future the need of passing explicit prefixes should be removed - if ( - prefix in wheel.name - and wheel.name.endswith("-none-any.whl") - and self._product_info.product_name() not in wheel.name - ): - remote_wheel = self._installation.upload(f"wheels/{wheel.name}", wheel.read_bytes()) - remote_paths.append(remote_wheel) + if not wheel.name.startswith(prefix): + continue + remote_wheel = self._installation.upload(f"wheels/{wheel.name}", wheel.read_bytes()) + remote_paths.append(remote_wheel) + # cleanup the temporary directory + _tmp_dir_dependencies.cleanup() return remote_paths @staticmethod @@ -286,11 +288,12 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = T checkout_root = self._copy_root_to(tmp_dir) # and override the version file self._override_version_to_unreleased(checkout_root) - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") args = [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()] if no_deps: logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") args.append("--no-deps") + else: + logger.debug(f"Building dependencies for {checkout_root} in {tmp_dir}") subprocess.run( args, check=True, diff --git a/tests/unit/test_wheels.py b/tests/unit/test_wheels.py index cf5b4b0..59cf4a5 100644 --- a/tests/unit/test_wheels.py +++ b/tests/unit/test_wheels.py @@ -38,6 +38,16 @@ def test_build_and_upload_wheel(): installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) assert not os.path.exists(wheels._local_wheel) +def test_build_and_dependencies_upload_wheel(): + installation = MockInstallation() + product_info = ProductInfo.from_class(MockInstallation) + + wheels = WheelsV2(installation, product_info) + with wheels: + wheel_paths = wheels.upload_wheel_dependencies(["databricks_sdk"]) + assert len(wheel_paths) == 1 + installation.assert_file_uploaded(re.compile("wheels/databricks_sdk-*")) + def test_unreleased_version(tmp_path): if not is_in_debug(): From 543070438e4a3265a90fac680c48386c4f449b8b Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Sat, 11 May 2024 21:46:40 +1000 Subject: [PATCH 5/7] Updating _copy_root_to to get dirs_exist_ok: bool = False --- src/databricks/labs/blueprint/wheels.py | 17 ++++++++--------- tests/unit/test_wheels.py | 1 + 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index 539bc8c..60fb66d 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -236,10 +236,10 @@ def upload_wheel_dependencies(self, prefixes: list[str]) -> list[str]: """Uploads the wheel dependencies to WSFS location of installation and returns the remote paths. :param prefixes : A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. """ - # _tmp_dir_dependencies needs to be created here as _tmp_dir already exists and causes error when running _build_wheel - _tmp_dir_dependencies = tempfile.TemporaryDirectory() remote_paths = [] - for wheel in list(self._build_wheel(_tmp_dir_dependencies.name, verbose=self._verbose, no_deps=False)): + for wheel in list( + self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=False, dirs_exist_ok=True) + ): if not wheel.name.endswith("-none-any.whl"): continue # main wheel is uploaded with upload_to_wsfs() method. @@ -250,8 +250,6 @@ def upload_wheel_dependencies(self, prefixes: list[str]) -> list[str]: continue remote_wheel = self._installation.upload(f"wheels/{wheel.name}", wheel.read_bytes()) remote_paths.append(remote_wheel) - # cleanup the temporary directory - _tmp_dir_dependencies.cleanup() return remote_paths @staticmethod @@ -269,13 +267,14 @@ def __exit__(self, __exc_type, __exc_value, __traceback): """Cleans up the temporary directory. Use it as a context manager.""" self._tmp_dir.cleanup() - def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True): + def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = True, dirs_exist_ok: bool = False): """Helper to build the wheel package :param tmp_dir: str: :param *: :param verbose: bool: (Default value = False) :param no_deps: bool: (Default value = True) + :param dirs_exist_ok: bool: (Default value = False) """ stdout = subprocess.STDOUT stderr = subprocess.STDOUT @@ -285,7 +284,7 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = T checkout_root = self._product_info.checkout_root() if self._product_info.is_git_checkout() and self._product_info.is_unreleased_version(): # working copy becomes project root for building a wheel - checkout_root = self._copy_root_to(tmp_dir) + checkout_root = self._copy_root_to(tmp_dir, dirs_exist_ok) # and override the version file self._override_version_to_unreleased(checkout_root) args = [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()] @@ -310,7 +309,7 @@ def _override_version_to_unreleased(self, tmp_dir_path: Path): with version_file.open("w") as f: f.write(f'__version__ = "{self._product_info.version()}"') - def _copy_root_to(self, tmp_dir: str | Path): + def _copy_root_to(self, tmp_dir: str | Path, dirs_exist_ok: bool = False): """Copies the root to a temporary directory.""" checkout_root = self._product_info.checkout_root() tmp_dir_path = Path(tmp_dir) / "working-copy" @@ -325,7 +324,7 @@ def copy_ignore(_, names: list[str]): ignored_names.append(name) return ignored_names - shutil.copytree(checkout_root, tmp_dir_path, ignore=copy_ignore) + shutil.copytree(checkout_root, tmp_dir_path, ignore=copy_ignore, dirs_exist_ok=dirs_exist_ok) return tmp_dir_path diff --git a/tests/unit/test_wheels.py b/tests/unit/test_wheels.py index 59cf4a5..5ba6db6 100644 --- a/tests/unit/test_wheels.py +++ b/tests/unit/test_wheels.py @@ -38,6 +38,7 @@ def test_build_and_upload_wheel(): installation.assert_file_dbfs_uploaded(re.compile("wheels/databricks_labs_blueprint-*")) assert not os.path.exists(wheels._local_wheel) + def test_build_and_dependencies_upload_wheel(): installation = MockInstallation() product_info = ProductInfo.from_class(MockInstallation) From aa6e22f66d4527fb5ae41e93fff9c26909b2e3a1 Mon Sep 17 00:00:00 2001 From: Amin Movahed Date: Sun, 12 May 2024 17:16:41 +1000 Subject: [PATCH 6/7] Incorporating the comments --- README.md | 27 +++++++++++++++++++++++++ src/databricks/labs/blueprint/wheels.py | 8 ++------ tests/integration/test_wheels.py | 3 +++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 83ac7d3..240ac33 100644 --- a/README.md +++ b/README.md @@ -933,6 +933,33 @@ This will print something like: You can also do `wheels.upload_to_dbfs()`, though you're not able to set any access control over it. +### Publishing the dependencies of the wheel to Databricks Workspace + +When you build a wheel, it may have dependencies that are not included in the wheel itself. These dependencies are usually other Python packages that your wheel relies on. To ensure that your wheel works correctly when it is installed, these dependencies also need to be available in the same environment. + +While in many instances, the required packages are readily available in Databricks Runtime (DBR) or PyPi, there may be situations where specific dependencies need to be manually uploaded to the Databricks Workspace. This ensures that all necessary packages are present, allowing your wheel to operate correctly in its intended environment. + +The `upload_wheel_dependencies(prefixes)` method can be used to upload these dependencies to Databricks Workspace. This method takes a list of prefixes as an argument. It will upload all the dependencies of the wheel that have names starting with any of the provided prefixes. + +Here is an example of how you can use this method: + +```python +from databricks.sdk import WorkspaceClient +from databricks.labs.blueprint.wheels import ProductInfo + +ws = WorkspaceClient() +product_info = ProductInfo(__file__) +installation = product_info.current_installation(ws) + +with product_info.wheels(ws) as wheels: + wheel_paths = wheels.upload_wheel_dependencies(['databricks_sdk', 'pandas']) + for path in wheel_paths: + print(f'Uploaded dependency to {path}') +``` + +In this example, the `upload_wheel_dependencies(['databricks_sdk', 'pandas'])` call will upload all the dependencies of the wheel that have names starting with 'databricks_sdk' or 'pandas'. This method excludes any platform specific dependencies (i.e. ending with `-none-any.whl`). Also the main wheel file is not uploaded. The method returns a list of paths to the uploaded dependencies on WorkspaceFS. + + [[back to top](#databricks-labs-blueprint)] ## Databricks CLI's `databricks labs ...` Router diff --git a/src/databricks/labs/blueprint/wheels.py b/src/databricks/labs/blueprint/wheels.py index 60fb66d..46beb98 100644 --- a/src/databricks/labs/blueprint/wheels.py +++ b/src/databricks/labs/blueprint/wheels.py @@ -237,9 +237,7 @@ def upload_wheel_dependencies(self, prefixes: list[str]) -> list[str]: :param prefixes : A list of prefixes to match against the wheel names. If a prefix matches, the wheel is uploaded. """ remote_paths = [] - for wheel in list( - self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=False, dirs_exist_ok=True) - ): + for wheel in self._build_wheel(self._tmp_dir.name, verbose=self._verbose, no_deps=False, dirs_exist_ok=True): if not wheel.name.endswith("-none-any.whl"): continue # main wheel is uploaded with upload_to_wsfs() method. @@ -288,11 +286,9 @@ def _build_wheel(self, tmp_dir: str, *, verbose: bool = False, no_deps: bool = T # and override the version file self._override_version_to_unreleased(checkout_root) args = [sys.executable, "-m", "pip", "wheel", "--wheel-dir", tmp_dir, checkout_root.as_posix()] + logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") if no_deps: - logger.debug(f"Building wheel for {checkout_root} in {tmp_dir}") args.append("--no-deps") - else: - logger.debug(f"Building dependencies for {checkout_root} in {tmp_dir}") subprocess.run( args, check=True, diff --git a/tests/integration/test_wheels.py b/tests/integration/test_wheels.py index 518ce68..aa59da5 100644 --- a/tests/integration/test_wheels.py +++ b/tests/integration/test_wheels.py @@ -17,3 +17,6 @@ def test_upload_dbfs(ws, new_installation): with WheelsV2(new_installation, product_info) as whl: remote_wheel = whl.upload_to_dbfs() ws.dbfs.get_status(remote_wheel) + + +# TODO: to add an integration test for upload_wheel_dependencies (currently getting an access issue to the test environment) From 84450668088e1e994db8ebeca0bb15effb81cdde Mon Sep 17 00:00:00 2001 From: Serge Smertin <259697+nfx@users.noreply.github.com> Date: Sun, 12 May 2024 12:05:50 +0200 Subject: [PATCH 7/7] fixed documentation --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 240ac33..bc70ac2 100644 --- a/README.md +++ b/README.md @@ -933,11 +933,11 @@ This will print something like: You can also do `wheels.upload_to_dbfs()`, though you're not able to set any access control over it. -### Publishing the dependencies of the wheel to Databricks Workspace +### Publishing upstream dependencies to Databricks Workspace without Public Internet access -When you build a wheel, it may have dependencies that are not included in the wheel itself. These dependencies are usually other Python packages that your wheel relies on. To ensure that your wheel works correctly when it is installed, these dependencies also need to be available in the same environment. +Python wheel may have dependencies that are not included in the wheel itself. These dependencies are usually other Python packages that your wheel relies on. During installation on regular Databricks Workspaces, these dependencies get automatically fetched from [Python Package Index](https://pypi.org/). -While in many instances, the required packages are readily available in Databricks Runtime (DBR) or PyPi, there may be situations where specific dependencies need to be manually uploaded to the Databricks Workspace. This ensures that all necessary packages are present, allowing your wheel to operate correctly in its intended environment. +Some Databricks Workspaces are configured with extra layers of network security, that block all access to Public Internet, including [Python Package Index](https://pypi.org/). To ensure installations working on these kinds of workspaces, developers need to explicitly upload all upstream dependencies for their applications to work correctly. The `upload_wheel_dependencies(prefixes)` method can be used to upload these dependencies to Databricks Workspace. This method takes a list of prefixes as an argument. It will upload all the dependencies of the wheel that have names starting with any of the provided prefixes.