From a3cf816f4ac0c539f34605b656340231f3548233 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Thu, 8 Dec 2022 16:41:04 +0100 Subject: [PATCH 1/5] fix multinode cloud component --- src/lightning_app/components/multi_node/base.py | 2 +- .../utilities/packaging/cloud_compute.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/lightning_app/components/multi_node/base.py b/src/lightning_app/components/multi_node/base.py index ee4f2b3abd4fb..5662442b7375a 100644 --- a/src/lightning_app/components/multi_node/base.py +++ b/src/lightning_app/components/multi_node/base.py @@ -66,7 +66,7 @@ def run( *[ work_cls( *work_args, - cloud_compute=cloud_compute, + cloud_compute=cloud_compute.clone(), **work_kwargs, parallel=True, ) diff --git a/src/lightning_app/utilities/packaging/cloud_compute.py b/src/lightning_app/utilities/packaging/cloud_compute.py index f3b162ed042c6..39966bb8449e8 100644 --- a/src/lightning_app/utilities/packaging/cloud_compute.py +++ b/src/lightning_app/utilities/packaging/cloud_compute.py @@ -82,7 +82,7 @@ def __post_init__(self) -> None: # All `default` CloudCompute are identified in the same way. if self._internal_id is None: - self._internal_id = "default" if self.name == "default" else uuid4().hex[:7] + self._internal_id = self._generate_id() # Internal arguments for now. self.preemptible = False @@ -118,6 +118,15 @@ def id(self) -> Optional[str]: def is_default(self) -> bool: return self.name == "default" + def _generate_id(self): + return "default" if self.name == "default" else uuid4().hex[:7] + + def clone(self): + new_dict = self.to_dict() + new_dict["_internal_id"] = self._generate_id() + + return self.from_dict(new_dict) + def _verify_mount_root_dirs_are_unique(mounts: Union[None, Mount, List[Mount], Tuple[Mount]]) -> None: if isinstance(mounts, (list, tuple, set)): From b28fff5ca704ec6f7d6a8241aaf73caf12b785df Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 9 Dec 2022 11:29:57 +0100 Subject: [PATCH 2/5] add tests --- src/lightning_app/CHANGELOG.md | 2 ++ .../components/multi_node/test_base.py | 16 ++++++++++++++++ .../utilities/packaging/test_cloud_compute.py | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 889bf82b8e42a..2b5e6bbe7731e 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -77,6 +77,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed detection of a Lightning App running in debug mode ([#15951](https://github.com/Lightning-AI/lightning/pull/15951)) +- Fixed MultiNode Component to use separate cloud computes([#15965](https://github.com/Lightning-AI/lightning/pull/15965)) + ## [1.8.3] - 2022-11-22 diff --git a/tests/tests_app/components/multi_node/test_base.py b/tests/tests_app/components/multi_node/test_base.py index e23535fbfe970..6d957c786e4d2 100644 --- a/tests/tests_app/components/multi_node/test_base.py +++ b/tests/tests_app/components/multi_node/test_base.py @@ -1,8 +1,10 @@ from re import escape +from unittest import mock import pytest from tests_app.helpers.utils import no_warning_call +import lightning_app from lightning_app import CloudCompute, LightningWork from lightning_app.components import MultiNode @@ -17,3 +19,17 @@ def run(self): with no_warning_call(UserWarning, match=escape("You set MultiNode(num_nodes=1, ...)` but ")): MultiNode(Work, num_nodes=1, cloud_compute=CloudCompute("gpu")) + + +@mock.patch("lightning_app.components.multi_node.base.is_running_in_cloud", mock.Mock(return_value=True)) +def test_multi_node_separate_cloud_computes(): + class Work(LightningWork): + def run(self): + pass + + MultiNode(Work, num_nodes=2, cloud_compute=CloudCompute("gpu")) + + assert len(lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE) == 2 + for v in lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE.values(): + assert len(v.component_names) == 1 + assert v.component_names[0].startswith("root.ws.") and v.component_names[0][-1].isdigit() diff --git a/tests/tests_app/utilities/packaging/test_cloud_compute.py b/tests/tests_app/utilities/packaging/test_cloud_compute.py index aa0395aa5451a..f2670723f132a 100644 --- a/tests/tests_app/utilities/packaging/test_cloud_compute.py +++ b/tests/tests_app/utilities/packaging/test_cloud_compute.py @@ -41,3 +41,21 @@ def test_cloud_compute_with_non_unique_mount_root_dirs(): with pytest.raises(ValueError, match="Every Mount attached to a work must have a unique"): CloudCompute("gpu", mounts=[mount_1, mount_2]) + + +def test_cloud_compute_clone(): + c1 = CloudCompute("gpu") + c2 = c1.clone() + + assert isinstance(c2, CloudCompute) + + c1_dict = c1.to_dict() + c2_dict = c2.to_dict() + + assert len(c1_dict) == len(c2_dict) + + for k in c1_dict.keys(): + if k == "_internal_id": + assert c1_dict[k] != c2_dict[k] + else: + assert c1_dict[k] == c2_dict[k] From bf20ee6ed70b7a0ae2d6f4399ef9bb2843e70d41 Mon Sep 17 00:00:00 2001 From: Justus Schock Date: Fri, 9 Dec 2022 11:34:25 +0100 Subject: [PATCH 3/5] add tests --- tests/tests_app/components/multi_node/test_base.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/tests_app/components/multi_node/test_base.py b/tests/tests_app/components/multi_node/test_base.py index 6d957c786e4d2..2c6aed1120c0a 100644 --- a/tests/tests_app/components/multi_node/test_base.py +++ b/tests/tests_app/components/multi_node/test_base.py @@ -4,7 +4,6 @@ import pytest from tests_app.helpers.utils import no_warning_call -import lightning_app from lightning_app import CloudCompute, LightningWork from lightning_app.components import MultiNode @@ -27,9 +26,6 @@ class Work(LightningWork): def run(self): pass - MultiNode(Work, num_nodes=2, cloud_compute=CloudCompute("gpu")) + m = MultiNode(Work, num_nodes=2, cloud_compute=CloudCompute("gpu")) - assert len(lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE) == 2 - for v in lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE.values(): - assert len(v.component_names) == 1 - assert v.component_names[0].startswith("root.ws.") and v.component_names[0][-1].isdigit() + assert len({w.cloud_compute._internal_id for w in m.ws}) == len(m.ws) From 12b3c9d93facc61033249bdc8c5a4f6c50b60c63 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 9 Dec 2022 11:45:59 +0100 Subject: [PATCH 4/5] . --- src/lightning_app/utilities/packaging/cloud_compute.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lightning_app/utilities/packaging/cloud_compute.py b/src/lightning_app/utilities/packaging/cloud_compute.py index 39966bb8449e8..ca6c9705ae866 100644 --- a/src/lightning_app/utilities/packaging/cloud_compute.py +++ b/src/lightning_app/utilities/packaging/cloud_compute.py @@ -124,7 +124,6 @@ def _generate_id(self): def clone(self): new_dict = self.to_dict() new_dict["_internal_id"] = self._generate_id() - return self.from_dict(new_dict) From de4c9ce7e45dfd734a07331f8c9c0e91f8e47c44 Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+Borda@users.noreply.github.com> Date: Fri, 9 Dec 2022 12:02:35 +0100 Subject: [PATCH 5/5] . --- src/lightning_app/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 9480f3681b5c2..6a46678721bd4 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Changed the default port of `PythonServer` from `7777` to a free port at runtime ([#15966](https://github.com/Lightning-AI/lightning/pull/15966)) + - Remove the `AutoScaler` dependency `aiohttp` from the base requirements ([#15971](https://github.com/Lightning-AI/lightning/pull/15971)) @@ -32,6 +33,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed MultiNode Component to use separate cloud computes ([#15965](https://github.com/Lightning-AI/lightning/pull/15965)) + - Fixed `AutoScaler` failing due to port collision across works ([#15966](https://github.com/Lightning-AI/lightning/pull/15966))