From fceac35c2c9539da8db0eb5469394c92ad8823c2 Mon Sep 17 00:00:00 2001 From: Luca Antiga Date: Thu, 8 Dec 2022 21:02:31 +0100 Subject: [PATCH 01/11] Apply dynamo to training_step, validation_step, test_step, predict_step (#15957) * Apply dynamo to training_step, validation_step, test_step, predict_step * Add entry to CHANGELOG.md (cherry picked from commit edc998608464f27be8b9c05385cd464d2f0fc73e) --- src/pytorch_lightning/CHANGELOG.md | 5 ++++- src/pytorch_lightning/core/module.py | 12 +++++++++++ .../core/test_lightning_module.py | 21 +++++++++++++++++-- tests/tests_pytorch/trainer/test_trainer.py | 3 +-- 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 3df4b4a7dcb83..9f544fbe835e8 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -8,7 +8,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Changed -- Direct support for compiled models ([#15922](https://github.com/Lightning-AI/lightning/pull/15922)) +- Direct support for compiled models ( + [#15922](https://github.com/Lightning-AI/lightning/pull/15922), + [15957](https://github.com/Lightning-AI/lightning/pull/15957) +) ### Fixed diff --git a/src/pytorch_lightning/core/module.py b/src/pytorch_lightning/core/module.py index 48cce4ccbe971..c24235f47d113 100644 --- a/src/pytorch_lightning/core/module.py +++ b/src/pytorch_lightning/core/module.py @@ -1976,9 +1976,17 @@ def from_compiled(cls, model: "torch._dynamo.OptimizedModule") -> "pl.LightningM "compiler": "dynamo", "dynamo_ctx": model.dynamo_ctx, "original_forward": orig_module.forward, + "original_training_step": orig_module.training_step, + "original_validation_step": orig_module.validation_step, + "original_test_step": orig_module.test_step, + "original_predict_step": orig_module.predict_step, } orig_module.forward = model.dynamo_ctx(orig_module.forward) # type: ignore[assignment] + 
orig_module.training_step = model.dynamo_ctx(orig_module.training_step) # type: ignore[assignment] + orig_module.validation_step = model.dynamo_ctx(orig_module.validation_step) # type: ignore[assignment] + orig_module.test_step = model.dynamo_ctx(orig_module.test_step) # type: ignore[assignment] + orig_module.predict_step = model.dynamo_ctx(orig_module.predict_step) # type: ignore[assignment] return orig_module @classmethod @@ -2007,6 +2015,10 @@ def to_uncompiled(cls, model: Union["pl.LightningModule", "torch._dynamo.Optimiz raise ValueError("`model` must either be an instance of torch._dynamo.OptimizedModule or LightningModule") model.forward = model._compiler_ctx["original_forward"] # type: ignore[assignment] + model.training_step = model._compiler_ctx["original_training_step"] # type: ignore[assignment] + model.validation_step = model._compiler_ctx["original_validation_step"] # type: ignore[assignment] + model.test_step = model._compiler_ctx["original_test_step"] # type: ignore[assignment] + model.predict_step = model._compiler_ctx["original_predict_step"] # type: ignore[assignment] model._compiler_ctx = None return model diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index ba8419a904b3a..1fc00f277a526 100644 --- a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -21,7 +21,7 @@ from torch.optim import Adam, SGD from pytorch_lightning import LightningModule, Trainer -from pytorch_lightning.demos.boring_classes import BoringModel, DemoModel +from pytorch_lightning.demos.boring_classes import BoringModel from pytorch_lightning.loggers import TensorBoardLogger from pytorch_lightning.utilities.exceptions import MisconfigurationException from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_13 @@ -457,15 +457,32 @@ def test_trainer_reference_recursively(): @RunIf(min_torch="1.14.0.dev20221202") def 
test_compile_uncompile(): - lit_model = DemoModel() + lit_model = BoringModel() model_compiled = torch.compile(lit_model) lit_model_compiled = LightningModule.from_compiled(model_compiled) + def has_dynamo(fn): + return any(el for el in dir(fn) if el.startswith("_torchdynamo")) + assert isinstance(lit_model_compiled, LightningModule) assert lit_model_compiled._compiler_ctx is not None + assert has_dynamo(lit_model_compiled.forward) + assert has_dynamo(lit_model_compiled.training_step) + assert has_dynamo(lit_model_compiled.validation_step) + assert has_dynamo(lit_model_compiled.test_step) + assert has_dynamo(lit_model_compiled.predict_step) lit_model_orig = LightningModule.to_uncompiled(lit_model) assert lit_model_orig._compiler_ctx is None assert lit_model_orig.forward == lit_model.forward + assert lit_model_orig.training_step == lit_model.training_step + assert lit_model_orig.validation_step == lit_model.validation_step + assert lit_model_orig.test_step == lit_model.test_step + assert lit_model_orig.predict_step == lit_model.predict_step + assert not has_dynamo(lit_model_orig.forward) + assert not has_dynamo(lit_model_orig.training_step) + assert not has_dynamo(lit_model_orig.validation_step) + assert not has_dynamo(lit_model_orig.test_step) + assert not has_dynamo(lit_model_orig.predict_step) diff --git a/tests/tests_pytorch/trainer/test_trainer.py b/tests/tests_pytorch/trainer/test_trainer.py index 066172af11de3..74ea2ac11a701 100644 --- a/tests/tests_pytorch/trainer/test_trainer.py +++ b/tests/tests_pytorch/trainer/test_trainer.py @@ -45,7 +45,6 @@ from pytorch_lightning.demos.boring_classes import ( BoringDataModule, BoringModel, - DemoModel, RandomDataset, RandomIterableDataset, RandomIterableDatasetWithLen, @@ -2244,7 +2243,7 @@ def on_fit_start(self): # TODO: replace with 1.14 when it is released @RunIf(min_torch="1.14.0.dev20221202") def test_trainer_compiled_model(): - model = DemoModel() + model = BoringModel() model = torch.compile(model) From 
81ff9387ef979088d93f9c81e112f68656ea5a12 Mon Sep 17 00:00:00 2001 From: thomas chaton Date: Fri, 9 Dec 2022 10:27:46 +0000 Subject: [PATCH 02/11] [App] Resolve run installation (#15974) (cherry picked from commit dd83587102bf9650babf2af407af79d623934a84) --- src/lightning_app/cli/cmd_install.py | 15 +++++++++------ tests/tests_app/cli/test_cmd_install.py | 12 ++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/lightning_app/cli/cmd_install.py b/src/lightning_app/cli/cmd_install.py index 579a921179b4c..56c3d07b3d37f 100644 --- a/src/lightning_app/cli/cmd_install.py +++ b/src/lightning_app/cli/cmd_install.py @@ -101,7 +101,7 @@ def gallery_apps_and_components( except Exception: return None - entry, kind = _resolve_entry(app_or_component, version_arg) + entry, kind = _resolve_entry(name, version_arg) if kind == "app": # give the user the chance to do a manual install @@ -111,16 +111,19 @@ def gallery_apps_and_components( # run installation if requested _install_app_from_source(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) - return os.path.join(os.getcwd(), folder_name, entry["appEntrypointFile"]) + return os.path.join(os.getcwd(), *entry["appEntrypointFile"].split("/")) elif kind == "component": # give the user the chance to do a manual install - git_url = _show_install_component_prompt(entry, app_or_component, org, yes_arg) - + source_url, git_url, folder_name, git_sha = _show_install_app_prompt( + entry, app_or_component, org, yes_arg, resource_type="component" + ) + if "@" in git_url: + git_url = git_url.split("git+")[1].split("@")[0] # run installation if requested - _install_component_from_source(git_url) + _install_app_from_source(source_url, git_url, folder_name, cwd=cwd, overwrite=overwrite, git_sha=git_sha) - return os.path.join(os.getcwd(), entry["appEntrypointFile"]) + return os.path.join(os.getcwd(), *entry["entrypointFile"].split("/")) return None diff --git 
a/tests/tests_app/cli/test_cmd_install.py b/tests/tests_app/cli/test_cmd_install.py index 2e2086348cb58..c11dd5fdd38c0 100644 --- a/tests/tests_app/cli/test_cmd_install.py +++ b/tests/tests_app/cli/test_cmd_install.py @@ -321,6 +321,18 @@ def test_install_app_shows_error(tmpdir): # os.chdir(cwd) +def test_app_and_component_gallery_app(monkeypatch): + monkeypatch.setattr(cmd_install, "_install_app_from_source", mock.MagicMock()) + path = cmd_install.gallery_apps_and_components("lightning/lightning-diffusion-component-api", True, "latest") + assert path == os.path.join(os.getcwd(), "diffusion2", "app.py") + + +def test_app_and_component_gallery_component(monkeypatch): + monkeypatch.setattr(cmd_install, "_install_app_from_source", mock.MagicMock()) + path = cmd_install.gallery_apps_and_components("lightning/lit-jupyter", True, "latest") + assert path == os.path.join(os.getcwd(), "app.py") + + @mock.patch.dict(os.environ, {"LIGHTNING_APP_REGISTRY": "https://TODO/other_non_PL_registry"}) def test_private_app_registry(): registry = cmd_install._resolve_app_registry() From 8038c7a47e076116f34eea124f5a3b0e5a0b5029 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 9 Dec 2022 11:42:52 +0100 Subject: [PATCH 03/11] App: Move AutoScaler dependency to extra requirements (#15971) * Make autoscaler dependency optional * update chglog * dont directly import aiohttp (cherry picked from commit 346e93665102f4faebce388a8e556f9394b7804f) # Conflicts: # requirements/app/base.txt # src/lightning_app/CHANGELOG.md --- requirements/app/base.txt | 2 -- requirements/app/cloud.txt | 2 ++ requirements/app/components.txt | 1 + src/lightning_app/CHANGELOG.md | 5 ++++- src/lightning_app/components/auto_scaler.py | 8 ++++++-- src/lightning_app/utilities/imports.py | 4 ++++ 6 files changed, 17 insertions(+), 5 deletions(-) diff --git a/requirements/app/base.txt b/requirements/app/base.txt index b3200940e4fe2..9590f2c1a2fbd 100644 --- a/requirements/app/base.txt +++ b/requirements/app/base.txt 
@@ -12,5 +12,3 @@ beautifulsoup4>=4.8.0, <4.11.2 inquirer>=2.10.0 psutil<5.9.4 click<=8.1.3 -s3fs>=2022.5.0, <2022.8.3 -aiohttp>=3.8.0, <=3.8.3 diff --git a/requirements/app/cloud.txt b/requirements/app/cloud.txt index 314676d5db5a7..512cacf130e1d 100644 --- a/requirements/app/cloud.txt +++ b/requirements/app/cloud.txt @@ -3,3 +3,5 @@ redis>=4.0.1, <=4.2.4 docker>=5.0.0, <=5.0.3 # setuptools==59.5.0 +s3fs>=2022.5.0, <2022.8.3 +aiohttp>=3.8.0, <=3.8.3 diff --git a/requirements/app/components.txt b/requirements/app/components.txt index 38180a480a59b..dd2cadfc1c17e 100644 --- a/requirements/app/components.txt +++ b/requirements/app/components.txt @@ -1,2 +1,3 @@ # deps required by components in the lightning app repository (src/lightning_app/components) lightning_api_access>=0.0.3 +aiohttp>=3.8.0, <=3.8.3 diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 9bae487d2ad88..76211a4d2dc03 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -13,7 +13,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Added the CLI command `lightning delete app` to delete a lightning app on the cloud ([#15783](https://github.com/Lightning-AI/lightning/pull/15783)) - Added a CloudMultiProcessBackend which enables running a child App from within the Flow in the cloud ([#15800](https://github.com/Lightning-AI/lightning/pull/15800)) - Utility for pickling work object safely even from a child process ([#15836](https://github.com/Lightning-AI/lightning/pull/15836)) -- Added `AutoScaler` component ([#15769](https://github.com/Lightning-AI/lightning/pull/15769)) +- Added `AutoScaler` component ( + [#15769](https://github.com/Lightning-AI/lightning/pull/15769), + [#15971](https://github.com/Lightning-AI/lightning/pull/15971) +) - Added the property `ready` of the LightningFlow to inform when the `Open App` should be visible ([#15921](https://github.com/Lightning-AI/lightning/pull/15921)) - Added private work attributed `_start_method` to customize how to start the works ([#15923](https://github.com/Lightning-AI/lightning/pull/15923)) - Added a `configure_layout` method to the `LightningWork` which can be used to control how the work is handled in the layout of a parent flow ([#15926](https://github.com/Lightning-AI/lightning/pull/15926)) diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index 62e6180c49665..629e771c50600 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -8,8 +8,6 @@ from itertools import cycle from typing import Any, Dict, List, Tuple, Type -import aiohttp -import aiohttp.client_exceptions import requests import uvicorn from fastapi import Depends, FastAPI, HTTPException, Request @@ -22,8 +20,13 @@ from lightning_app.core.flow import LightningFlow from lightning_app.core.work import LightningWork from lightning_app.utilities.app_helpers import Logger +from lightning_app.utilities.imports import _is_aiohttp_available, requires from 
lightning_app.utilities.packaging.cloud_compute import CloudCompute +if _is_aiohttp_available(): + import aiohttp + import aiohttp.client_exceptions + logger = Logger(__name__) @@ -114,6 +117,7 @@ class _LoadBalancer(LightningWork): \**kwargs: Arguments passed to :func:`LightningWork.init` like ``CloudCompute``, ``BuildConfig``, etc. """ + @requires(["aiohttp"]) def __init__( self, input_type: BaseModel, diff --git a/src/lightning_app/utilities/imports.py b/src/lightning_app/utilities/imports.py index b484110d3811e..19978fcf5d137 100644 --- a/src/lightning_app/utilities/imports.py +++ b/src/lightning_app/utilities/imports.py @@ -141,4 +141,8 @@ def _is_sqlmodel_available() -> bool: return module_available("sqlmodel") +def _is_aiohttp_available() -> bool: + return module_available("aiohttp") + + _CLOUD_TEST_RUN = bool(os.getenv("CLOUD", False)) From e718b8258fd6ed7f60e8671d78842bb3cba0d625 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 9 Dec 2022 11:56:26 +0100 Subject: [PATCH 04/11] Avoid using the same port number for autoscaler works (#15966) * dont hardcode port in python server * add another chglog (cherry picked from commit a72d268a51534331d388b0dbd16a716c5805af0f) --- examples/app_server_with_auto_scaler/app.py | 21 +++++++++++-------- src/lightning_app/CHANGELOG.md | 3 ++- src/lightning_app/components/auto_scaler.py | 3 ++- .../components/serve/python_server.py | 6 +----- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/examples/app_server_with_auto_scaler/app.py b/examples/app_server_with_auto_scaler/app.py index b713bd6d1dcfc..70799827776a8 100644 --- a/examples/app_server_with_auto_scaler/app.py +++ b/examples/app_server_with_auto_scaler/app.py @@ -1,3 +1,4 @@ +# ! 
pip install torch torchvision from typing import Any, List import torch @@ -22,10 +23,10 @@ class BatchResponse(BaseModel): class PyTorchServer(L.app.components.PythonServer): def __init__(self, *args, **kwargs): super().__init__( - port=L.app.utilities.network.find_free_network_port(), input_type=BatchRequestModel, output_type=BatchResponse, - cloud_compute=L.CloudCompute("gpu"), + *args, + **kwargs, ) def setup(self): @@ -57,16 +58,14 @@ def scale(self, replicas: int, metrics: dict) -> int: """The default scaling logic that users can override.""" # scale out if the number of pending requests exceeds max batch size. max_requests_per_work = self.max_batch_size - pending_requests_per_running_or_pending_work = metrics["pending_requests"] / ( - replicas + metrics["pending_works"] - ) - if pending_requests_per_running_or_pending_work >= max_requests_per_work: + pending_requests_per_work = metrics["pending_requests"] / (replicas + metrics["pending_works"]) + if pending_requests_per_work >= max_requests_per_work: return replicas + 1 # scale in if the number of pending requests is below 25% of max_requests_per_work min_requests_per_work = max_requests_per_work * 0.25 - pending_requests_per_running_work = metrics["pending_requests"] / replicas - if pending_requests_per_running_work < min_requests_per_work: + pending_requests_per_work = metrics["pending_requests"] / replicas + if pending_requests_per_work < min_requests_per_work: return replicas - 1 return replicas @@ -74,13 +73,17 @@ def scale(self, replicas: int, metrics: dict) -> int: app = L.LightningApp( MyAutoScaler( + # work class and args PyTorchServer, - min_replicas=2, + cloud_compute=L.CloudCompute("gpu"), + # autoscaler specific args + min_replicas=1, max_replicas=4, autoscale_interval=10, endpoint="predict", input_type=RequestModel, output_type=Any, timeout_batching=1, + max_batch_size=8, ) ) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 76211a4d2dc03..c63af4ac40f0c 100644 
--- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -15,7 +15,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Utility for pickling work object safely even from a child process ([#15836](https://github.com/Lightning-AI/lightning/pull/15836)) - Added `AutoScaler` component ( [#15769](https://github.com/Lightning-AI/lightning/pull/15769), - [#15971](https://github.com/Lightning-AI/lightning/pull/15971) + [#15971](https://github.com/Lightning-AI/lightning/pull/15971), + [#15966](https://github.com/Lightning-AI/lightning/pull/15966) ) - Added the property `ready` of the LightningFlow to inform when the `Open App` should be visible ([#15921](https://github.com/Lightning-AI/lightning/pull/15921)) - Added private work attributed `_start_method` to customize how to start the works ([#15923](https://github.com/Lightning-AI/lightning/pull/15923)) diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index 629e771c50600..fc6a1a873769b 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -450,7 +450,8 @@ def workers(self) -> List[LightningWork]: def create_work(self) -> LightningWork: """Replicates a LightningWork instance with args and kwargs provided via ``__init__``.""" # TODO: Remove `start_with_flow=False` for faster initialization on the cloud - return self._work_cls(*self._work_args, **self._work_kwargs, start_with_flow=False) + self._work_kwargs.update(dict(start_with_flow=False)) + return self._work_cls(*self._work_args, **self._work_kwargs) def add_work(self, work) -> str: """Adds a new LightningWork instance. 
diff --git a/src/lightning_app/components/serve/python_server.py b/src/lightning_app/components/serve/python_server.py index 1868b0b357fd3..c522a25eb3f3d 100644 --- a/src/lightning_app/components/serve/python_server.py +++ b/src/lightning_app/components/serve/python_server.py @@ -75,8 +75,6 @@ class PythonServer(LightningWork, abc.ABC): @requires(["torch", "lightning_api_access"]) def __init__( # type: ignore self, - host: str = "127.0.0.1", - port: int = 7777, input_type: type = _DefaultInputData, output_type: type = _DefaultOutputData, **kwargs, @@ -84,8 +82,6 @@ def __init__( # type: ignore """The PythonServer Class enables to easily get your machine learning server up and running. Arguments: - host: Address to be used for running the server. - port: Port to be used to running the server. input_type: Optional `input_type` to be provided. This needs to be a pydantic BaseModel class. The default data type is good enough for the basic usecases and it expects the data to be a json object that has one key called `payload` @@ -129,7 +125,7 @@ def predict(self, request): ... 
>>> app = LightningApp(SimpleServer()) """ - super().__init__(parallel=True, host=host, port=port, **kwargs) + super().__init__(parallel=True, **kwargs) if not issubclass(input_type, BaseModel): raise TypeError("input_type must be a pydantic BaseModel class") if not issubclass(output_type, BaseModel): From d1509ad6d955303e62e6a8fcca22f5791ed1f243 Mon Sep 17 00:00:00 2001 From: Liyang90 Date: Fri, 9 Dec 2022 12:31:45 +0100 Subject: [PATCH 05/11] Fix `action_name` usage in `XLAProfiler` (#15886) * Fix `action_name` usage in `XLAProfiler` * add changelog * Update src/pytorch_ligh * Update xla.py Co-authored-by: awaelchli Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit c748f828e5758465b295dc9aace208368f88c44c) --- src/pytorch_lightning/CHANGELOG.md | 1 + src/pytorch_lightning/profilers/xla.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 9f544fbe835e8..a8d91c1ae4a55 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed LRScheduler import for PyTorch 2.0 ([#15940](https://github.com/Lightning-AI/lightning/pull/15940)) - Fixed `fit_loop.restarting` to be `False` for lr finder ([#15620](https://github.com/Lightning-AI/lightning/pull/15620)) - Fixed `torch.jit.script`-ing a LightningModule causing an unintended error message about deprecated `use_amp` property ([#15947](https://github.com/Lightning-AI/lightning/pull/15947)) +- Fixed the `XLAProfiler` not recording anything due to mismatching of action names ([#15885](https://github.com/Lightning-AI/lightning/pull/15885)) ## [1.8.3] - 2022-11-22 diff --git a/src/pytorch_lightning/profilers/xla.py b/src/pytorch_lightning/profilers/xla.py index ef103a9a45842..4bfefbc0bacbb 100644 --- a/src/pytorch_lightning/profilers/xla.py +++ b/src/pytorch_lightning/profilers/xla.py @@ -50,12 +50,14 @@ def __init__(self, port: int = 9012) -> None: def start(self, action_name: str) -> None: import torch_xla.debug.profiler as xp - if action_name in self.RECORD_FUNCTIONS: + # The action name is formatted as '[TYPE]{class name}.{hook name}' + # Example: [LightningModule]BoringModel.training_step + if action_name.split(".")[-1] in self.RECORD_FUNCTIONS: if not self._start_trace: self.server = xp.start_server(self.port) self._start_trace = True - if action_name in self.STEP_FUNCTIONS: + if action_name.split(".")[-1] in self.STEP_FUNCTIONS: step = self._get_step_num(action_name) recording = xp.StepTrace(action_name, step_num=step) else: From b9bb24a086cc109ebcaf34fb7224d0f3927ed61d Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Fri, 9 Dec 2022 13:02:58 +0100 Subject: [PATCH 06/11] Fix multinode cloud component (#15965) * fix multinode cloud component * add tests (cherry picked from commit d21b8992eead8f544a41792e4ef40a2710423a62) --- src/lightning_app/CHANGELOG.md | 2 ++ .../components/multi_node/base.py | 2 +- .../utilities/packaging/cloud_compute.py | 10 +++++++++- 
.../components/multi_node/test_base.py | 12 ++++++++++++ .../utilities/packaging/test_cloud_compute.py | 18 ++++++++++++++++++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index c63af4ac40f0c..a25c8ecd9fe39 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -54,6 +54,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed multiprocessing breakpoint ([#15950](https://github.com/Lightning-AI/lightning/pull/15950)) - Fixed detection of a Lightning App running in debug mode ([#15951](https://github.com/Lightning-AI/lightning/pull/15951)) - Fixed `ImportError` on Multinode if package not present ([#15963](https://github.com/Lightning-AI/lightning/pull/15963)) +- Fixed MultiNode Component to use separate cloud computes ([#15965](https://github.com/Lightning-AI/lightning/pull/15965)) + ## [1.8.3] - 2022-11-22 diff --git a/src/lightning_app/components/multi_node/base.py b/src/lightning_app/components/multi_node/base.py index ee4f2b3abd4fb..5662442b7375a 100644 --- a/src/lightning_app/components/multi_node/base.py +++ b/src/lightning_app/components/multi_node/base.py @@ -66,7 +66,7 @@ def run( *[ work_cls( *work_args, - cloud_compute=cloud_compute, + cloud_compute=cloud_compute.clone(), **work_kwargs, parallel=True, ) diff --git a/src/lightning_app/utilities/packaging/cloud_compute.py b/src/lightning_app/utilities/packaging/cloud_compute.py index f3b162ed042c6..ca6c9705ae866 100644 --- a/src/lightning_app/utilities/packaging/cloud_compute.py +++ b/src/lightning_app/utilities/packaging/cloud_compute.py @@ -82,7 +82,7 @@ def __post_init__(self) -> None: # All `default` CloudCompute are identified in the same way. if self._internal_id is None: - self._internal_id = "default" if self.name == "default" else uuid4().hex[:7] + self._internal_id = self._generate_id() # Internal arguments for now. 
self.preemptible = False @@ -118,6 +118,14 @@ def id(self) -> Optional[str]: def is_default(self) -> bool: return self.name == "default" + def _generate_id(self): + return "default" if self.name == "default" else uuid4().hex[:7] + + def clone(self): + new_dict = self.to_dict() + new_dict["_internal_id"] = self._generate_id() + return self.from_dict(new_dict) + def _verify_mount_root_dirs_are_unique(mounts: Union[None, Mount, List[Mount], Tuple[Mount]]) -> None: if isinstance(mounts, (list, tuple, set)): diff --git a/tests/tests_app/components/multi_node/test_base.py b/tests/tests_app/components/multi_node/test_base.py index e23535fbfe970..2c6aed1120c0a 100644 --- a/tests/tests_app/components/multi_node/test_base.py +++ b/tests/tests_app/components/multi_node/test_base.py @@ -1,4 +1,5 @@ from re import escape +from unittest import mock import pytest from tests_app.helpers.utils import no_warning_call @@ -17,3 +18,14 @@ def run(self): with no_warning_call(UserWarning, match=escape("You set MultiNode(num_nodes=1, ...)` but ")): MultiNode(Work, num_nodes=1, cloud_compute=CloudCompute("gpu")) + + +@mock.patch("lightning_app.components.multi_node.base.is_running_in_cloud", mock.Mock(return_value=True)) +def test_multi_node_separate_cloud_computes(): + class Work(LightningWork): + def run(self): + pass + + m = MultiNode(Work, num_nodes=2, cloud_compute=CloudCompute("gpu")) + + assert len({w.cloud_compute._internal_id for w in m.ws}) == len(m.ws) diff --git a/tests/tests_app/utilities/packaging/test_cloud_compute.py b/tests/tests_app/utilities/packaging/test_cloud_compute.py index aa0395aa5451a..f2670723f132a 100644 --- a/tests/tests_app/utilities/packaging/test_cloud_compute.py +++ b/tests/tests_app/utilities/packaging/test_cloud_compute.py @@ -41,3 +41,21 @@ def test_cloud_compute_with_non_unique_mount_root_dirs(): with pytest.raises(ValueError, match="Every Mount attached to a work must have a unique"): CloudCompute("gpu", mounts=[mount_1, mount_2]) + + +def 
test_cloud_compute_clone(): + c1 = CloudCompute("gpu") + c2 = c1.clone() + + assert isinstance(c2, CloudCompute) + + c1_dict = c1.to_dict() + c2_dict = c2.to_dict() + + assert len(c1_dict) == len(c2_dict) + + for k in c1_dict.keys(): + if k == "_internal_id": + assert c1_dict[k] != c2_dict[k] + else: + assert c1_dict[k] == c2_dict[k] From 843786f2b7a942648e0ee07cc9836485dfab5cfc Mon Sep 17 00:00:00 2001 From: Jirka Borovec <6035284+borda@users.noreply.github.com> Date: Fri, 9 Dec 2022 13:19:32 +0100 Subject: [PATCH 07/11] ci: update signaling (#15981) * ci: update signaling * config (cherry picked from commit e56e7f11b0e337c5b8c110887f5fb351697666ca) --- .github/workflows/release-pypi.yml | 36 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/.github/workflows/release-pypi.yml b/.github/workflows/release-pypi.yml index 5e500670865de..1d9d0a2eeda25 100644 --- a/.github/workflows/release-pypi.yml +++ b/.github/workflows/release-pypi.yml @@ -12,6 +12,7 @@ defaults: shell: bash jobs: + init: runs-on: ubuntu-20.04 steps: @@ -23,6 +24,7 @@ jobs: name: dist-packages-${{ github.sha }} path: dist + build-packages: needs: init runs-on: ubuntu-20.04 @@ -40,22 +42,20 @@ jobs: - uses: actions/setup-python@v4 with: python-version: 3.9 - - name: Install dependencies run: pip install -U setuptools wheel - - name: Build packages env: PACKAGE_NAME: ${{ matrix.pkg-name }} run: | python setup.py sdist bdist_wheel ls -lh dist/ - - uses: actions/upload-artifact@v3 with: name: dist-packages-${{ github.sha }} path: dist + upload-packages: runs-on: ubuntu-20.04 needs: build-packages @@ -73,6 +73,7 @@ jobs: files: 'dist/*' repo-token: ${{ secrets.GITHUB_TOKEN }} + release-version: runs-on: ubuntu-20.04 outputs: @@ -87,6 +88,7 @@ jobs: id: lai-package run: python -c "import lightning as L; print(f'version={L.__version__}')" >> $GITHUB_OUTPUT + signaling: runs-on: ubuntu-20.04 needs: [release-version] @@ -100,12 +102,6 @@ jobs: with: repository: 
gridai/base-images token: ${{ secrets.PAT_GHOST }} - ref: main - - uses: fregante/setup-git-token@v1 - with: - token: ${{ secrets.PAT_GHOST }} - name: PL Ghost - email: pl-github@grid.ai - name: Update lightning version run: | import json, os @@ -115,20 +111,21 @@ jobs: with open("versions.json", "w") as fw: json.dump(vers, fw) shell: python - - name: GIT Commit + - run: cat versions.json + - name: GIT commit & push + env: + BRANCH_NAME: "trigger/lightning-${{ env.TAG }}" run: | + git config --global user.name "PL Ghost" + git config --global user.email pl-github@grid.ai + git checkout -b ${BRANCH_NAME} git add versions.json - git commit -m "bumping lightning version -> ${TAG}" - cat versions.json - - name: GIT Push - run: | git status - # force push is not very nice - # but so far the push is rejected even with exception for this user - git push -f + git commit -m "bumping lightning version -> ${TAG}" + git push -u origin ${BRANCH_NAME} -f + waiting: - # TODO: replace with back signal from build images/ loop checking for a specific branch? 
runs-on: ubuntu-20.04 needs: [release-version, signaling] env: @@ -152,6 +149,7 @@ jobs: time.sleep(60) shell: python + pre-publish-packages: runs-on: ubuntu-20.04 needs: build-packages @@ -181,6 +179,7 @@ jobs: pkg-pattern: "*" pypi-test-token: ${{ secrets.PYPI_TEST_TOKEN_LAI }} + publish-packages: runs-on: ubuntu-20.04 needs: [build-packages, waiting] @@ -210,6 +209,7 @@ jobs: pkg-pattern: "*" pypi-token: ${{ secrets.PYPI_TOKEN_LAI }} + legacy-checkpoints: needs: [build-packages] uses: ./.github/workflows/legacy-checkpoints.yml From ad1de34c588309f4070eba82db814493ecebab3a Mon Sep 17 00:00:00 2001 From: Justus Schock <12886177+justusschock@users.noreply.github.com> Date: Fri, 9 Dec 2022 13:46:49 +0100 Subject: [PATCH 08/11] Fix cloudcomputes registration for structures (#15964) * fix cloudcomputes * updates cloudcompute registration * changelog (cherry picked from commit 90a4c0289d6b404939a4cf59fe6e722b97abe441) --- src/lightning_app/CHANGELOG.md | 1 + src/lightning_app/core/flow.py | 19 +++++++++----- tests/tests_app/core/test_lightning_flow.py | 29 ++++++++++++++++++++- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index a25c8ecd9fe39..c171e1b2df26e 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -55,6 +55,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed detection of a Lightning App running in debug mode ([#15951](https://github.com/Lightning-AI/lightning/pull/15951)) - Fixed `ImportError` on Multinode if package not present ([#15963](https://github.com/Lightning-AI/lightning/pull/15963)) - Fixed MultiNode Component to use separate cloud computes ([#15965](https://github.com/Lightning-AI/lightning/pull/15965)) +- Fixed Registration for CloudComputes of Works in `L.app.structures` ([#15964](https://github.com/Lightning-AI/lightning/pull/15964)) ## [1.8.3] - 2022-11-22 diff --git a/src/lightning_app/core/flow.py b/src/lightning_app/core/flow.py index a79794bac3d20..5a82400066f05 100644 --- a/src/lightning_app/core/flow.py +++ b/src/lightning_app/core/flow.py @@ -173,12 +173,19 @@ def __setattr__(self, name: str, value: Any) -> None: elif isinstance(value, (Dict, List)): self._structures.add(name) _set_child_name(self, value, name) - if getattr(self, "_backend", None) is not None: - value._backend = self._backend - for flow in value.flows: - LightningFlow._attach_backend(flow, self._backend) - for work in value.works: - self._backend._wrap_run_method(_LightningAppRef().get_current(), work) + + _backend = getattr(self, "backend", None) + if _backend is not None: + value._backend = _backend + + for flow in value.flows: + if _backend is not None: + LightningFlow._attach_backend(flow, _backend) + + for work in value.works: + work._register_cloud_compute() + if _backend is not None: + _backend._wrap_run_method(_LightningAppRef().get_current(), work) elif isinstance(value, Path): # In the init context, the full name of the Flow and Work is not known, i.e., we can't serialize diff --git a/tests/tests_app/core/test_lightning_flow.py b/tests/tests_app/core/test_lightning_flow.py index dacccfb3873aa..c8e9921f29eec 100644 --- a/tests/tests_app/core/test_lightning_flow.py +++ b/tests/tests_app/core/test_lightning_flow.py @@ -10,7 +10,8 @@ import pytest from deepdiff import DeepDiff, Delta -from lightning_app import 
LightningApp +import lightning_app +from lightning_app import CloudCompute, LightningApp from lightning_app.core.flow import LightningFlow from lightning_app.core.work import LightningWork from lightning_app.runners import MultiProcessRuntime @@ -901,3 +902,29 @@ def run_patch(method): state = app.api_publish_state_queue.put._mock_call_args[0][0] call_hash = state["works"]["w"]["calls"]["latest_call_hash"] assert state["works"]["w"]["calls"][call_hash]["statuses"][0]["stage"] == "succeeded" + + +def test_structures_register_work_cloudcompute(): + class MyDummyWork(LightningWork): + def run(self): + return + + class MyDummyFlow(LightningFlow): + def __init__(self): + super().__init__() + self.w_list = LList(*[MyDummyWork(cloud_compute=CloudCompute("gpu")) for i in range(5)]) + self.w_dict = LDict(**{str(i): MyDummyWork(cloud_compute=CloudCompute("gpu")) for i in range(5)}) + + def run(self): + for w in self.w_list: + w.run() + + for w in self.w_dict.values(): + w.run() + + MyDummyFlow() + assert len(lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE) == 10 + for v in lightning_app.utilities.packaging.cloud_compute._CLOUD_COMPUTE_STORE.values(): + assert len(v.component_names) == 1 + assert v.component_names[0][:-1] in ("root.w_list.", "root.w_dict.") + assert v.component_names[0][-1].isdigit() From 5f862fd764b1bf0f9783b2d33dd9e7993ee7947d Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Fri, 9 Dec 2022 23:13:45 +0900 Subject: [PATCH 09/11] Document running dev lightning on the cloud (#15962) * document running dev lightning on the cloud * document running dev lightning on the cloud * Update .github/CONTRIBUTING.md Co-authored-by: Noha Alon * document running dev lightning on the cloud * git clone & pip install -e * Update .github/CONTRIBUTING.md Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: Noha Alon Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 
cfd00d3e47853ff45d3259056f2bbc42b9b764cb) --- .github/CONTRIBUTING.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index a1edacea7c104..42bda00d58a3e 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -309,6 +309,20 @@ and the last true master commit is `ccc111` and your first commit is `mmm222`. git push -f ``` +#### How to run an app on the cloud with a local version of lightning + +The lightning cloud uses the latest release by default. However, you might want to run your app with some local changes you've made to the lightning framework. To use your local version of lightning on the cloud, set the following environment variable: + +```bash +git clone https://github.com/Lightning-AI/lightning.git +cd lightning +pip install -e . +export PACKAGE_LIGHTNING=1 # <- this is the magic to use your version (not mainstream PyPI)! +lightning run app app.py --cloud +``` + +By setting `PACKAGE_LIGHTNING=1`, lightning packages the lightning source code in your local directory in addition to your app source code and uploads them to the cloud. + ### Bonus Workflow Tip If you don't want to remember all the commands above every time you want to push some code/setup a Lightning Dev environment on a new VM, you can set up bash aliases for some common commands. You can add these to one of your `~/.bashrc`, `~/.zshrc`, or `~/.bash_aliases` files. 
From 74d796bb2c12a5312bc4b1cc44b76b8806313436 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Fri, 9 Dec 2022 15:47:36 +0100 Subject: [PATCH 10/11] [App] Install exact version when upgrading and not when testing (#15984) * [App] Install exact version when upgrading and not when testing * Update CHANGELOG.md Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> (cherry picked from commit 1657ea8a39c3795e1276c491eff4f13b06a06e61) --- src/lightning_app/CHANGELOG.md | 1 + src/lightning_app/cli/lightning_cli.py | 5 +++-- src/lightning_app/testing/testing.py | 1 + src/lightning_app/utilities/cli_helpers.py | 4 ++-- tests/tests_app/utilities/test_cli_helpers.py | 2 +- 5 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index c171e1b2df26e..b61d45b79f3e2 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -56,6 +56,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
- Fixed `ImportError` on Multinode if package not present ([#15963](https://github.com/Lightning-AI/lightning/pull/15963)) - Fixed MultiNode Component to use separate cloud computes ([#15965](https://github.com/Lightning-AI/lightning/pull/15965)) - Fixed Registration for CloudComputes of Works in `L.app.structures` ([#15964](https://github.com/Lightning-AI/lightning/pull/15964)) +- Fixed a bug where auto-upgrading to the latest lightning via the CLI could get stuck in a loop ([#15984](https://github.com/Lightning-AI/lightning/pull/15984)) ## [1.8.3] - 2022-11-22 diff --git a/src/lightning_app/cli/lightning_cli.py b/src/lightning_app/cli/lightning_cli.py index 68027e7784f0b..4696745ada95f 100644 --- a/src/lightning_app/cli/lightning_cli.py +++ b/src/lightning_app/cli/lightning_cli.py @@ -48,8 +48,9 @@ def main() -> None: - # Check environment and versions if not in the cloud - if "LIGHTNING_APP_STATE_URL" not in os.environ: + # Check environment and versions if not in the cloud and not testing + is_testing = bool(int(os.getenv("LIGHTING_TESTING", "0"))) + if not is_testing and "LIGHTNING_APP_STATE_URL" not in os.environ: # Enforce running in PATH Python _check_environment_and_redirect() diff --git a/src/lightning_app/testing/testing.py b/src/lightning_app/testing/testing.py index 8d112d7fa4a7a..40b705458dd49 100644 --- a/src/lightning_app/testing/testing.py +++ b/src/lightning_app/testing/testing.py @@ -262,6 +262,7 @@ def run_app_in_cloud( with tempfile.TemporaryDirectory() as tmpdir: env_copy = os.environ.copy() env_copy["PACKAGE_LIGHTNING"] = "1" + env_copy["LIGHTING_TESTING"] = "1" if debug: env_copy["LIGHTNING_DEBUG"] = "1" shutil.copytree(app_folder, tmpdir, dirs_exist_ok=True) diff --git a/src/lightning_app/utilities/cli_helpers.py b/src/lightning_app/utilities/cli_helpers.py index 293944ca82c50..caa414e163ffc 100644 --- a/src/lightning_app/utilities/cli_helpers.py +++ b/src/lightning_app/utilities/cli_helpers.py @@ -254,7 +254,7 @@ def _get_newer_version() 
-> Optional[str]: return None if __version__ == latest_version else latest_version except Exception: # Return None if any exception occurs - return "err" + return None def _redirect_command(executable: str): @@ -277,7 +277,7 @@ def _check_version_and_upgrade(): prompt = f"A newer version of {__package_name__} is available ({new_version}). Would you like to upgrade?" if click.confirm(prompt, default=True): - command = f"pip install --upgrade {__package_name__}" + command = f"pip install '{__package_name__}=={new_version}'" logger.info(f"⚡ RUN: {command}") diff --git a/tests/tests_app/utilities/test_cli_helpers.py b/tests/tests_app/utilities/test_cli_helpers.py index 4ebb3ddc4f0ae..ecdd1705c2130 100644 --- a/tests/tests_app/utilities/test_cli_helpers.py +++ b/tests/tests_app/utilities/test_cli_helpers.py @@ -99,7 +99,7 @@ def test_arrow_time_callback(): "1.0.0dev", None, ), - ({"1.0.0": "this wil trigger an error"}, "1.0.0", "err"), + ({"1.0.0": "this wil trigger an error"}, "1.0.0", None), ({}, "1.0.0rc0", None), ], ) From 640d8a5adbcc8400160fd9741e6be22f1fbf2589 Mon Sep 17 00:00:00 2001 From: Jirka Date: Fri, 9 Dec 2022 15:51:55 +0100 Subject: [PATCH 11/11] releasing 1.8.4.post0 --- src/lightning/__version__.py | 2 +- src/lightning_app/__version__.py | 2 +- src/lightning_lite/__version__.py | 2 +- src/pytorch_lightning/__version__.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lightning/__version__.py b/src/lightning/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/lightning/__version__.py +++ b/src/lightning/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/lightning_app/__version__.py b/src/lightning_app/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/lightning_app/__version__.py +++ b/src/lightning_app/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/lightning_lite/__version__.py b/src/lightning_lite/__version__.py index 
f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/lightning_lite/__version__.py +++ b/src/lightning_lite/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0" diff --git a/src/pytorch_lightning/__version__.py b/src/pytorch_lightning/__version__.py index f2493fc98fbc8..5e8e81fd482ee 100644 --- a/src/pytorch_lightning/__version__.py +++ b/src/pytorch_lightning/__version__.py @@ -1 +1 @@ -version = "1.8.4" +version = "1.8.4.post0"