From de1401d5872b7d1ce2078abd5eb669937e797695 Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 13 Jan 2022 20:15:13 -0500 Subject: [PATCH 01/32] update tests for v2 --- .../data/horovod/train_default_model.py | 2 +- tests/models/test_amp.py | 31 +++++-- tests/models/test_cpu.py | 4 +- tests/models/test_gpu.py | 80 ++++++++++--------- tests/models/test_hooks.py | 18 +++-- tests/models/test_horovod.py | 20 +---- tests/models/test_onnx.py | 5 +- tests/models/test_restore.py | 8 +- tests/models/test_tpu.py | 77 +++++++++++------- 9 files changed, 140 insertions(+), 105 deletions(-) diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py index 4527f337af365..2c2515851bbf3 100644 --- a/tests/models/data/horovod/train_default_model.py +++ b/tests/models/data/horovod/train_default_model.py @@ -100,7 +100,7 @@ def training_epoch_end(self, outputs) -> None: trainer._checkpoint_connector.restore(checkpoint_path) if on_gpu: - trainer = Trainer(gpus=1, strategy="horovod", max_epochs=1) + trainer = Trainer(accelerator="gpu", devices=1, strategy="horovod", max_epochs=1) # Test the root_gpu property assert trainer.root_gpu == hvd.local_rank() diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py index 917bb4d224194..3fb42fb0ce29e 100644 --- a/tests/models/test_amp.py +++ b/tests/models/test_amp.py @@ -77,13 +77,18 @@ def _assert_autocast_enabled(self): ], ) @pytest.mark.parametrize("precision", [16, "bf16"]) -@pytest.mark.parametrize("num_processes", [1, 2]) -def test_amp_cpus(tmpdir, strategy, precision, num_processes): +@pytest.mark.parametrize("devices", [1, 2]) +def test_amp_cpus(tmpdir, strategy, precision, devices): """Make sure combinations of AMP and training types work if supported.""" tutils.reset_seed() trainer = Trainer( - default_root_dir=tmpdir, num_processes=num_processes, max_epochs=1, strategy=strategy, precision=precision + default_root_dir=tmpdir, + accelerator="cpu", + devices=devices, + max_epochs=1, + strategy=strategy, + precision=precision, ) model = AMPTestModel() @@ -97,12 +102,19 @@ def test_amp_cpus(tmpdir, strategy, precision, num_processes): @RunIf(min_gpus=2, min_torch="1.10") @pytest.mark.parametrize("strategy", [None, "dp", "ddp_spawn"]) @pytest.mark.parametrize("precision", [16, "bf16"]) -@pytest.mark.parametrize("gpus", [1, 2]) -def test_amp_gpus(tmpdir, strategy, precision, gpus): +@pytest.mark.parametrize("devices", [1, 2]) +def test_amp_gpus(tmpdir, strategy, precision, devices): """Make sure combinations of AMP and training types work if supported.""" tutils.reset_seed() - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, gpus=gpus, strategy=strategy, precision=precision) + trainer = Trainer( + default_root_dir=tmpdir, + max_epochs=1, + accelerator="gpu", + devices=devices, + strategy=strategy, + precision=precision, + ) model = AMPTestModel() trainer.fit(model) @@ -141,7 +153,8 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir): trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, - gpus=[0], + accelerator="gpu", + devices=[0], strategy="ddp_spawn", precision=16, callbacks=[checkpoint], @@ -195,7 +208,9 @@ def configure_optimizers(self): model = CustomModel() model.training_epoch_end = None - trainer = Trainer(default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", gpus=1) + trainer = Trainer( + default_root_dir=tmpdir, max_steps=5, precision=16, amp_backend="apex", accelerator="gpu", devices=1 + ) assert str(trainer.amp_backend) == "AMPType.APEX" trainer.fit(model) assert 
trainer.state.finished, f"Training failed with {trainer.state}" diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index 22e31c442d7dd..d8d155dd269b8 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -136,8 +136,8 @@ def test_multi_cpu_model_ddp(tmpdir): max_epochs=1, limit_train_batches=0.4, limit_val_batches=0.2, - gpus=None, - num_processes=2, + accelerator="cpu", + devices=2, strategy="ddp_spawn", ) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index d17322e191ff1..da6934081ef72 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -47,7 +47,8 @@ def test_multi_gpu_none_backend(tmpdir): max_epochs=1, limit_train_batches=0.2, limit_val_batches=0.2, - gpus=2, + accelerator="gpu", + devices=2, ) dm = ClassifDataModule() @@ -56,8 +57,8 @@ def test_multi_gpu_none_backend(tmpdir): @RunIf(min_gpus=2) -@pytest.mark.parametrize("gpus", [1, [0], [1]]) -def test_single_gpu_model(tmpdir, gpus): +@pytest.mark.parametrize("devices", [1, [0], [1]]) +def test_single_gpu_model(tmpdir, devices): """Make sure single GPU works (DP mode).""" trainer_options = dict( default_root_dir=tmpdir, @@ -65,7 +66,8 @@ def test_single_gpu_model(tmpdir, gpus): max_epochs=1, limit_train_batches=0.1, limit_val_batches=0.1, - gpus=gpus, + accelerator="gpu", + devices=devices, ) model = BoringModel() @@ -93,7 +95,7 @@ def device_count(): @pytest.mark.parametrize( - ["gpus", "expected_num_gpus", "strategy"], + ["devices", "expected_num_gpus", "strategy"], [ pytest.param(None, 0, None, id="None - expect 0 gpu to use."), pytest.param(0, 0, None, id="Oth gpu, expect 1 gpu to use."), @@ -103,23 +105,23 @@ def device_count(): pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)"), ], ) -def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, strategy): - assert Trainer(gpus=gpus, strategy=strategy).num_gpus == expected_num_gpus +def test_trainer_gpu_parse(mocked_device_count, devices, expected_num_gpus, strategy): + assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).num_gpus == expected_num_gpus @pytest.mark.parametrize( - ["gpus", "expected_num_gpus", "strategy"], + ["devices", "expected_num_gpus", "strategy"], [ pytest.param(None, 0, None, id="None - expect 0 gpu to use."), pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."), ], ) -def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, strategy): - assert Trainer(gpus=gpus, strategy=strategy).num_gpus == expected_num_gpus +def test_trainer_num_gpu_0(mocked_device_count_0, devices, expected_num_gpus, strategy): + assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).num_gpus == expected_num_gpus @pytest.mark.parametrize( - ["gpus", "expected_root_gpu", "strategy"], + ["devices", "expected_root_gpu", "strategy"], [ pytest.param(None, None, "ddp", id="None is None"), pytest.param(0, None, "ddp", id="O gpus, expect gpu root device to be None."), @@ -129,25 +131,25 @@ def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, strat pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)"), ], ) -def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, strategy): - assert Trainer(gpus=gpus, strategy=strategy).root_gpu == expected_root_gpu +def test_root_gpu_property(mocked_device_count, devices, expected_root_gpu, strategy): + assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).root_gpu == expected_root_gpu @pytest.mark.parametrize( - 
["gpus", "expected_root_gpu", "strategy"], + ["devices", "expected_root_gpu", "strategy"], [ pytest.param(None, None, None, id="None is None"), pytest.param(None, None, "ddp", id="None is None"), pytest.param(0, None, "ddp", id="None is None"), ], ) -def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, strategy): - assert Trainer(gpus=gpus, strategy=strategy).root_gpu == expected_root_gpu +def test_root_gpu_property_0_passing(mocked_device_count_0, devices, expected_root_gpu, strategy): + assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).root_gpu == expected_root_gpu # Asking for a gpu when non are available will result in a MisconfigurationException @pytest.mark.parametrize( - ["gpus", "expected_root_gpu", "strategy"], + ["devices", "expected_root_gpu", "strategy"], [ (1, None, "ddp"), (3, None, "ddp"), @@ -158,13 +160,13 @@ def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_ ("-1", None, "ddp"), ], ) -def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, strategy): +def test_root_gpu_property_0_raising(mocked_device_count_0, devices, expected_root_gpu, strategy): with pytest.raises(MisconfigurationException): - Trainer(gpus=gpus, strategy=strategy) + Trainer(accelerator="gpu", devices=devices, strategy=strategy) @pytest.mark.parametrize( - ["gpus", "expected_root_gpu"], + ["devices", "expected_root_gpu"], [ pytest.param(None, None, id="No gpus, expect gpu root device to be None"), pytest.param([0], 0, id="Oth gpu, expect gpu root device to be 0."), @@ -173,12 +175,12 @@ def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_ pytest.param([1, 2], 1, id="[1, 2] gpus, expect gpu root device to be 1."), ], ) -def test_determine_root_gpu_device(gpus, expected_root_gpu): - assert device_parser.determine_root_gpu_device(gpus) == expected_root_gpu +def test_determine_root_gpu_device(devices, expected_root_gpu): + assert device_parser.determine_root_gpu_device(devices) == expected_root_gpu @pytest.mark.parametrize( - ["gpus", "expected_gpu_ids"], + ["devices", "expected_gpu_ids"], [ (None, None), (0, None), @@ -196,20 +198,20 @@ def test_determine_root_gpu_device(gpus, expected_root_gpu): pytest.param("-1", list(range(PRETEND_N_OF_GPUS)), id="'-1' - use all gpus"), ], ) -def test_parse_gpu_ids(mocked_device_count, gpus, expected_gpu_ids): - assert device_parser.parse_gpu_ids(gpus) == expected_gpu_ids +def test_parse_gpu_ids(mocked_device_count, devices, expected_gpu_ids): + assert device_parser.parse_gpu_ids(devices) == expected_gpu_ids -@pytest.mark.parametrize("gpus", [0.1, -2, False, [-1], [None], ["0"], [0, 0]]) -def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, gpus): +@pytest.mark.parametrize("devices", [0.1, -2, False, [-1], [None], ["0"], [0, 0]]) +def test_parse_gpu_fail_on_unsupported_inputs(mocked_device_count, devices): with pytest.raises(MisconfigurationException): - device_parser.parse_gpu_ids(gpus) + device_parser.parse_gpu_ids(devices) -@pytest.mark.parametrize("gpus", [[1, 2, 19], -1, "-1"]) -def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, gpus): +@pytest.mark.parametrize("devices", [[1, 2, 19], -1, "-1"]) +def test_parse_gpu_fail_on_non_existent_id(mocked_device_count_0, devices): with pytest.raises(MisconfigurationException): - device_parser.parse_gpu_ids(gpus) + device_parser.parse_gpu_ids(devices) def test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count): @@ -217,10 +219,10 @@ def 
test_parse_gpu_fail_on_non_existent_id_2(mocked_device_count): device_parser.parse_gpu_ids([1, 2, 19]) -@pytest.mark.parametrize("gpus", [-1, "-1"]) -def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, gpus): +@pytest.mark.parametrize("devices", [-1, "-1"]) +def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_count_0, devices): with pytest.raises(MisconfigurationException): - device_parser.parse_gpu_ids(gpus) + device_parser.parse_gpu_ids(devices) @mock.patch.dict( @@ -236,19 +238,19 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun ) @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("torch.cuda.is_available", return_value=True) -@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"]) -def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus): +@pytest.mark.parametrize("devices", [[0, 1, 2], 2, "0"]) +def test_torchelastic_gpu_parsing(mocked_device_count, devices): """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit sanitizing the gpus as only one of the GPUs is visible.""" - trainer = Trainer(gpus=gpus) + trainer = Trainer(accelerator="gpu", devices=devices) assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment) assert trainer.data_parallel_device_ids == device_parser.parse_gpu_ids(gpus) - assert trainer.gpus == gpus + assert trainer.devices == devices @RunIf(min_gpus=1) def test_single_gpu_batch_parse(): - trainer = Trainer(gpus=1) + trainer = Trainer(accelerator="gpu", devices=1) # non-transferrable types primitive_objects = [None, {}, [], 1.0, "x", [None, 2], {"x": (1, 2), "y": None}] diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index cdf94d18171f7..1388313ecccc9 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -156,7 +156,7 @@ def transfer_batch_to_device(self, batch, device, dataloader_idx): model = CurrentTestModel() batch = CustomBatch((torch.zeros(5, 32), torch.ones(5, 1, dtype=torch.long))) - trainer = Trainer(gpus=1) + trainer = Trainer(accelerator="gpu", devices=1) # running .fit() would require us to implement custom data loaders, we mock the model reference instead model_getter_mock.return_value = model @@ -203,7 +203,8 @@ def train_dataloader(self): max_epochs=1, enable_model_summary=False, strategy="ddp", - gpus=2, + accelerator="gpu", + devices=2, ) trainer.fit(model) @@ -437,10 +438,17 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(gpus=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), - pytest.param(dict(gpus=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) + dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), + marks=RunIf(min_gpus=1) + ), + pytest.param( + dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), + marks=RunIf(amp_apex=True, min_gpus=1) + ), + pytest.param( + dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), + marks=RunIf(deepspeed=True, min_gpus=1, standalone=True), ), ], ) diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 5d553394fed9c..a3d213ef9e920 100644 --- a/tests/models/test_horovod.py +++ 
b/tests/models/test_horovod.py @@ -55,7 +55,9 @@ def test_nccl_is_available_on_gpu_environment(): def _run_horovod(trainer_options): """Execute the training script across multiple workers in parallel.""" - devices = trainer_options.get("devices", 1) + num_processes = trainer_options.get("devices", 2) + # for Horovod, we interpret `gpus` to be set per worker + trainer_options.update(accelerator="gpu" if on_gpu else "cpu") tutils.reset_seed() # TODO: Find out why coverage breaks CI. # append = '-a' if '.coverage' in os.listdir(_PROJECT_ROOT) else '' @@ -151,22 +153,6 @@ def test_horovod_multi_gpu(tmpdir): _run_horovod(trainer_options) -@RunIf(min_gpus=2, skip_windows=True, horovod_nccl=True) -def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir): - trainer_options = dict( - default_root_dir=tmpdir, - enable_progress_bar=False, - max_epochs=1, - limit_train_batches=4, - limit_val_batches=0, - accumulate_grad_batches=2, - accelerator="gpu", - devices=2, - strategy="horovod", - ) - _run_horovod(trainer_options) - - @RunIf(horovod=True, skip_windows=True) def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir): """Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py index d111b266fb115..ee750d113cc59 100644 --- a/tests/models/test_onnx.py +++ b/tests/models/test_onnx.py @@ -43,7 +43,7 @@ def test_model_saves_with_input_sample(tmpdir): def test_model_saves_on_gpu(tmpdir): """Test that model saves on gpu.""" model = BoringModel() - trainer = Trainer(gpus=1, fast_dev_run=True) + trainer = Trainer(accelerator="gpu", devices=1, fast_dev_run=True) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") @@ -96,7 +96,8 @@ def test_model_saves_on_multi_gpu(tmpdir): max_epochs=1, limit_train_batches=10, limit_val_batches=10, - gpus=[0, 1], + accelerator="gpu", + devices=[0, 1], strategy="ddp_spawn", enable_progress_bar=False, ) diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index e5259c4047ad2..20212af558f3e 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -399,7 +399,8 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): limit_val_batches=5, callbacks=[checkpoint], logger=logger, - gpus=[0, 1], + accelerator="gpu", + devices=[0, 1], strategy="dp", default_root_dir=tmpdir, ) @@ -445,7 +446,8 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): limit_val_batches=2, callbacks=[checkpoint], logger=logger, - gpus=[0, 1], + accelerator="gpu", + devices=[0, 1], strategy="ddp_spawn", default_root_dir=tmpdir, ) @@ -564,7 +566,7 @@ def test_dp_resume(tmpdir): model = CustomClassificationModelDP(lr=0.1) dm = ClassifDataModule() - trainer_options = dict(max_epochs=1, gpus=2, strategy="dp", default_root_dir=tmpdir) + trainer_options = dict(max_epochs=1, accelerator="gpu", devices=2, strategy="dp", default_root_dir=tmpdir) # get logger logger = tutils.get_default_logger(tmpdir) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 90b255f73f5aa..2d65db791ebdd 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -57,7 +57,8 @@ def test_model_tpu_cores_1(tmpdir): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=2, - tpu_cores=1, + accelerator="tpu", + devices=1, limit_train_batches=4, limit_val_batches=4, ) @@ -76,7 +77,8 @@ def test_model_tpu_index(tmpdir, tpu_core): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=2, - 
tpu_cores=[tpu_core], + accelerator="tpu", + devices=[tpu_core], limit_train_batches=4, limit_val_batches=4, ) @@ -95,7 +97,8 @@ def test_model_tpu_cores_8(tmpdir): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=1, - tpu_cores=8, + accelerator="tpu", + devices=8, limit_train_batches=4, limit_val_batches=4, ) @@ -115,7 +118,8 @@ def test_model_16bit_tpu_cores_1(tmpdir): precision=16, enable_progress_bar=False, max_epochs=2, - tpu_cores=1, + accelerator="tpu", + devices=1, limit_train_batches=8, limit_val_batches=2, ) @@ -135,7 +139,8 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core): precision=16, enable_progress_bar=False, max_epochs=2, - tpu_cores=[tpu_core], + accelerator="tpu", + devices=[tpu_core], limit_train_batches=4, limit_val_batches=2, ) @@ -155,7 +160,8 @@ def test_model_16bit_tpu_cores_8(tmpdir): precision=16, enable_progress_bar=False, max_epochs=1, - tpu_cores=8, + accelerator="tpu", + devices=8, limit_train_batches=4, limit_val_batches=4, ) @@ -185,7 +191,8 @@ def validation_step(self, *args, **kwargs): max_epochs=2, limit_train_batches=2, limit_val_batches=2, - tpu_cores=8, + accelerator="tpu", + devices=8, ) trainer.fit(model) trainer.test(dataloaders=DataLoader(RandomDataset(32, 2000), batch_size=32)) @@ -200,7 +207,8 @@ def test_tpu_grad_norm(tmpdir): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=4, - tpu_cores=1, + accelerator="tpu", + devices=1, limit_train_batches=0.4, limit_val_batches=0.4, gradient_clip_val=0.5, @@ -219,7 +227,8 @@ def test_tpu_clip_grad_by_value(tmpdir): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=4, - tpu_cores=1, + accelerator="tpu", + devices=1, limit_train_batches=10, limit_val_batches=10, gradient_clip_val=0.5, @@ -237,40 +246,41 @@ def test_dataloaders_passed_to_fit(tmpdir): tutils.reset_seed() model = BoringModel() - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8) + trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, accelerator="tpu", devices=8) trainer.fit(model, train_dataloaders=model.train_dataloader(), val_dataloaders=model.val_dataloader()) assert trainer.state.finished, f"Training failed with {trainer.state}" @pytest.mark.parametrize( - ["tpu_cores", "expected_tpu_id"], + ["devices", "expected_tpu_id"], [(1, None), (8, None), ([1], 1), ([8], 8)], ) @RunIf(tpu=True) -def test_tpu_id_to_be_as_expected(tpu_cores, expected_tpu_id): +def test_tpu_id_to_be_as_expected(devices, expected_tpu_id): """Test if trainer.tpu_id is set as expected.""" - assert Trainer(tpu_cores=tpu_cores)._accelerator_connector.tpu_id == expected_tpu_id + assert Trainer(accelerator="tpu", devices=devices)._accelerator_connector.tpu_id == expected_tpu_id +@RunIf(tpu=True) def test_tpu_misconfiguration(): """Test if trainer.tpu_id is set as expected.""" with pytest.raises(MisconfigurationException, match="`tpu_cores` can only be"): - Trainer(tpu_cores=[1, 8]) + Trainer(accelerator="tpu", devices=[1, 8]) @pytest.mark.skipif(_TPU_AVAILABLE, reason="test requires missing TPU") def test_exception_when_no_tpu_found(tmpdir): """Test if exception is thrown when xla devices are not available.""" - with pytest.raises(MisconfigurationException, match="No TPU devices were found."): - Trainer(tpu_cores=8) + with pytest.raises(MisconfigurationException, match="but TPUs are not available."): + Trainer(accelerator="tpu", devices=8) -@pytest.mark.parametrize("tpu_cores", [1, 8, [1]]) +@pytest.mark.parametrize("devices", [1, 8, [1]]) @RunIf(tpu=True) -def test_accelerator_set_when_using_tpu(tmpdir, 
tpu_cores): +def test_accelerator_set_when_using_tpu(tmpdir, devices): """Test if the accelerator is set to `tpu` when tpu_cores is not None.""" - assert isinstance(Trainer(tpu_cores=tpu_cores).accelerator, TPUAccelerator) + assert isinstance(Trainer(accelerator="tpu", devices=devices).accelerator, TPUAccelerator) @RunIf(tpu=True) @@ -279,7 +289,7 @@ def test_broadcast_on_tpu(): """Checks if an object from the main process is broadcasted to other processes correctly.""" def test_broadcast(rank): - trainer = Trainer(tpu_cores=8) + trainer = Trainer(accelerator="tpu", devices=8) assert isinstance(trainer.accelerator, TPUAccelerator) assert isinstance(trainer.strategy, TPUSpawnStrategy) obj = ("ver_0.5", "logger_name", rank) @@ -310,9 +320,9 @@ def test_broadcast(rank): def test_tpu_choice(tmpdir, tpu_cores, expected_tpu_id, error_expected): if error_expected: with pytest.raises(MisconfigurationException, match=r".*tpu_cores` can only be 1, 8 or [<1-8>]*"): - Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores) + Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=tpu_cores) else: - trainer = Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores) + trainer = Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=tpu_cores) assert trainer._accelerator_connector.tpu_id == expected_tpu_id @@ -341,7 +351,7 @@ def test_tpu_reduce(): """Test tpu spawn reduce operation.""" def test_reduce(rank): - trainer = Trainer(tpu_cores=8) + trainer = Trainer(accelerator="tpu", devices=8) # faster this way reduce_ops = ["mean", "AVG", "undefined", "sum", ReduceOp.SUM, ReduceOp.MAX] for reduce_op in reduce_ops: @@ -372,7 +382,8 @@ def test_tpu_precision_16_clip_gradients(mock_clip_grad_norm, clip_val, tmpdir): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=1, - tpu_cores=1, + accelerator="tpu", + devices=1, precision=16, limit_train_batches=4, limit_val_batches=4, @@ -394,7 +405,14 @@ def test_if_test_works_with_checkpoint_false(tmpdir): # Train a model on TPU model = BoringModel() - trainer = Trainer(max_epochs=1, tpu_cores=8, default_root_dir=tmpdir, fast_dev_run=True, enable_checkpointing=False) + trainer = Trainer( + max_epochs=1, + accelerator="tpu", + devices=8, + default_root_dir=tmpdir, + fast_dev_run=True, + enable_checkpointing=False, + ) trainer.fit(model) assert trainer.state.finished, f"Training failed with {trainer.state}" @@ -430,7 +448,8 @@ def teardown(self, stage): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=4, - tpu_cores=8, + accelerator="tpu", + devices=8, limit_train_batches=0.4, limit_val_batches=0.4, strategy=TPUSpawnStrategy(debug=True), @@ -457,7 +476,8 @@ def teardown(self, stage): default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=4, - tpu_cores=8, + accelerator="tpu", + devices=8, limit_train_batches=0.4, limit_val_batches=0.4, ) @@ -469,6 +489,7 @@ def teardown(self, stage): @RunIf(tpu=True) @pl_multi_process_test def test_device_type_when_training_plugin_tpu_passed(tmpdir): - trainer = Trainer(strategy=TPUSpawnStrategy(), tpu_cores=8) + + trainer = Trainer(strategy=TPUSpawnStrategy(), accelerator="tpu", devices=8) assert isinstance(trainer.strategy, TPUSpawnStrategy) assert isinstance(trainer.accelerator, TPUAccelerator) From b5814090cef33afc415e693c745efea3cc9a634b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Jan 2022 01:37:33 +0000 Subject: [PATCH 02/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see 
https://pre-commit.ci --- tests/models/test_hooks.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 1388313ecccc9..67c762af321ad 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -438,13 +438,9 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly + pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), - marks=RunIf(min_gpus=1) - ), - pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), - marks=RunIf(amp_apex=True, min_gpus=1) + dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) ), pytest.param( dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), From 5257653c53b071a99f4b9c1eb79803592c6f83ab Mon Sep 17 00:00:00 2001 From: Kyle Date: Tue, 8 Feb 2022 21:09:02 -0500 Subject: [PATCH 03/32] get rid of devices = 0 or devices = None --- tests/models/test_gpu.py | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index da6934081ef72..cfd46e87b58e1 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -97,53 +97,29 @@ def device_count(): @pytest.mark.parametrize( ["devices", "expected_num_gpus", "strategy"], [ - pytest.param(None, 0, None, id="None - expect 0 gpu to use."), - pytest.param(0, 0, None, id="Oth gpu, expect 1 gpu to use."), pytest.param(1, 1, None, id="1st gpu, expect 1 gpu to use."), pytest.param(-1, PRETEND_N_OF_GPUS, "ddp", id="-1 - use all gpus"), pytest.param("-1", PRETEND_N_OF_GPUS, "ddp", id="'-1' - use all gpus"), pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)"), ], ) -def test_trainer_gpu_parse(mocked_device_count, devices, expected_num_gpus, strategy): - assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).num_gpus == expected_num_gpus - - -@pytest.mark.parametrize( - ["devices", "expected_num_gpus", "strategy"], - [ - pytest.param(None, 0, None, id="None - expect 0 gpu to use."), - pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."), - ], -) -def test_trainer_num_gpu_0(mocked_device_count_0, devices, expected_num_gpus, strategy): +@mock.patch("torch.cuda.is_available", return_value=True) +def test_trainer_gpu_parse(_, mocked_device_count, devices, expected_num_gpus, strategy): assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).num_gpus == expected_num_gpus @pytest.mark.parametrize( ["devices", "expected_root_gpu", "strategy"], [ - pytest.param(None, None, "ddp", id="None is None"), - pytest.param(0, None, "ddp", id="O gpus, expect gpu root device to be None."), pytest.param(1, 0, "ddp", id="1 gpu, expect gpu root device to be 0."), pytest.param(-1, 0, "ddp", id="-1 - use all gpus, expect gpu root device to be 0."), pytest.param("-1", 0, "ddp", id="'-1' - use all gpus, expect gpu root device to be 0."), pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)"), ], ) -def test_root_gpu_property(mocked_device_count, devices, expected_root_gpu, strategy): - assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).root_gpu == expected_root_gpu - - -@pytest.mark.parametrize( - ["devices", 
"expected_root_gpu", "strategy"], - [ - pytest.param(None, None, None, id="None is None"), - pytest.param(None, None, "ddp", id="None is None"), - pytest.param(0, None, "ddp", id="None is None"), - ], -) -def test_root_gpu_property_0_passing(mocked_device_count_0, devices, expected_root_gpu, strategy): +@mock.patch("torch.cuda.is_available", return_value=True) +@mock.patch("torch.cuda.device_count", return_value=3) +def test_root_gpu_property(_, mocked_device_count, devices, expected_root_gpu, strategy): assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).root_gpu == expected_root_gpu @@ -236,10 +212,10 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun "LOCAL_WORLD_SIZE": "2", }, ) -@mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("torch.cuda.is_available", return_value=True) -@pytest.mark.parametrize("devices", [[0, 1, 2], 2, "0"]) -def test_torchelastic_gpu_parsing(mocked_device_count, devices): +@mock.patch("torch.cuda.device_count", return_value=1) +@pytest.mark.parametrize("devices", [[0, 1, 2], 2]) +def test_torchelastic_gpu_parsing(_, mocked_device_count, devices): """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit sanitizing the gpus as only one of the GPUs is visible.""" trainer = Trainer(accelerator="gpu", devices=devices) From 9e75a0f74d95609fbfcdb2d0da08cf3c021c3c47 Mon Sep 17 00:00:00 2001 From: Kyle Date: Tue, 8 Feb 2022 22:00:51 -0500 Subject: [PATCH 04/32] use gpu when accelerator="gpu" --- tests/models/test_hooks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 67c762af321ad..15e31e3f017cb 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -497,9 +497,11 @@ def training_step(self, batch, batch_idx): "state_dict": ANY, "loops": ANY, } - if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": - saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cuda:0" if "gpus" in kwargs else "cpu") + if kwargs.get("amp_backend") == "native": + saved_ckpt["native_amp_scaling_state"] = ANY + elif kwargs.get("amp_backend") == "apex": + saved_ckpt["amp_scaling_state"] = ANY + device = torch.device("cuda:0" if kwargs["accelerator"] == "gpu" else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From 84caafc0e83e0c34aa644cd7a5deed5521d69281 Mon Sep 17 00:00:00 2001 From: Jv Kyle Eclarin Date: Tue, 8 Feb 2022 22:54:46 -0500 Subject: [PATCH 05/32] use the right way of getting kwargs --- tests/models/test_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 15e31e3f017cb..aee49273a11a6 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -501,7 +501,7 @@ def training_step(self, batch, batch_idx): saved_ckpt["native_amp_scaling_state"] = ANY elif kwargs.get("amp_backend") == "apex": saved_ckpt["amp_scaling_state"] = ANY - device = torch.device("cuda:0" if kwargs["accelerator"] == "gpu" else "cpu") + device = torch.device("cuda:0" if kwargs.get("accelerator") == "gpu" else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From 0b199dd685e05f12b4368e2e6783473b777a0c38 Mon Sep 17 00:00:00 2001 From: Kyle Date: Wed, 9 Feb 2022 23:08:03 -0500 
Subject: [PATCH 06/32] use devices instead of tpu_cores --- tests/models/test_tpu.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 2d65db791ebdd..f4db46f98c5c7 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -50,7 +50,7 @@ def val_dataloader(self): @RunIf(tpu=True) @pl_multi_process_test -def test_model_tpu_cores_1(tmpdir): +def test_model_devices_1(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -90,7 +90,7 @@ def test_model_tpu_index(tmpdir, tpu_core): @RunIf(tpu=True) @pl_multi_process_test -def test_model_tpu_cores_8(tmpdir): +def test_model_devices_8(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -110,7 +110,7 @@ def test_model_tpu_cores_8(tmpdir): @RunIf(tpu=True) @pl_multi_process_test -def test_model_16bit_tpu_cores_1(tmpdir): +def test_model_16bit_devices_1(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -152,7 +152,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core): @RunIf(tpu=True) @pl_multi_process_test -def test_model_16bit_tpu_cores_8(tmpdir): +def test_model_16bit_devices_8(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -300,7 +300,7 @@ def test_broadcast(rank): @pytest.mark.parametrize( - ["tpu_cores", "expected_tpu_id", "error_expected"], + ["devices", "expected_tpu_id", "error_expected"], [ (1, None, False), (8, None, False), @@ -317,23 +317,23 @@ def test_broadcast(rank): ) @RunIf(tpu=True) @pl_multi_process_test -def test_tpu_choice(tmpdir, tpu_cores, expected_tpu_id, error_expected): +def test_tpu_choice(tmpdir, devices, expected_tpu_id, error_expected): if error_expected: with pytest.raises(MisconfigurationException, match=r".*tpu_cores` can only be 1, 8 or [<1-8>]*"): - Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=tpu_cores) + Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=devices) else: - trainer = Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=tpu_cores) + trainer = Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=devices) assert trainer._accelerator_connector.tpu_id == expected_tpu_id @pytest.mark.parametrize( ["cli_args", "expected"], - [("--tpu_cores=8", {"tpu_cores": 8}), ("--tpu_cores=1,", {"tpu_cores": "1,"})], + [("--devices=8", {"devices": 8}), ("--devices=1,", {"devices": "1,"})], ) @RunIf(tpu=True) @pl_multi_process_test -def test_tpu_cores_with_argparse(cli_args, expected): - """Test passing tpu_cores in command line.""" +def test_devices_with_argparse(cli_args, expected): + """Test passing devices in command line.""" cli_args = cli_args.split(" ") if cli_args else [] with mock.patch("argparse._sys.argv", ["any.py"] + cli_args): parser = ArgumentParser(add_help=False) From 2318b433ff29f78e13836bee367f903dc0e810f3 Mon Sep 17 00:00:00 2001 From: Kyle Date: Wed, 9 Feb 2022 23:08:52 -0500 Subject: [PATCH 07/32] switch mocked to match function --- tests/models/test_gpu.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index cfd46e87b58e1..aa02719236182 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -117,8 +117,8 @@ def test_trainer_gpu_parse(_, mocked_device_count, devices, expected_num_gpus, s pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)"), ], ) 
-@mock.patch("torch.cuda.is_available", return_value=True) @mock.patch("torch.cuda.device_count", return_value=3) +@mock.patch("torch.cuda.is_available", return_value=True) def test_root_gpu_property(_, mocked_device_count, devices, expected_root_gpu, strategy): assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).root_gpu == expected_root_gpu @@ -212,8 +212,8 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun "LOCAL_WORLD_SIZE": "2", }, ) -@mock.patch("torch.cuda.is_available", return_value=True) @mock.patch("torch.cuda.device_count", return_value=1) +@mock.patch("torch.cuda.is_available", return_value=True) @pytest.mark.parametrize("devices", [[0, 1, 2], 2]) def test_torchelastic_gpu_parsing(_, mocked_device_count, devices): """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit From 63b1dccd2b6b58f78777000f6e927cb7b857f8f0 Mon Sep 17 00:00:00 2001 From: Kyle Date: Wed, 9 Feb 2022 23:58:14 -0500 Subject: [PATCH 08/32] add accelerator --- tests/models/test_tpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index f4db46f98c5c7..204432b88fa61 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -328,7 +328,7 @@ def test_tpu_choice(tmpdir, devices, expected_tpu_id, error_expected): @pytest.mark.parametrize( ["cli_args", "expected"], - [("--devices=8", {"devices": 8}), ("--devices=1,", {"devices": "1,"})], + [("--accelerator=gpu --devices=8", {"devices": 8}), ("--accelerator=gpu --devices=1,", {"devices": "1,"})], ) @RunIf(tpu=True) @pl_multi_process_test From 81aa3300a129f63a87033ae365dd5cb509a01b81 Mon Sep 17 00:00:00 2001 From: Kyle Date: Sun, 13 Mar 2022 00:36:17 -0500 Subject: [PATCH 09/32] revert tests --- tests/models/test_gpu.py | 8 ++++---- tests/models/test_tpu.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index aa02719236182..9d780edec2e65 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -214,14 +214,14 @@ def test_parse_gpu_returns_none_when_no_devices_are_available(mocked_device_coun ) @mock.patch("torch.cuda.device_count", return_value=1) @mock.patch("torch.cuda.is_available", return_value=True) -@pytest.mark.parametrize("devices", [[0, 1, 2], 2]) -def test_torchelastic_gpu_parsing(_, mocked_device_count, devices): +@pytest.mark.parametrize("gpus", [[0, 1, 2], 2, "0"]) +def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus): """Ensure when using torchelastic and nproc_per_node is set to the default of 1 per GPU device That we omit sanitizing the gpus as only one of the GPUs is visible.""" - trainer = Trainer(accelerator="gpu", devices=devices) + trainer = Trainer(gpus=gpus) assert isinstance(trainer._accelerator_connector.cluster_environment, TorchElasticEnvironment) assert trainer.data_parallel_device_ids == device_parser.parse_gpu_ids(gpus) - assert trainer.devices == devices + assert trainer.gpus == gpus @RunIf(min_gpus=1) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 204432b88fa61..5920b77298e1e 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -272,8 +272,8 @@ def test_tpu_misconfiguration(): def test_exception_when_no_tpu_found(tmpdir): """Test if exception is thrown when xla devices are not available.""" - with pytest.raises(MisconfigurationException, match="but TPUs are not available."): - 
Trainer(accelerator="tpu", devices=8) + with pytest.raises(MisconfigurationException, match="No TPU devices were found."): + Trainer(tpu_cores=8) @pytest.mark.parametrize("devices", [1, 8, [1]]) From dcd3b03c023cd9b9d824505229303e739fcbe042 Mon Sep 17 00:00:00 2001 From: Kyle Date: Sun, 13 Mar 2022 01:17:12 -0500 Subject: [PATCH 10/32] put back a few more tests --- tests/models/test_gpu.py | 12 ++++++++++++ tests/models/test_horovod.py | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py index 9d780edec2e65..865a236fe87c5 100644 --- a/tests/models/test_gpu.py +++ b/tests/models/test_gpu.py @@ -123,6 +123,18 @@ def test_root_gpu_property(_, mocked_device_count, devices, expected_root_gpu, s assert Trainer(accelerator="gpu", devices=devices, strategy=strategy).root_gpu == expected_root_gpu +@pytest.mark.parametrize( + ["gpus", "expected_root_gpu", "strategy"], + [ + pytest.param(None, None, None, id="None is None"), + pytest.param(None, None, "ddp", id="None is None"), + pytest.param(0, None, "ddp", id="None is None"), + ], +) +def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, strategy): + assert Trainer(gpus=gpus, strategy=strategy).root_gpu == expected_root_gpu + + # Asking for a gpu when non are available will result in a MisconfigurationException @pytest.mark.parametrize( ["devices", "expected_root_gpu", "strategy"], diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index a3d213ef9e920..910190fcbfa4e 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -153,6 +153,22 @@ def test_horovod_multi_gpu(tmpdir): _run_horovod(trainer_options) +@RunIf(min_gpus=2, skip_windows=True, horovod_nccl=True) +def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir): + trainer_options = dict( + default_root_dir=tmpdir, + enable_progress_bar=False, + max_epochs=1, + limit_train_batches=4, + limit_val_batches=0, + accumulate_grad_batches=2, + accelerator="gpu", + devices=2, + strategy="horovod", + ) + _run_horovod(trainer_options) + + @RunIf(horovod=True, skip_windows=True) def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir): """Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod From d9f352b48829cdc7165e4828291cc202d85b3b66 Mon Sep 17 00:00:00 2001 From: Kyle Date: Sun, 13 Mar 2022 01:22:38 -0500 Subject: [PATCH 11/32] revert test_horovod --- tests/models/test_horovod.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 910190fcbfa4e..5d553394fed9c 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -55,9 +55,7 @@ def test_nccl_is_available_on_gpu_environment(): def _run_horovod(trainer_options): """Execute the training script across multiple workers in parallel.""" - num_processes = trainer_options.get("devices", 2) - # for Horovod, we interpret `gpus` to be set per worker - trainer_options.update(accelerator="gpu" if on_gpu else "cpu") + devices = trainer_options.get("devices", 1) tutils.reset_seed() # TODO: Find out why coverage breaks CI. 
# append = '-a' if '.coverage' in os.listdir(_PROJECT_ROOT) else '' From 5e19aa385a57802687cfee02eeda5f46a9db6599 Mon Sep 17 00:00:00 2001 From: Kyle Date: Sun, 13 Mar 2022 01:27:42 -0500 Subject: [PATCH 12/32] revert tpu test --- tests/models/test_tpu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 5920b77298e1e..0cd108a07a9be 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -276,11 +276,11 @@ def test_exception_when_no_tpu_found(tmpdir): Trainer(tpu_cores=8) -@pytest.mark.parametrize("devices", [1, 8, [1]]) +@pytest.mark.parametrize("tpu_cores", [1, 8, [1]]) @RunIf(tpu=True) -def test_accelerator_set_when_using_tpu(tmpdir, devices): +def test_accelerator_set_when_using_tpu(tmpdir, tpu_cores): """Test if the accelerator is set to `tpu` when tpu_cores is not None.""" - assert isinstance(Trainer(accelerator="tpu", devices=devices).accelerator, TPUAccelerator) + assert isinstance(Trainer(tpu_cores=tpu_cores).accelerator, TPUAccelerator) @RunIf(tpu=True) From c1f3df1eec03b5bda043633d096be637410d326f Mon Sep 17 00:00:00 2001 From: Kyle Date: Mon, 14 Mar 2022 14:57:44 -0400 Subject: [PATCH 13/32] use tpu instead of gpu --- tests/models/test_tpu.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 0cd108a07a9be..96569da016876 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -328,12 +328,12 @@ def test_tpu_choice(tmpdir, devices, expected_tpu_id, error_expected): @pytest.mark.parametrize( ["cli_args", "expected"], - [("--accelerator=gpu --devices=8", {"devices": 8}), ("--accelerator=gpu --devices=1,", {"devices": "1,"})], + [("--tpu_cores=8", {"tpu_cores": 8}), ("--tpu_cores=1,", {"tpu_cores": "1,"})], ) @RunIf(tpu=True) @pl_multi_process_test -def test_devices_with_argparse(cli_args, expected): - """Test passing devices in command line.""" +def test_tpu_cores_with_argparse(cli_args, expected): + """Test passing tpu_cores in command line.""" cli_args = cli_args.split(" ") if cli_args else [] with mock.patch("argparse._sys.argv", ["any.py"] + cli_args): parser = ArgumentParser(add_help=False) From 26e5e66d63a3b8058cdb9223f5c881165e21eb7f Mon Sep 17 00:00:00 2001 From: Kyle Date: Mon, 14 Mar 2022 15:03:20 -0400 Subject: [PATCH 14/32] use devices --- tests/models/test_tpu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 96569da016876..2a857b174baac 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -328,7 +328,7 @@ def test_tpu_choice(tmpdir, devices, expected_tpu_id, error_expected): @pytest.mark.parametrize( ["cli_args", "expected"], - [("--tpu_cores=8", {"tpu_cores": 8}), ("--tpu_cores=1,", {"tpu_cores": "1,"})], + [("--accelerator=tpu --devices=8", {"devices": 8}), ("--accelerator=tpu --devices=1,", {"devices": "1,"})], ) @RunIf(tpu=True) @pl_multi_process_test From 88e9752d9c549a87e6e6ff17443c787d0b6de98b Mon Sep 17 00:00:00 2001 From: Kyle Date: Mon, 14 Mar 2022 15:36:02 -0400 Subject: [PATCH 15/32] devices always returns an int or List[int] --- tests/models/test_tpu.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 2a857b174baac..bd0dba0be416e 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -306,9 +306,9 @@ def test_broadcast(rank): (8, None, False), ([1], 1, False), ([8], 8, 
False), - ("1,", 1, False), - ("1", None, False), - ("9, ", 9, True), + (1, 1, False), + (1, None, False), + (9, 9, True), ([9], 9, True), ([0], 0, True), (2, None, True), @@ -328,7 +328,7 @@ def test_tpu_choice(tmpdir, devices, expected_tpu_id, error_expected): @pytest.mark.parametrize( ["cli_args", "expected"], - [("--accelerator=tpu --devices=8", {"devices": 8}), ("--accelerator=tpu --devices=1,", {"devices": "1,"})], + [("--accelerator=tpu --devices=8", {"devices": 8}), ("--accelerator=tpu --devices=1,", {"devices": 1})], ) @RunIf(tpu=True) @pl_multi_process_test From 1723b833a7f44702cc5a9b5626fad65f786968cb Mon Sep 17 00:00:00 2001 From: Kyle Date: Mon, 14 Mar 2022 16:11:20 -0400 Subject: [PATCH 16/32] revert tests --- tests/models/test_tpu.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index bd0dba0be416e..2efaae65a395f 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -300,15 +300,15 @@ def test_broadcast(rank): @pytest.mark.parametrize( - ["devices", "expected_tpu_id", "error_expected"], + ["tpu_cores", "expected_tpu_id", "error_expected"], [ (1, None, False), (8, None, False), ([1], 1, False), ([8], 8, False), - (1, 1, False), - (1, None, False), - (9, 9, True), + ("1,", 1, False), + ("1", None, False), + ("9, ", 9, True), ([9], 9, True), ([0], 0, True), (2, None, True), @@ -317,18 +317,18 @@ def test_broadcast(rank): ) @RunIf(tpu=True) @pl_multi_process_test -def test_tpu_choice(tmpdir, devices, expected_tpu_id, error_expected): +def test_tpu_choice(tmpdir, tpu_cores, expected_tpu_id, error_expected): if error_expected: with pytest.raises(MisconfigurationException, match=r".*tpu_cores` can only be 1, 8 or [<1-8>]*"): - Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=devices) + Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores) else: - trainer = Trainer(default_root_dir=tmpdir, accelerator="tpu", devices=devices) + trainer = Trainer(default_root_dir=tmpdir, tpu_cores=tpu_cores) assert trainer._accelerator_connector.tpu_id == expected_tpu_id @pytest.mark.parametrize( ["cli_args", "expected"], - [("--accelerator=tpu --devices=8", {"devices": 8}), ("--accelerator=tpu --devices=1,", {"devices": 1})], + [("--tpu_cores=8", {"tpu_cores": 8}), ("--tpu_cores=1,", {"tpu_cores": "1,"})], ) @RunIf(tpu=True) @pl_multi_process_test From d35aba7651dc8c12b80875c1f7112e395d76a0cc Mon Sep 17 00:00:00 2001 From: Kyle Date: Mon, 14 Mar 2022 22:39:56 -0400 Subject: [PATCH 17/32] use devices instead of tpu_cores --- tests/models/test_tpu.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 2efaae65a395f..65daad59ae556 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -67,10 +67,10 @@ def test_model_devices_1(tmpdir): tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) -@pytest.mark.parametrize("tpu_core", [1, 5]) +@pytest.mark.parametrize("devices", [1, 5]) @RunIf(tpu=True) @pl_multi_process_test -def test_model_tpu_index(tmpdir, tpu_core): +def test_model_tpu_index(tmpdir, devices): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -78,7 +78,7 @@ def test_model_tpu_index(tmpdir, tpu_core): enable_progress_bar=False, max_epochs=2, accelerator="tpu", - devices=[tpu_core], + devices=[devices], limit_train_batches=4, limit_val_batches=4, ) @@ -128,10 +128,10 @@ def test_model_16bit_devices_1(tmpdir): 
tpipes.run_model_test(trainer_options, model, on_gpu=False) -@pytest.mark.parametrize("tpu_core", [1, 5]) +@pytest.mark.parametrize("devices", [1, 5]) @RunIf(tpu=True) @pl_multi_process_test -def test_model_16bit_tpu_index(tmpdir, tpu_core): +def test_model_16bit_tpu_index(tmpdir, devices): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, precision=16, enable_progress_bar=False, max_epochs=2, accelerator="tpu", - devices=[tpu_core], + devices=[devices], limit_train_batches=4, limit_val_batches=2, ) From 263813060dff9044e05c65849df226724e6a3333 Mon Sep 17 00:00:00 2001 From: Kyle Date: Tue, 15 Mar 2022 01:55:50 -0400 Subject: [PATCH 18/32] revert some tests so we can use deprecation warning instead --- tests/models/test_tpu.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 65daad59ae556..87fd29f1c0209 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -67,18 +67,17 @@ def test_model_devices_1(tmpdir): tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) -@pytest.mark.parametrize("devices", [1, 5]) +@pytest.mark.parametrize("tpu_core", [1, 5]) @RunIf(tpu=True) @pl_multi_process_test -def test_model_tpu_index(tmpdir, devices): +def test_model_tpu_index(tmpdir, tpu_core): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, enable_progress_bar=False, max_epochs=2, - accelerator="tpu", - devices=[devices], + tpu_cores=[tpu_core], limit_train_batches=4, limit_val_batches=4, ) @@ -128,10 +127,10 @@ def test_model_16bit_devices_1(tmpdir): tpipes.run_model_test(trainer_options, model, on_gpu=False) -@pytest.mark.parametrize("devices", [1, 5]) +@pytest.mark.parametrize("tpu_core", [1, 5]) @RunIf(tpu=True) @pl_multi_process_test -def test_model_16bit_tpu_index(tmpdir, devices): +def test_model_16bit_tpu_index(tmpdir, tpu_core): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -139,8 +138,7 @@ def test_model_16bit_tpu_index(tmpdir, devices): precision=16, enable_progress_bar=False, max_epochs=2, - accelerator="tpu", - devices=[devices], + tpu_cores=[tpu_core], limit_train_batches=4, limit_val_batches=2, ) From 7bb59ac36cfdf2f11841a9483f04070ed144ba53 Mon Sep 17 00:00:00 2001 From: Kyle Date: Tue, 22 Mar 2022 02:15:59 -0400 Subject: [PATCH 19/32] match current changes directly --- tests/models/test_hooks.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index aee49273a11a6..2c21aa66c5937 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -442,10 +442,6 @@ def _predict_batch(trainer, model, batches): pytest.param( dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) ), - pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), - marks=RunIf(deepspeed=True, min_gpus=1, standalone=True), - ), ], ) @pytest.mark.parametrize("automatic_optimization", (True, False)) @@ -497,11 +493,9 @@ def training_step(self, batch, batch_idx): "state_dict": ANY, "loops": ANY, } - if kwargs.get("amp_backend") == "native": - saved_ckpt["native_amp_scaling_state"] = ANY - elif kwargs.get("amp_backend") == "apex": - saved_ckpt["amp_scaling_state"] = ANY + if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": + saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cuda:0" if kwargs.get("accelerator") == "gpu" 
else "cpu") + if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": + saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY + device = torch.device("cuda:0" if "gpus" in kwargs else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From 1e18c21b3b22a9d9a84564e3dd018d6437da74ea Mon Sep 17 00:00:00 2001 From: Jv Kyle Eclarin Date: Thu, 24 Mar 2022 01:20:39 -0400 Subject: [PATCH 20/32] revert tpu test save the best for last --- tests/models/test_hooks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 281339ba3df70..eba6dbd611bbb 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -440,7 +440,7 @@ def _predict_batch(trainer, model, batches): # these precision plugins modify the optimization flow, so testing them explicitly pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) + dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) ), ], ) From c61b4f7f524e39311b294d699772f186eaa1f99f Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 24 Mar 2022 19:38:20 -0400 Subject: [PATCH 21/32] use accelerator="gpu" --- tests/models/test_hooks.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index eba6dbd611bbb..c605e15561ba2 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -439,8 +439,9 @@ def _predict_batch(trainer, model, batches): {}, # these precision plugins modify the optimization flow, so testing them explicitly pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) + dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) ), ], ) @@ -495,7 +496,7 @@ def training_step(self, batch, batch_idx): } if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cuda:0" if "gpus" in kwargs else "cpu") + device = torch.device("cuda:0" if kwargs["accelerator"] == "gpu" else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), @@ -553,7 +554,6 @@ def training_step(self, batch, batch_idx): dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)), dict(name="Callback.on_train_epoch_end", args=(trainer, model)), # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_train_epoch_end` - dict(name="Callback.state_dict"), dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)), dict(name="on_save_checkpoint", args=(saved_ckpt,)), dict(name="on_train_epoch_end"), @@ -627,7 +627,6 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir): dict(name="setup", kwargs=dict(stage="fit")), dict(name="on_load_checkpoint", args=(loaded_ckpt,)), 
dict(name="Callback.on_load_checkpoint", args=(trainer, model, {"foo": True})), - dict(name="Callback.load_state_dict", args=({"foo": True},)), dict(name="configure_sharded_model"), dict(name="Callback.on_configure_sharded_model", args=(trainer, model)), dict(name="configure_optimizers"), @@ -649,7 +648,6 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir): *model._train_batch(trainer, model, steps_after_reload, current_batch=1, current_epoch=1), dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)), dict(name="Callback.on_train_epoch_end", args=(trainer, model)), - dict(name="Callback.state_dict"), dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)), dict(name="on_save_checkpoint", args=(saved_ckpt,)), dict(name="on_train_epoch_end"), From 548426ed5eac3509981f9673777ff303c9b5b08f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 24 Mar 2022 23:39:39 +0000 Subject: [PATCH 22/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/models/test_hooks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index c605e15561ba2..99397de7d5e90 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -439,9 +439,12 @@ def _predict_batch(trainer, model, batches): {}, # these precision plugins modify the optimization flow, so testing them explicitly pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), - pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) + dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) + ), + pytest.param( + dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), + marks=RunIf(deepspeed=True, min_gpus=1, standalone=True), ), ], ) From 8e71f4174f2c4effdccd9b69e4ae1a148e78dfb8 Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 24 Mar 2022 19:44:39 -0400 Subject: [PATCH 23/32] use if statements for readability --- tests/models/test_hooks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index c605e15561ba2..6cd54238ddf2d 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -496,7 +496,9 @@ def training_step(self, batch, batch_idx): } if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cuda:0" if kwargs["accelerator"] == "gpu" else "cpu") + device = torch.device("cpu") + if "accelerator" in kwargs: + device = torch.device("cuda:0") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From fc7d4a7d4e2b374e65e80b03fd84272047c1d82f Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 24 Mar 2022 20:01:42 -0400 Subject: [PATCH 24/32] revert change for now --- tests/models/test_hooks.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index ee6c6e20f3373..423ff7a1f28b6 100644 --- a/tests/models/test_hooks.py +++ 
b/tests/models/test_hooks.py @@ -438,13 +438,10 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(gpus=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(gpus=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) - ), - pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), - marks=RunIf(deepspeed=True, min_gpus=1, standalone=True), + dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) ), ], ) @@ -499,9 +496,7 @@ def training_step(self, batch, batch_idx): } if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cpu") - if "accelerator" in kwargs: - device = torch.device("cuda:0") + device = torch.device("cuda:0" if "gpus" in kwargs else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From 88d3c0c003e68ad97abfc06709c3e0a839ade160 Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 24 Mar 2022 20:51:24 -0400 Subject: [PATCH 25/32] try accelerator=gpu again, but with the right indent --- tests/models/test_hooks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 423ff7a1f28b6..9eda9d9d60043 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -438,10 +438,10 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(gpus=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), - pytest.param(dict(gpus=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), + pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) + dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) ), ], ) @@ -496,7 +496,7 @@ def training_step(self, batch, batch_idx): } if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cuda:0" if "gpus" in kwargs else "cpu") + device = torch.device("cuda:0" if "accelerator" in kwargs else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From 68cdb795097b526f425282b0af6de7f4aad12c77 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 25 Mar 2022 00:52:48 +0000 Subject: [PATCH 26/32] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/models/test_hooks.py | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 9eda9d9d60043..e4acdef811589 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -439,9 +439,12 @@ def _predict_batch(trainer, model, batches): {}, # these precision plugins modify the optimization flow, so testing them explicitly pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), - pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) + dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) + ), + pytest.param( + dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), + marks=RunIf(deepspeed=True, min_gpus=1, standalone=True), ), ], ) From 46088a105259ea85dfcfc994f7446e7816b4ffe8 Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 24 Mar 2022 21:56:03 -0400 Subject: [PATCH 27/32] revert test again; it wasn't the indent --- tests/models/test_hooks.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 9eda9d9d60043..eb8802db42ed1 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -438,10 +438,10 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), - pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), + pytest.param(dict(gpus=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(gpus=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) + dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) ), ], ) @@ -496,7 +496,7 @@ def training_step(self, batch, batch_idx): } if kwargs.get("amp_backend") == "native" or kwargs.get("amp_backend") == "apex": saved_ckpt[trainer.precision_plugin.__class__.__qualname__] = ANY - device = torch.device("cuda:0" if "accelerator" in kwargs else "cpu") + device = torch.device("cuda:0" if "gpus" in kwargs else "cpu") expected = [ dict(name="Callback.on_init_start", args=(trainer,)), dict(name="Callback.on_init_end", args=(trainer,)), From dfb7d36473b9a869081fe5b59bdb5710180bbe0c Mon Sep 17 00:00:00 2001 From: Kyle Date: Thu, 24 Mar 2022 22:00:38 -0400 Subject: [PATCH 28/32] revert test, it wasn't the indent --- tests/models/test_hooks.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 67c762af321ad..eb8802db42ed1 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -438,13 +438,10 @@ def _predict_batch(trainer, model, batches): [ {}, # these precision plugins modify the optimization flow, so testing them explicitly - pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(gpus=1, precision=16, 
amp_backend="native"), marks=RunIf(min_gpus=1)), + pytest.param(dict(gpus=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1)), pytest.param( - dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(amp_apex=True, min_gpus=1) - ), - pytest.param( - dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"), - marks=RunIf(deepspeed=True, min_gpus=1, standalone=True), + dict(gpus=1, precision=16, strategy="deepspeed"), marks=RunIf(deepspeed=True, min_gpus=1, standalone=True) ), ], ) From 45e6dd7fc41cb7cba934aa923a0e80567214b464 Mon Sep 17 00:00:00 2001 From: Kyle Date: Fri, 25 Mar 2022 11:30:49 -0400 Subject: [PATCH 29/32] add Callback.state_dict back --- tests/models/test_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index eb8802db42ed1..ab80a07740cc8 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -554,6 +554,7 @@ def training_step(self, batch, batch_idx): dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)), dict(name="Callback.on_train_epoch_end", args=(trainer, model)), # `ModelCheckpoint.save_checkpoint` is called here from `Callback.on_train_epoch_end` + dict(name="Callback.state_dict"), dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)), dict(name="on_save_checkpoint", args=(saved_ckpt,)), dict(name="on_train_epoch_end"), From a3582b99edc7b28e0d9d89f9f20547508dc39ca8 Mon Sep 17 00:00:00 2001 From: Kyle Date: Fri, 25 Mar 2022 11:48:30 -0400 Subject: [PATCH 30/32] add Callback.load_state_dict back to test_trainer_model_hook_system_fit_no_val_and_resume --- tests/models/test_hooks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 6de9716d3d26f..548221fa880d8 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -627,6 +627,7 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir): dict(name="Callback.setup", args=(trainer, model), kwargs=dict(stage="fit")), dict(name="setup", kwargs=dict(stage="fit")), dict(name="on_load_checkpoint", args=(loaded_ckpt,)), + dict(name="Callback.load_state_dict", args=({"foo": True},)), dict(name="Callback.on_load_checkpoint", args=(trainer, model, {"foo": True})), dict(name="configure_sharded_model"), dict(name="Callback.on_configure_sharded_model", args=(trainer, model)), From d05cc21ef80dbc5367b4ee64d7a6eb8300924ef9 Mon Sep 17 00:00:00 2001 From: Kyle Date: Fri, 25 Mar 2022 12:12:24 -0400 Subject: [PATCH 31/32] wow, missed a few things when reverting the test --- tests/models/test_hooks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 548221fa880d8..97b6c8a0f09a3 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -627,8 +627,8 @@ def test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir): dict(name="Callback.setup", args=(trainer, model), kwargs=dict(stage="fit")), dict(name="setup", kwargs=dict(stage="fit")), dict(name="on_load_checkpoint", args=(loaded_ckpt,)), - dict(name="Callback.load_state_dict", args=({"foo": True},)), dict(name="Callback.on_load_checkpoint", args=(trainer, model, {"foo": True})), + dict(name="Callback.load_state_dict", args=({"foo": True},)), dict(name="configure_sharded_model"), dict(name="Callback.on_configure_sharded_model", args=(trainer, model)), dict(name="configure_optimizers"), @@ -650,6 +650,7 @@ def 
test_trainer_model_hook_system_fit_no_val_and_resume(tmpdir): *model._train_batch(trainer, model, steps_after_reload, current_batch=1, current_epoch=1), dict(name="training_epoch_end", args=([dict(loss=ANY)] * train_batches,)), dict(name="Callback.on_train_epoch_end", args=(trainer, model)), + dict(name="Callback.state_dict"), dict(name="Callback.on_save_checkpoint", args=(trainer, model, saved_ckpt)), dict(name="on_save_checkpoint", args=(saved_ckpt,)), dict(name="on_train_epoch_end"), From 2fbd71da5d24909b084ae70e8d67316aa36658db Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Sat, 26 Mar 2022 19:28:20 +0530 Subject: [PATCH 32/32] Apply suggestions from code review --- tests/models/test_tpu.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 078b7e2808339..3b5c53a4d3397 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -50,7 +50,7 @@ def val_dataloader(self): @RunIf(tpu=True) @pl_multi_process_test -def test_model_devices_1(tmpdir): +def test_model_tpu_devices_1(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -90,7 +90,7 @@ def test_model_tpu_index(tmpdir, tpu_core): @RunIf(tpu=True) @pl_multi_process_test -def test_model_devices_8(tmpdir): +def test_model_tpu_devices_8(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -110,7 +110,7 @@ def test_model_devices_8(tmpdir): @RunIf(tpu=True) @pl_multi_process_test -def test_model_16bit_devices_1(tmpdir): +def test_model_16bit_tpu_devices_1(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict( @@ -152,7 +152,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core): @RunIf(tpu=True) @pl_multi_process_test -def test_model_16bit_devices_8(tmpdir): +def test_model_16bit_tpu_devices_8(tmpdir): """Make sure model trains on TPU.""" tutils.reset_seed() trainer_options = dict(