From a23460778955ff57e6fd61f4b1d9eacb49722ddb Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Thu, 1 Dec 2022 02:33:16 +0900
Subject: [PATCH 01/28] Bump ray from 1.9.1 to 2.1.0

Signed-off-by: Junhwa Song
---
 requirements/runtime.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index ab4bf3ac..7cf3a24b 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,5 +1,5 @@
 mmcv-full>=1.4.7
 pandas
 protobuf<=3.20
-ray[default]==1.9.1
+ray[default]==2.1.0
 tabulate

From f6e85e4d4f0fcb0120e29388d5260173505c6cbf Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Thu, 1 Dec 2022 14:19:14 +0900
Subject: [PATCH 02/28] Init

---
 siatune/apis/tune.py            | 58 +-------------------
 siatune/mm/tasks/mmtrainbase.py | 24 +++-----
 siatune/ray/__init__.py         |  5 ++
 siatune/ray/tuner.py            | 97 +++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+), 70 deletions(-)
 create mode 100644 siatune/ray/tuner.py

diff --git a/siatune/apis/tune.py b/siatune/apis/tune.py
index 40bbc467..60cf7b73 100644
--- a/siatune/apis/tune.py
+++ b/siatune/apis/tune.py
@@ -1,16 +1,10 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-from os import path as osp
-
-import mmcv
 import ray
 from mmcv.utils import Config
 
 from siatune.mm.tasks import BaseTask
-from siatune.ray.callbacks import build_callback
-from siatune.ray.schedulers import build_scheduler
-from siatune.ray.searchers import build_searcher
-from siatune.ray.spaces import build_space
-from siatune.ray.stoppers import build_stopper
+from siatune.ray import Tuner
 
 
 def tune(task_processor: BaseTask, tune_config: Config,
@@ -29,51 +23,5 @@ def tune(task_processor: BaseTask, tune_config: Config,
     trainable_cfg = tune_config.get('trainable', dict())
     trainable = task_processor.create_trainable(**trainable_cfg)
 
-    assert hasattr(tune_config, 'metric')
-    assert hasattr(tune_config, 'mode') and tune_config.mode in ['min', 'max']
-
-    tune_artifact_dir = osp.join(tune_config.work_dir, 'artifact')
-    mmcv.mkdir_or_exist(tune_artifact_dir)
-
-    stopper = tune_config.get('stop', None)
-    if stopper is not None:
-        stopper = build_stopper(stopper)
-
-    space = tune_config.get('space', None)
-    if space is not None:
-        space = build_space(space)
-
-    resources_per_trial = None
-    if not hasattr(trainable, 'default_resource_request'):
-        resources_per_trial = dict(
-            gpu=task_processor.num_workers *
-            task_processor.num_gpus_per_worker,
-            cpu=task_processor.num_workers *
-            task_processor.num_cpus_per_worker)
-
-    searcher = tune_config.get('searcher', None)
-    if searcher is not None:
-        searcher = build_searcher(searcher)
-
-    scheduler = tune_config.get('scheduler', None)
-    if scheduler is not None:
-        scheduler = build_scheduler(scheduler)
-
-    callbacks = tune_config.get('callbacks', None)
-    if callbacks is not None:
-        callbacks = [build_callback(callback) for callback in callbacks]
-
-    return ray.tune.run(
-        trainable,
-        name=exp_name,
-        metric=tune_config.metric,
-        mode=tune_config.mode,
-        stop=stopper,
-        config=space,
-        resources_per_trial=resources_per_trial,
-        num_samples=tune_config.get('num_samples', -1),
-        local_dir=tune_artifact_dir,
-        search_alg=searcher,
-        scheduler=scheduler,
-        raise_on_failed_trial=tune_config.get('raise_on_failed_trial', False),
-        callbacks=callbacks)
+    tuner = Tuner.from_cfg(tune_config, trainable)
+    return tuner.fit()
diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index fac8c4f8..227ea0b1 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -4,9 +4,9 @@
 from functools import partial
 
 import mmcv
-import ray
 import torch
-from ray.tune.integration.torch import DistributedTrainableCreator
+from ray.air.config import ScalingConfig
+from ray.train.torch import TorchTrainer
 
 from .base import BaseTask
 from .builder import TASKS
@@ -78,8 +78,7 @@ def context_aware_run(self,
     def create_trainable(
         self,
         backend: str = 'nccl',
-        timeout_s: int = 1800,
-    ) -> ray.tune.trainable:
+    ) -> TorchTrainer:
         """Get ray trainable task.
 
         Args:
@@ -94,14 +93,9 @@ def create_trainable(
 
         assert backend in ['gloo', 'nccl']
 
-        return DistributedTrainableCreator(
-            partial(
-                self.context_aware_run,
-                backend=backend,
-            ),
-            backend=backend,
-            timeout_s=timeout_s,
-            num_workers=self.num_workers,
-            num_gpus_per_worker=self.num_gpus_per_worker,
-            num_cpus_per_worker=self.num_cpus_per_worker,
-        )
+        return TorchTrainer(
+            partial(self.context_aware_run, backend=backend),
+            scaling_config=ScalingConfig(
+                resources_per_worker=dict(
+                    CPU=self.num_cpus_per_worker,
+                    GPU=self.num_gpus_per_worker)))
diff --git a/siatune/ray/__init__.py b/siatune/ray/__init__.py
index 061afde0..cb03c07b 100644
--- a/siatune/ray/__init__.py
+++ b/siatune/ray/__init__.py
@@ -1,4 +1,9 @@
 # Copyright (c) SI-Analytics. All rights reserved.
+from .callbacks import *  # noqa F403
 from .schedulers import *  # noqa F403
+from .searchers import *  # noqa F403
 from .spaces import *  # noqa F403
 from .stoppers import *  # noqa F403
+from .tuner import Tuner
+
+__all__ = ['Tuner']
diff --git a/siatune/ray/tuner.py b/siatune/ray/tuner.py
new file mode 100644
index 00000000..c470c5c8
--- /dev/null
+++ b/siatune/ray/tuner.py
@@ -0,0 +1,97 @@
+# Copyright (c) SI-Analytics. All rights reserved.
+import copy
+import os.path as osp
+
+from ray.air.config import RunConfig
+from ray.tune.tune_config import TuneConfig
+from ray.tune.tuner import Tuner as RayTuner
+
+from siatune.ray import (build_callback, build_scheduler, build_searcher,
+                         build_space, build_stopper)
+
+
+class Tuner:
+    """Wrapper class of :class:`ray.tune.tuner.Tuner`.
+
+    Args:
+        trainable (Callable):
+        work_dir (str):
+        param_space (dict, optional):
+        tune_cfg (dict, optional):
+            Refer to https://github.com/ray-project/ray/blob/ray-2.1.0/python/ray/tune/tune_config.py for details.  # noqa
+        searcher (dict, optional):
+        trial_scheduler (dict, optional):
+        stopper (dict, optional):
+        callbacks (list, optional):
+    """
+
+    def __init__(
+        self,
+        trainable,
+        work_dir,
+        param_space=None,
+        tune_cfg=None,
+        searcher=None,
+        trial_scheduler=None,
+        stopper=None,
+        callbacks=None,
+    ):
+        work_dir = osp.abspath(work_dir)
+
+        if param_space is not None:
+            param_space = build_space(param_space)
+
+        tune_cfg = copy.deepcopy(tune_cfg or dict())
+
+        if searcher is not None:
+            searcher = build_searcher(searcher)
+
+        if trial_scheduler is not None:
+            trial_scheduler = build_scheduler(trial_scheduler)
+
+        if stopper is not None:
+            stopper = build_stopper(stopper)
+
+        if callbacks is not None:
+            if isinstance(callbacks, dict):
+                callbacks = [callbacks]
+            callbacks = [build_callback(callback) for callback in callbacks]
+
+        self.tuner = RayTuner(
+            trainable,
+            param_space=param_space,
+            tune_config=TuneConfig(
+                searcher=searcher, trial_scheduler=trial_scheduler,
+                **tune_cfg),
+            run_config=RunConfig(
+                local_dir=work_dir,
+                stop=stopper,
+                callbacks=callbacks,
+                failure_config=None,  # todo
+                sync_config=None,  # todo
+                checkpoint_config=None,  # todo
+            ),
+        )
+
+    @classmethod
+    def from_cfg(cls, cfg, trainable):
+        cfg = copy.deepcopy(cfg)
+        tuner = cls(
+            trainable,
+            work_dir=cfg['work_dir'],
+            param_space=cfg.get('space', None),
+            tune_cfg=cfg.get('tune_cfg', None),
+            searcher=cfg.get('searcher', None),
+            trial_scheduler=cfg.get('trial_scheduler', None),
+            stopper=cfg.get('stopper', None),
+            callbacks=cfg.get('callbacks', None),
+        )
+
+        return tuner
+
+    @classmethod
+    def resume(cls, path, **kwargs):
+        return RayTuner.restore(path, **kwargs)
+
+    def fit(self):
+        return self.tuner.fit()

From cac34ea2b46df005618c0de472b4aab13164a808 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Thu, 1 Dec 2022 14:34:26 +0900
Subject: [PATCH 03/28] Update mmseg config

---
 configs/_base_/scheduler/asynchb.py          |  2 +-
 configs/mmseg/mmseg_asynchb_nevergrad_pso.py | 16 +++++-----------
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/configs/_base_/scheduler/asynchb.py b/configs/_base_/scheduler/asynchb.py
index 99426e01..a73b6e0d 100644
--- a/configs/_base_/scheduler/asynchb.py
+++ b/configs/_base_/scheduler/asynchb.py
@@ -1,4 +1,4 @@
-scheduler = dict(
+trial_scheduler = dict(
     type='AsyncHyperBandScheduler',
     time_attr='training_iteration',
     max_t=20,
diff --git a/configs/mmseg/mmseg_asynchb_nevergrad_pso.py b/configs/mmseg/mmseg_asynchb_nevergrad_pso.py
index 923122f0..3d81ecd6 100644
--- a/configs/mmseg/mmseg_asynchb_nevergrad_pso.py
+++ b/configs/mmseg/mmseg_asynchb_nevergrad_pso.py
@@ -4,16 +4,10 @@
     '../_base_/space/optimizer.py', '../_base_/space/batch_size.py'
 ]
 
-space = {
-    'model': {{_base_.model}},
-    'optimizer': {{_base_.optimizer}},
-    'data.samples_per_gpu': {{_base_.batch_size}},
-    'model.decode_head.num_classes': 21,
-    'model.auxiliary_head.num_classes': 21,
-}
+space = dict(
+    data=dict(samples_per_gpu={{_base_.batch_size}}),
+    model={{_base_.model}},
+    optimizer={{_base_.optimizer}})
 
 task = dict(type='MMSegmentation')
-metric = 'val/mIoU'
-mode = 'max'
-raise_on_failed_trial = False
-num_samples = 256
+tune_cfg = dict(num_samples=8, metric='val/mIoU', mode='max')

From 756147b2ab91bb16f784ee5d30b4a1141e6d984b Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Thu, 1 Dec 2022 15:22:01 +0900
Subject: [PATCH 04/28] Fix deprecated warning

---
 siatune/mm/tasks/base.py           | 4 ++--
 siatune/mm/tasks/mmtrainbase.py    | 10 +++++-----
 siatune/ray/callbacks/mlflow.py    | 4 ++--
 siatune/ray/schedulers/pbt.py      | 4 ++--
 siatune/ray/searchers/builder.py   | 4 ++--
 siatune/ray/searchers/flaml.py     | 4 ++--
 siatune/ray/searchers/hyperopt.py  | 2 +-
 siatune/ray/searchers/nevergrad.py | 2 +-
 siatune/ray/spaces/base.py         | 3 ++-
 siatune/ray/spaces/choice.py       | 3 ++-
 siatune/ray/spaces/sample_from.py  | 3 ++-
 11 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/siatune/mm/tasks/base.py b/siatune/mm/tasks/base.py
index 23bb98e1..1e419b36 100644
--- a/siatune/mm/tasks/base.py
+++ b/siatune/mm/tasks/base.py
@@ -4,7 +4,7 @@
 from copy import deepcopy
 from typing import Any, Dict, List, Optional, Sequence
 
-import ray
+from ray.tune import Trainable
 
 from siatune.mm.context import ContextManager
 from siatune.utils import ImmutableContainer
@@ -140,7 +140,7 @@ def run(self, *, args: argparse.Namespace, **kwargs) -> None:
         pass
 
     @abstractmethod
-    def create_trainable(self, *args, **kwargs) -> ray.tune.Trainable:
+    def create_trainable(self, *args, **kwargs) -> Trainable:
         """Get ray trainable task.
 
         Args:
diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index 227ea0b1..16e6dc7d 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -82,13 +82,11 @@ def create_trainable(
         """Get ray trainable task.
 
         Args:
-            backend (str):
-                The backend for dist training. Defaults to 'nccl'.
-            timeout_s (int):
-                Seconds before the torch process group times out.
+            backend (str): The backend for distributed training.
+                Defaults to 'nccl'.
 
         Returns:
-            ray.tune.trainable: The trainable task.
+            TorchTrainer: The trainable task.
         """
 
         assert backend in ['gloo', 'nccl']
@@ -96,6 +94,8 @@ def create_trainable(
         return TorchTrainer(
             partial(self.context_aware_run, backend=backend),
             scaling_config=ScalingConfig(
+                num_workers=2,
+                use_gpu=True,
                 resources_per_worker=dict(
                     CPU=self.num_cpus_per_worker,
                     GPU=self.num_gpus_per_worker)))
diff --git a/siatune/ray/callbacks/mlflow.py b/siatune/ray/callbacks/mlflow.py
index bfadb2c6..c57f1af8 100644
--- a/siatune/ray/callbacks/mlflow.py
+++ b/siatune/ray/callbacks/mlflow.py
@@ -1,10 +1,10 @@
 # Copyright (c) SI-Analytics. All rights reserved.
 from typing import List
 
+from ray.tune.experiment import Trial
 from ray.tune.integration.mlflow import \
     MLflowLoggerCallback as _MLflowLoggerCallback
 from ray.tune.integration.mlflow import logger
-from ray.tune.trial import Trial
 from ray.tune.utils.util import is_nan_or_inf
 
 from .builder import CALLBACKS
@@ -73,7 +73,7 @@ def log_trial_start(self, trial: 'Trial'):
         set the parent run ID.
 
         Args:
-            trial (Trial): `ray.tune.trial.Trial`
+            trial (Trial): :class:`ray.tune.experiment.trial.Trial`
         """
         # Create run if not already exists.
         if trial not in self._trial_runs:
diff --git a/siatune/ray/schedulers/pbt.py b/siatune/ray/schedulers/pbt.py
index ddfa982b..9280e9bc 100644
--- a/siatune/ray/schedulers/pbt.py
+++ b/siatune/ray/schedulers/pbt.py
@@ -3,10 +3,10 @@
 import random
 from typing import Callable, Dict, Optional
 
-from ray.tune.sample import Domain
+from ray.tune.experiment import Trial
 from ray.tune.schedulers.pbt import \
     PopulationBasedTraining as _PopulationBasedTraining
-from ray.tune.trial import Trial
+from ray.tune.search.sample import Domain
 
 from siatune.ray.schedulers import SCHEDULERS
 from siatune.ray.spaces import build_space
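Note: the import rewrites in this patch track Ray 2.x's package reorganization — search algorithms moved from `ray.tune.suggest` to `ray.tune.search`, `Trial` moved from `ray.tune.trial` to `ray.tune.experiment`, and `Domain` moved from `ray.tune.sample` to `ray.tune.search.sample`. A minimal sketch of the two layouts side by side (the try/except fallback is illustrative and not part of this series):

    try:  # Ray >= 2.0
        from ray.tune.search import Searcher
        from ray.tune.search.sample import Domain
        from ray.tune.experiment import Trial
    except ImportError:  # Ray 1.x layout, shown for comparison only
        from ray.tune.suggest import Searcher
        from ray.tune.sample import Domain
        from ray.tune.trial import Trial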
diff --git a/siatune/ray/searchers/builder.py b/siatune/ray/searchers/builder.py
index 6112c8c1..2d81a137 100644
--- a/siatune/ray/searchers/builder.py
+++ b/siatune/ray/searchers/builder.py
@@ -1,11 +1,11 @@
 # Copyright (c) SI-Analytics. All rights reserved.
 from mmcv.utils import Config, Registry
-from ray import tune
+from ray.tune.search import Searcher
 
 SEARCHERS = Registry('searchers')
 
 
-def build_searcher(cfg: Config) -> tune.suggest.Searcher:
+def build_searcher(cfg: Config) -> Searcher:
     """Build the searcher from configs.
 
     Args:
diff --git a/siatune/ray/searchers/flaml.py b/siatune/ray/searchers/flaml.py
index 800385ff..b984c1e8 100644
--- a/siatune/ray/searchers/flaml.py
+++ b/siatune/ray/searchers/flaml.py
@@ -1,6 +1,6 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-from ray.tune.suggest.flaml import CFO as _CFO
-from ray.tune.suggest.flaml import BlendSearch as _BlendSearch
+from ray.tune.search.flaml import CFO as _CFO
+from ray.tune.search.flaml import BlendSearch as _BlendSearch
 
 from .builder import SEARCHERS
 
diff --git a/siatune/ray/searchers/hyperopt.py b/siatune/ray/searchers/hyperopt.py
index d62780d4..37921cfc 100644
--- a/siatune/ray/searchers/hyperopt.py
+++ b/siatune/ray/searchers/hyperopt.py
@@ -1,5 +1,5 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-from ray.tune.suggest.hyperopt import HyperOptSearch as _HyperOptSearch
+from ray.tune.search.hyperopt import HyperOptSearch as _HyperOptSearch
 
 from .builder import SEARCHERS
 
diff --git a/siatune/ray/searchers/nevergrad.py b/siatune/ray/searchers/nevergrad.py
index 10a6e8fe..b4e401c7 100644
--- a/siatune/ray/searchers/nevergrad.py
+++ b/siatune/ray/searchers/nevergrad.py
@@ -2,7 +2,7 @@
 from typing import Dict, List, Optional, Union
 
 from ray.tune.result import DEFAULT_METRIC
-from ray.tune.suggest.nevergrad import NevergradSearch as _NevergradSearch
+from ray.tune.search.nevergrad import NevergradSearch as _NevergradSearch
 
 from .builder import SEARCHERS
 
diff --git a/siatune/ray/spaces/base.py b/siatune/ray/spaces/base.py
index f6e32336..5fd1f3cd 100644
--- a/siatune/ray/spaces/base.py
+++ b/siatune/ray/spaces/base.py
@@ -3,6 +3,7 @@
 from typing import Callable
 
 import ray.tune as tune
+from ray.tune.search.sample import Domain
 
 from .builder import SPACES
 
@@ -15,7 +16,7 @@ def __init__(self, **kwargs) -> None:
         self.kwargs = kwargs
 
     @property
-    def space(self) -> tune.sample.Domain:
+    def space(self) -> Domain:
         """Return the space."""
         return self.sample.__func__(**self.kwargs)
 
diff --git a/siatune/ray/spaces/choice.py b/siatune/ray/spaces/choice.py
index e9cc7c0f..b68ea54d 100644
--- a/siatune/ray/spaces/choice.py
+++ b/siatune/ray/spaces/choice.py
@@ -2,6 +2,7 @@
 from typing import Callable, Optional, Sequence
 
 import ray.tune as tune
+from ray.tune.search.sample import Domain
 
 from siatune.utils import ImmutableContainer
 
 from .base import BaseSpace
@@ -31,5 +32,5 @@ def __init__(self,
         self.categories = categories
 
     @property
-    def space(self) -> tune.sample.Domain:
+    def space(self) -> Domain:
         return self.sample.__func__(self.categories)
diff --git a/siatune/ray/spaces/sample_from.py b/siatune/ray/spaces/sample_from.py
index 1772e83e..314b5b18 100644
--- a/siatune/ray/spaces/sample_from.py
+++ b/siatune/ray/spaces/sample_from.py
@@ -2,6 +2,7 @@
 from typing import Callable, Union
 
 import ray.tune as tune
+from ray.tune.search.sample import Domain
 
 from .base import BaseSpace
 from .builder import SPACES
@@ -25,5 +26,5 @@ def __init__(self, func: Union[str, Callable]) -> None:
         self.func = func
 
     @property
-    def space(self) -> tune.sample.Domain:
+    def space(self) -> Domain:
         return self.sample.__func__(self.func)

From 73aa245da315cf16c3f04b04a908d9d91b934d27 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Thu, 1 Dec 2022 16:07:57 +0900
Subject: [PATCH 05/28] Fix trainable function signature

---
 siatune/mm/tasks/base.py           |  9 +++------
 siatune/mm/tasks/mmtrainbase.py    | 31 ++----------------------------
 siatune/ray/searchers/nevergrad.py |  1 -
 siatune/ray/tuner.py               |  5 ++---
 4 files changed, 7 insertions(+), 39 deletions(-)

diff --git a/siatune/mm/tasks/base.py b/siatune/mm/tasks/base.py
index 1e419b36..0558dfbe 100644
--- a/siatune/mm/tasks/base.py
+++ b/siatune/mm/tasks/base.py
@@ -106,16 +106,14 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
         """
         pass
 
-    def context_aware_run(self,
-                          searched_cfg: Dict,
-                          checkpoint_dir: Optional[str] = None,
-                          **kwargs) -> Any:
+    def context_aware_run(self, searched_cfg: Dict) -> Any:
         """Gather and refine the information received by users and Ray.tune to
         execute the objective task.
 
         Args:
             searched_cfg (Dict): The searched configuration.
             kwargs (**kwargs): The kwargs.
+
         Returns:
             Any: The result of the objective task.
         """
@@ -124,9 +122,8 @@ def context_aware_run(self,
         context = dict(
             args=deepcopy(self.args),
             searched_cfg=deepcopy(ImmutableContainer.decouple(searched_cfg)),
-            checkpoint_dir=checkpoint_dir,
+            # checkpoint_dir=checkpoint_dir,
         )
-        context.update(kwargs)
         return context_manager(self.run)(**context)
 
     @abstractmethod
diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index 16e6dc7d..619f91d4 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -1,7 +1,5 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-import os
 from abc import ABCMeta, abstractmethod
-from functools import partial
 
 import mmcv
 import torch
@@ -56,29 +54,7 @@ def train_model(
         """
         pass
 
-    def context_aware_run(self,
-                          searched_cfg,
-                          backend='nccl',
-                          **kwargs) -> None:
-        """Gather and refine the information received by users and Ray.tune to
-        execute the objective task.
-
-        Args:
-            searched_cfg (Config): The searched configs.
-            backend (str):
-                The backend for dist training. Defaults to 'nccl'.
-            kwargs (**kwargs): The kwargs.
-        """
-        # set non blocking mode on the nccl backend
-        # https://github.com/pytorch/pytorch/issues/50820
-        if backend == 'nccl' and os.getenv('NCCL_BLOCKING_WAIT') is None:
-            os.environ['NCCL_BLOCKING_WAIT'] = '0'
-        return super().context_aware_run(searched_cfg, **kwargs)
-
-    def create_trainable(
-        self,
-        backend: str = 'nccl',
-    ) -> TorchTrainer:
+    def create_trainable(self) -> TorchTrainer:
         """Get ray trainable task.
 
         Args:
@@ -88,11 +64,8 @@ def create_trainable(
         Returns:
             TorchTrainer: The trainable task.
         """
-
-        assert backend in ['gloo', 'nccl']
-
         return TorchTrainer(
-            partial(self.context_aware_run, backend=backend),
+            self.context_aware_run,
             scaling_config=ScalingConfig(
                 num_workers=2,
                 use_gpu=True,
diff --git a/siatune/ray/searchers/nevergrad.py b/siatune/ray/searchers/nevergrad.py
index b4e401c7..d4620f8b 100644
--- a/siatune/ray/searchers/nevergrad.py
+++ b/siatune/ray/searchers/nevergrad.py
@@ -68,7 +68,6 @@ def __init__(self,
             metric=metric,
             mode=mode,
             points_to_evaluate=points_to_evaluate,
-            max_concurrent=None,
             **kwargs)
 
     def _setup_nevergrad(self) -> None:
diff --git a/siatune/ray/tuner.py b/siatune/ray/tuner.py
index c470c5c8..73025de7 100644
--- a/siatune/ray/tuner.py
+++ b/siatune/ray/tuner.py
@@ -59,10 +59,9 @@ def __init__(
 
         self.tuner = RayTuner(
             trainable,
-            param_space=param_space,
+            param_space=dict(train_loop_config=param_space),
             tune_config=TuneConfig(
-                searcher=searcher, trial_scheduler=trial_scheduler,
-                **tune_cfg),
+                search_alg=searcher, scheduler=trial_scheduler, **tune_cfg),
             run_config=RunConfig(
                 local_dir=work_dir,
                 stop=stopper,

From f8fa7b262897f55962581e367f730fde297e971a Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Thu, 1 Dec 2022 18:21:40 +0900
Subject: [PATCH 06/28] Fix rewriter

---
 siatune/mm/context/rewriters/dump.py | 4 ++--
 siatune/mm/context/rewriters/path.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/siatune/mm/context/rewriters/dump.py b/siatune/mm/context/rewriters/dump.py
index 6267c48b..8bfa48db 100644
--- a/siatune/mm/context/rewriters/dump.py
+++ b/siatune/mm/context/rewriters/dump.py
@@ -3,7 +3,7 @@
 from os import path as osp
 from typing import Dict
 
-import ray
+from ray.air import session
 
 from siatune.utils import dump_cfg
 from .base import BaseRewriter
@@ -46,7 +46,7 @@ def __call__(self, context: Dict) -> Dict:
             Dict: The context after rewriting.
         """
         cfg = context.pop(self.key)
-        trial_id = ray.tune.get_trial_id()
+        trial_id = session.get_trial_id()
         tmp_path = self.get_temporary_path(f'{trial_id}.py')
         setattr(context.get('args'), self.arg_name, tmp_path)
         dump_cfg(cfg, tmp_path)
diff --git a/siatune/mm/context/rewriters/path.py b/siatune/mm/context/rewriters/path.py
index 041bb36c..49b45d86 100644
--- a/siatune/mm/context/rewriters/path.py
+++ b/siatune/mm/context/rewriters/path.py
@@ -1,7 +1,7 @@
 # Copyright (c) SI-Analytics. All rights reserved.
 from os import path as osp
 
-import ray
+from ray.air import session
 
 from .base import BaseRewriter
 from .builder import REWRITERS
@@ -31,5 +31,5 @@ def __call__(self, context: dict) -> dict:
         """
         value = getattr(context['args'], self.arg_name)
         setattr(context['args'], self.arg_name,
-                osp.join(value, ray.tune.get_trial_id()))
+                osp.join(value, session.get_trial_id()))
         return context

From 5d3ac5ba427a3eeeb7c03a8c0ad197ed3942d0bd Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Thu, 1 Dec 2022 20:03:57 +0900
Subject: [PATCH 07/28] Fix minor

---
 siatune/mm/tasks/base.py        | 1 -
 siatune/mm/tasks/mmseg.py       | 2 --
 siatune/mm/tasks/mmtrainbase.py | 9 +++++----
 siatune/ray/tuner.py            | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/siatune/mm/tasks/base.py b/siatune/mm/tasks/base.py
index 0558dfbe..a2352b1d 100644
--- a/siatune/mm/tasks/base.py
+++ b/siatune/mm/tasks/base.py
@@ -122,7 +122,6 @@ def context_aware_run(self, searched_cfg: Dict) -> Any:
         context = dict(
             args=deepcopy(self.args),
             searched_cfg=deepcopy(ImmutableContainer.decouple(searched_cfg)),
-            # checkpoint_dir=checkpoint_dir,
         )
         return context_manager(self.run)(**context)
 
diff --git a/siatune/mm/tasks/mmseg.py b/siatune/mm/tasks/mmseg.py
index 98b82564..205a396c 100644
--- a/siatune/mm/tasks/mmseg.py
+++ b/siatune/mm/tasks/mmseg.py
@@ -80,7 +80,6 @@ def build_model(self,
                 The train opt. Defaults to None.
             test_cfg (Optional[Config]):
                 The Test opt. Defaults to None.
-
         Returns:
             torch.nn.Module: The model.
         """
@@ -98,7 +97,6 @@ def build_dataset(
             cfg (Config): The configs.
             default_args (Optional[Config]):
                 The default args. Defaults to None.
-
         Returns:
             torch.utils.data.Dataset: The dataset.
         """
diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index 619f91d4..0c57a852 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -4,7 +4,7 @@
 import mmcv
 import torch
 from ray.air.config import ScalingConfig
-from ray.train.torch import TorchTrainer
+from ray.train.torch import TorchConfig, TorchTrainer
 
 from .base import BaseTask
 from .builder import TASKS
@@ -67,8 +67,9 @@ def create_trainable(self) -> TorchTrainer:
         return TorchTrainer(
             self.context_aware_run,
             scaling_config=ScalingConfig(
-                num_workers=2,
-                use_gpu=True,
+                num_workers=self.num_workers,
+                use_gpu=torch.cuda.is_available(),
                 resources_per_worker=dict(
                     CPU=self.num_cpus_per_worker,
-                    GPU=self.num_gpus_per_worker)))
+                    GPU=self.num_gpus_per_worker)),
+            torch_config=TorchConfig(backend='gloo'))
diff --git a/siatune/ray/tuner.py b/siatune/ray/tuner.py
index 73025de7..e1aaf079 100644
--- a/siatune/ray/tuner.py
+++ b/siatune/ray/tuner.py
@@ -90,7 +90,7 @@ def from_cfg(cls, cfg, trainable):
 
     @classmethod
     def resume(cls, path, **kwargs):
-        return RayTuner.restore(path, **kwargs)
+        return cls.restore(path, **kwargs)
 
     def fit(self):
         return self.tuner.fit()

From 04a5250934b36b1fda7e63bd9662deae9eaf1d14 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 15:10:53 +0900
Subject: [PATCH 08/28] Fix reporter

---
 siatune/mm/hooks/reporter.py    | 4 ++--
 siatune/mm/tasks/mmtrainbase.py | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/siatune/mm/hooks/reporter.py b/siatune/mm/hooks/reporter.py
index 47ec9880..e769843f 100644
--- a/siatune/mm/hooks/reporter.py
+++ b/siatune/mm/hooks/reporter.py
@@ -1,8 +1,8 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-import ray
 from mmcv.runner import HOOKS, BaseRunner
 from mmcv.runner.dist_utils import get_dist_info
 from mmcv.runner.hooks.logger import LoggerHook
+from ray.air import session
 from torch import distributed as dist
 
 
@@ -90,4 +90,4 @@ def log(self, runner: BaseRunner) -> None:
                 filter(lambda elem: self.filtering_key in elem, tags.keys())):
             return
         tags['global_step'] = self.get_iter(runner)
-        ray.tune.report(**tags)
+        session.report(tags)
diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index 0c57a852..c4103a1d 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -67,9 +67,9 @@ def create_trainable(self) -> TorchTrainer:
         return TorchTrainer(
             self.context_aware_run,
             scaling_config=ScalingConfig(
-                num_workers=self.num_workers,
-                use_gpu=torch.cuda.is_available(),
-                resources_per_worker=dict(
+                trainer_resources=dict(
                     CPU=self.num_cpus_per_worker,
-                    GPU=self.num_gpus_per_worker)),
+                    GPU=self.num_gpus_per_worker),
+                num_workers=self.num_workers,
+                use_gpu=torch.cuda.is_available()),
             torch_config=TorchConfig(backend='gloo'))

From 59a86da980e52286685bd752fc52397f15618325 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 15:49:15 +0900
Subject: [PATCH 09/28] Fix apis

---
 siatune/apis/analysis.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/siatune/apis/analysis.py b/siatune/apis/analysis.py
index 48b5c9c0..31e25d56 100644
--- a/siatune/apis/analysis.py
+++ b/siatune/apis/analysis.py
@@ -5,19 +5,19 @@
 from typing import Optional
 
 from mmcv.utils import Config, get_logger
-from ray import tune
+from ray.tune import ResultGrid
 
 from siatune.utils import ImmutableContainer, dump_cfg
 
 
-def log_analysis(analysis: tune.ExperimentAnalysis,
+def log_analysis(results: ResultGrid,
                  tune_config: Config,
                  task_config: Optional[Config] = None,
                  log_dir: Optional[str] = None) -> None:
     """Log the analysis of the experiment.
 
     Args:
-        analysis (tune.ExperimentAnalysis): The analysis of the experiment.
+        results (ResultGrid): Result of `Tuner.fit()`.
         tune_config (Config): The tune config.
         task_config (Optional[Config]): The task config. Defaults to None.
         log_dir (Optional[str]): The log dir. Defaults to None.
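Note: in Ray 2.x, `Tuner.fit()` returns a `ResultGrid` rather than an `ExperimentAnalysis`, so the `best_config`/`best_result`/`best_logdir` accessors rewritten in the next hunk hang off a single best `Result` instead. A short sketch of the new result API (assuming metric and mode were set in `TuneConfig`):

    results = tuner.fit()             # ray.tune.ResultGrid
    best = results.get_best_result()  # best ray.air.Result
    print(best.config)                # best hyperparameters
    print(best.log_dir)               # best trial's log directory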
@@ -33,10 +33,9 @@ def log_analysis(analysis: tune.ExperimentAnalysis,
     logger = get_logger(
         'siatune', log_file=osp.join(log_dir, f'{timestamp}.log'))
 
-    logger.info(
-        f'Best Hyperparam: \n'
-        f'{pformat(ImmutableContainer.decouple(analysis.best_config))}')
-    logger.info(
-        f'Best Results: \n'
-        f'{pformat(ImmutableContainer.decouple(analysis.best_result))}')
-    logger.info(f'Best Logdir: {analysis.best_logdir}')
+    result = results.get_best_result()
+    logger.info(f'Best Result: \n'
+                f'{pformat(ImmutableContainer.decouple(result))}')
+    logger.info(f'Best Hyperparam: \n'
+                f'{pformat(ImmutableContainer.decouple(result.config))}')
+    logger.info(f'Best Logdir: {result.log_dir}')

From 4fb42dd8c14ffb7344044c670f925532ca1387bc Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 20:28:13 +0900
Subject: [PATCH 10/28] Fix RayCheckpointHook

---
 siatune/mm/hooks/checkpoint.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/siatune/mm/hooks/checkpoint.py b/siatune/mm/hooks/checkpoint.py
index faada1dc..f0a1e135 100644
--- a/siatune/mm/hooks/checkpoint.py
+++ b/siatune/mm/hooks/checkpoint.py
@@ -10,7 +10,7 @@
 from mmcv.runner.checkpoint import get_state_dict, weights_to_cpu
 from mmcv.runner.dist_utils import master_only
 from mmcv.runner.hooks import CheckpointHook as _CheckpointHook
-from ray.tune.integration.torch import distributed_checkpoint_dir
+from ray.air import session
 from torch.optim import Optimizer
 
 
@@ -100,9 +100,7 @@ def _save_checkpoint(self, runner: BaseRunner) -> None:
             for name, optim in optimizer.items():
                 checkpoint['optimizer'][name] = optim.state_dict()
 
-        with distributed_checkpoint_dir(
-                step=(runner.epoch + 1) //
-                self.interval if self.by_epoch else (runner.iter + 1) //
-                self.interval) as checkpoint_dir:
+        ckpt = session.get_checkpoint()
+        with ckpt.as_directory() as checkpoint_dir:
             path = os.path.join(checkpoint_dir, 'ray_ckpt.pth')
             torch.save(checkpoint, path)

From 2c1215c8f2574248e24b7166dac2576dc2109863 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 20:58:37 +0900
Subject: [PATCH 11/28] Fix requirements

---
 requirements/optional.txt | 2 +-
 requirements/runtime.txt  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements/optional.txt b/requirements/optional.txt
index a5b95cc0..8ef39eb3 100644
--- a/requirements/optional.txt
+++ b/requirements/optional.txt
@@ -4,5 +4,5 @@ hyperopt==0.2.5
 mlflow==1.21.0
 nevergrad==0.4.3.post7
 optuna==2.10.0
+scikit-learn
 scikit-optimize==0.9.0
-sklearn
diff --git a/requirements/runtime.txt b/requirements/runtime.txt
index 7cf3a24b..d9ef98e1 100644
--- a/requirements/runtime.txt
+++ b/requirements/runtime.txt
@@ -1,5 +1,6 @@
 mmcv-full>=1.4.7
 pandas
 protobuf<=3.20
+pyarrow
 ray[default]==2.1.0
 tabulate

From 709bb9cc4f6b3d551374b302efd854862d903ed5 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 21:36:43 +0900
Subject: [PATCH 12/28] Fix test code for rewriters

---
 tests/test_mm/test_rewriters.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_mm/test_rewriters.py b/tests/test_mm/test_rewriters.py
index 282c29d7..5cdd9329 100644
--- a/tests/test_mm/test_rewriters.py
+++ b/tests/test_mm/test_rewriters.py
@@ -33,7 +33,7 @@ def __call__(self, context: Dict) -> Dict:
         build_rewriter(dict(type='DummyRewriter')), DummyRewriter)
 
 
-@patch('ray.tune.get_trial_id')
+@patch('ray.air.session.get_trial_id')
 def test_dump(mock_get_trial_id):
     mock_get_trial_id.return_value = 'test'
     dump = Dump(key='cfg', arg_name='config')
@@ -99,7 +99,7 @@ def test_patch():
     })._cfg_dict
 
 
-@patch('ray.tune.get_trial_id')
+@patch('ray.air.session.get_trial_id')
 def test_append_trial_id_to_path(mock_get_trial_id):
     mock_get_trial_id.return_value = 'test'
     args = MagicMock()

From 940320bf76b4108b25078592a318e0b27ca4c204 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 21:54:21 +0900
Subject: [PATCH 13/28] Fix test code for hooks

---
 siatune/mm/hooks/checkpoint.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/siatune/mm/hooks/checkpoint.py b/siatune/mm/hooks/checkpoint.py
index f0a1e135..4c2109e4 100644
--- a/siatune/mm/hooks/checkpoint.py
+++ b/siatune/mm/hooks/checkpoint.py
@@ -4,13 +4,13 @@
 from typing import Optional
 
 import mmcv
+import ray.tune as tune
 import torch
 from mmcv.parallel import is_module_wrapper
 from mmcv.runner import HOOKS, BaseRunner
 from mmcv.runner.checkpoint import get_state_dict, weights_to_cpu
 from mmcv.runner.dist_utils import master_only
 from mmcv.runner.hooks import CheckpointHook as _CheckpointHook
-from ray.air import session
 from torch.optim import Optimizer
 
 
@@ -100,7 +100,10 @@ def _save_checkpoint(self, runner: BaseRunner) -> None:
             for name, optim in optimizer.items():
                 checkpoint['optimizer'][name] = optim.state_dict()
 
-        ckpt = session.get_checkpoint()
-        with ckpt.as_directory() as checkpoint_dir:
+        step = (runner.epoch + 1) // self.interval
+        if not self.by_epoch:
+            step //= runner.iter + 1
+
+        with tune.checkpoint_dir(step=step) as checkpoint_dir:
             path = os.path.join(checkpoint_dir, 'ray_ckpt.pth')
             torch.save(checkpoint, path)

From cddfc3c7b3a29f00fadaea5c919fa42f9ea08511 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Fri, 2 Dec 2022 23:38:38 +0900
Subject: [PATCH 14/28] Fix test code for tasks

---
 tests/test_mm/test_tasks.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/test_mm/test_tasks.py b/tests/test_mm/test_tasks.py
index f0d6d0c2..72a7df38 100644
--- a/tests/test_mm/test_tasks.py
+++ b/tests/test_mm/test_tasks.py
@@ -6,6 +6,7 @@
 import torch
 from mmcv.utils import Config
 from ray import tune
+from ray.air import session
 
 from siatune.mm.tasks import (TASKS, BaseTask, BlackBoxTask,
                               ContinuousTestFunction, DiscreteTestFunction,
@@ -194,7 +195,7 @@ def test_mmcls(*not_used):
     task.run(args=task.args)
 
 
-@patch('ray.tune.report', side_effect=report_to_session)
+@patch('ray.air.session.report', side_effect=report_to_session)
 def test_mm_train_based_task(mock_report):
     with pytest.raises(TypeError):
         MMTrainBasedTask()
@@ -250,7 +251,7 @@ def train_model(self, model, dataset, cfg):
                 loss.backward()
                 optimizer.step()
                 total_loss += loss.item()
-            tune.report(loss=total_loss / (batch_idx + 1))
+            session.report(loss=total_loss / (batch_idx + 1))
 
     def run(self, *, searched_cfg, **kwargs):
         cfg = searched_cfg.get('cfg')
@@ -275,4 +276,6 @@ def run(self, *, searched_cfg, **kwargs):
     task.set_resource(1, 0, 1)
     task.context_aware_run(searched_cfg=dict(cfg=cfg))
     assert 'loss' in get_session()
-    tune.run(task.create_trainable(backend='gloo'), config=dict(cfg=cfg))
+
+    trainable = task.create_trainable()
+    tune.Tuner(trainable).fit()

From b47f3c00f7266d20c446e391c948ea6d6addf542 Mon Sep 17 00:00:00 2001
From: KKIEEK
Date: Sat, 3 Dec 2022 04:48:48 +0900
Subject: [PATCH 15/28] Fix test code for apis

---
 tests/test_apis/test_apis.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/tests/test_apis/test_apis.py b/tests/test_apis/test_apis.py
index 16260ade..40a4b64c 100644
--- a/tests/test_apis/test_apis.py
+++ b/tests/test_apis/test_apis.py
@@ -1,9 +1,10 @@
 import os
+import random
 import tempfile
 from unittest.mock import MagicMock
 
 import mmcv
-from ray.tune.trainable import Trainable
+import ray
 
 from siatune.apis import log_analysis, tune
 
@@ -37,14 +38,11 @@ def test_log_analysis():
 
 
 def test_tune():
-
-    class TestTrainable(Trainable):
-
-        def step(self):
-            result = {'name': self.trial_name, 'trial_id': self.trial_id}
-            return result
+    def trainable(config):
+        ray.tune.report({'metric': random.random()})
 
     mock_task_processor = MagicMock()
-    mock_task_processor.create_trainable.return_value = TestTrainable
+    mock_task_processor.create_trainable.return_value = trainable
     with tempfile.TemporaryDirectory() as tmpdir:
         tune_config = mmcv.Config(
             dict(

From ca42bfc6749f72530a956d90c779db12a5f40211 Mon Sep 17 00:00:00 2001
From: Younghwan Na <100389977+yhna940@users.noreply.github.com>
Date: Thu, 15 Dec 2022 10:30:03 +0900
Subject: [PATCH 16/28] :memo: Del checkpoint for base task proc

---
 siatune/mm/tasks/base.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/siatune/mm/tasks/base.py b/siatune/mm/tasks/base.py
index a2352b1d..364c4b2f 100644
--- a/siatune/mm/tasks/base.py
+++ b/siatune/mm/tasks/base.py
@@ -29,9 +29,7 @@ class BaseTask(metaclass=ABCMeta):
         1. args (argparse.Namespace): The low level CLI arguments.
         2. searched_cfg (Dict):
             The configuration searched by the algorithm.
-        3. checkpoint_dir (Optional[str]):
-            The directory of checkpoints that contains the states.
-    Inputs: searched_cfg (Dict), checkpoint_dir (Optional[str])
+    Inputs: searched_cfg (Dict)
     Outputs: None
     """

From 411f3077a9704cb5458204da9857987e987f5e3b Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Thu, 15 Dec 2022 10:55:54 +0900
Subject: [PATCH 17/28] Update siatune/apis/analysis.py

Co-authored-by: Hakjin Lee
Signed-off-by: Junhwa Song
---
 siatune/apis/analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/siatune/apis/analysis.py b/siatune/apis/analysis.py
index 31e25d56..19b1f936 100644
--- a/siatune/apis/analysis.py
+++ b/siatune/apis/analysis.py
@@ -17,7 +17,7 @@ def log_analysis(results: ResultGrid,
     """Log the analysis of the experiment.
 
     Args:
-        results (ResultGrid): Result of `Tuner.fit()`.
+        results (ResultGrid): Experiment results of `Tuner.fit()`.
         tune_config (Config): The tune config.
         task_config (Optional[Config]): The task config. Defaults to None.
        log_dir (Optional[str]): The log dir. Defaults to None.

From 791111d7b3c8d2d096e826d91c0dba9f87b4aa26 Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Thu, 15 Dec 2022 11:40:48 +0900
Subject: [PATCH 18/28] Update siatune/mm/tasks/mmtrainbase.py

Co-authored-by: Hakjin Lee
Signed-off-by: Junhwa Song
---
 siatune/mm/tasks/mmtrainbase.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index c4103a1d..7aed4ae4 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -58,8 +58,6 @@ def create_trainable(self) -> TorchTrainer:
         """Get ray trainable task.
 
         Args:
-            backend (str): The backend for distributed training.
-                Defaults to 'nccl'.
 
         Returns:
             TorchTrainer: The trainable task.

From 703d5a18f87d0990b313ab0151f67958b604ed0c Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Thu, 15 Dec 2022 12:19:24 +0900
Subject: [PATCH 19/28] Update siatune/mm/tasks/mmtrainbase.py

Co-authored-by: Hakjin Lee
Signed-off-by: Junhwa Song
---
 siatune/mm/tasks/mmtrainbase.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index 7aed4ae4..d9213f96 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -57,8 +57,6 @@ def train_model(
     def create_trainable(self) -> TorchTrainer:
         """Get ray trainable task.
 
-        Args:
-
         Returns:
             TorchTrainer: The trainable task.
         """

From 9eda02d45ef6d250c63834b7f1caf82920a76701 Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Thu, 15 Dec 2022 13:09:09 +0900
Subject: [PATCH 20/28] Support custom trainer and backend (#91)

* Support custom trainer and backend

* Add comment

* Add assertion

* Fix typo

* Update siatune/ray/config.py

* Apply lint

* Fix test code

Co-authored-by: Hakjin Lee
---
 siatune/mm/tasks/mmtrainbase.py | 14 +++++---
 siatune/ray/config.py           | 57 +++++++++++++++++++++++++++++++++
 tests/test_mm/test_tasks.py     |  2 +-
 3 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 siatune/ray/config.py

diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index d9213f96..b3c94937 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -4,8 +4,9 @@
 import mmcv
 import torch
 from ray.air.config import ScalingConfig
-from ray.train.torch import TorchConfig, TorchTrainer
+from ray.train.data_parallel_trainer import DataParallelTrainer
 
+from siatune.ray.config import CustomBackendConfig
 from .base import BaseTask
 from .builder import TASKS
 
@@ -54,18 +55,21 @@ def train_model(
         """
         pass
 
-    def create_trainable(self) -> TorchTrainer:
+    def create_trainable(self) -> DataParallelTrainer:
         """Get ray trainable task.
 
         Returns:
             TorchTrainer: The trainable task.
         """
-        return TorchTrainer(
+        assert self.num_workers == self.num_gpus_per_worker, (
+            '`num_workers` must be equal to `num_gpus_per_worker`.')
+
+        return DataParallelTrainer(
             self.context_aware_run,
+            backend_config=CustomBackendConfig(),
             scaling_config=ScalingConfig(
                 trainer_resources=dict(
                     CPU=self.num_cpus_per_worker,
                     GPU=self.num_gpus_per_worker),
                 num_workers=self.num_workers,
-                use_gpu=torch.cuda.is_available()),
-            torch_config=TorchConfig(backend='gloo'))
+                use_gpu=torch.cuda.is_available()))
diff --git a/siatune/ray/config.py b/siatune/ray/config.py
new file mode 100644
index 00000000..75794798
--- /dev/null
+++ b/siatune/ray/config.py
@@ -0,0 +1,57 @@
+# Copyright (c) SI-Analytics. All rights reserved.
+# Modified from https://github.com/ray-project/ray/blob/ray-2.1.0/python/ray/train/torch/config.py  # noqa
+
+import logging
+import os
+from dataclasses import dataclass
+
+import ray
+import torch.distributed as dist
+from ray.train._internal.utils import get_address_and_port
+from ray.train._internal.worker_group import WorkerGroup
+from ray.train.backend import BackendConfig
+from ray.train.torch.config import _set_nccl_network_interface, _TorchBackend
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class CustomBackendConfig(BackendConfig):
+    """Configuration for torch process group setup."""
+
+    @property
+    def backend_cls(self):
+        return _CustomTorchBackend
+
+
+class _CustomTorchBackend(_TorchBackend):
+
+    def on_start(self, worker_group: WorkerGroup,
+                 backend_config: BackendConfig):
+        if not dist.is_available():
+            raise RuntimeError('Distributed torch is not available.')
+
+        worker_group.execute(_set_nccl_network_interface)
+
+        master_addr, master_port = worker_group.execute_single(
+            0, get_address_and_port)
+
+        def set_env_vars(addr, port, rank, world_size):
+            os.environ['MASTER_ADDR'] = addr
+            os.environ['MASTER_PORT'] = str(port)
+            os.environ['RANK'] = str(rank)
+            os.environ['LOCAL_RANK'] = str(rank)
+            os.environ['WORLD_SIZE'] = str(world_size)
+
+        setup_futures = []
+        for i in range(len(worker_group)):
+            setup_futures.append(
+                worker_group.execute_single_async(
+                    i,
+                    set_env_vars,
+                    addr=master_addr,
+                    port=master_port,
+                    rank=i,
+                    world_size=len(worker_group),
+                ))
+        ray.get(setup_futures)
diff --git a/tests/test_mm/test_tasks.py b/tests/test_mm/test_tasks.py
index 72a7df38..a779dc86 100644
--- a/tests/test_mm/test_tasks.py
+++ b/tests/test_mm/test_tasks.py
@@ -273,7 +273,7 @@ def run(self, *, searched_cfg, **kwargs):
         ))
 
     task = TestTask()
-    task.set_resource(1, 0, 1)
+    task.set_resource()
     task.context_aware_run(searched_cfg=dict(cfg=cfg))
     assert 'loss' in get_session()
 

From 082ea7b4c47e2a02352565a4c1d9789800928c90 Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Thu, 15 Dec 2022 18:24:28 +0900
Subject: [PATCH 21/28] Update siatune/mm/tasks/mmtrainbase.py

Co-authored-by: Hakjin Lee
Signed-off-by: Junhwa Song
---
 siatune/mm/tasks/mmtrainbase.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index b3c94937..e897a71a 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -59,7 +59,7 @@ def create_trainable(self) -> DataParallelTrainer:
         """Get ray trainable task.
 
         Returns:
-            TorchTrainer: The trainable task.
+            DataParallelTrainer: The trainable task.
         """
         assert self.num_workers == self.num_gpus_per_worker, (
             '`num_workers` must be equal to `num_gpus_per_worker`.')

From 789ca62c2daccb5eca8aa924258fa612a5ef3cfb Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Fri, 16 Dec 2022 10:34:56 +0900
Subject: [PATCH 22/28] Upgrade MMTask (#97)

* Update class signature

* Update mmseg

* Update mmdet

* Update mmcls

* Update configs

* Fix test code
---
 .../mmcls_cifar_100_asynchb_nevergrad_pso.py |   7 +-
 configs/mmdet/mmdet_asynchb_nevergrad_pso.py |   7 +-
 configs/mmseg/mmseg_asynchb_nevergrad_pso.py |  11 +-
 setup.cfg                                    |   4 +
 siatune/mm/tasks/mmcls.py                    | 242 ++++++++---------
 siatune/mm/tasks/mmdet.py                    | 254 +++++++++---------
 siatune/mm/tasks/mmseg.py                    | 225 ++++++++--------
 siatune/mm/tasks/mmtrainbase.py              |  48 +---
 tests/test_mm/test_tasks.py                  |  45 ++--
 9 files changed, 399 insertions(+), 444 deletions(-)

diff --git a/configs/mmcls/mmcls_cifar_100_asynchb_nevergrad_pso.py b/configs/mmcls/mmcls_cifar_100_asynchb_nevergrad_pso.py
index eda56e16..39cf8842 100644
--- a/configs/mmcls/mmcls_cifar_100_asynchb_nevergrad_pso.py
+++ b/configs/mmcls/mmcls_cifar_100_asynchb_nevergrad_pso.py
@@ -5,14 +5,11 @@
 ]
 
 space = {
+    'data.samples_per_gpu': {{_base_.batch_size}},
     'model': {{_base_.model}},
     'model.head.num_classes': 100,
     'optimizer': {{_base_.optimizer}},
-    'data.samples_per_gpu': {{_base_.batch_size}},
 }
 
 task = dict(type='MMClassification')
-metric = 'val/accuracy_top-1'
-mode = 'max'
-raise_on_failed_trial = False
-num_samples = 256
+tune_cfg = dict(num_samples=8, metric='val/accuracy_top-1', mode='max')
diff --git a/configs/mmdet/mmdet_asynchb_nevergrad_pso.py b/configs/mmdet/mmdet_asynchb_nevergrad_pso.py
index d06450ab..e780f291 100644
--- a/configs/mmdet/mmdet_asynchb_nevergrad_pso.py
+++ b/configs/mmdet/mmdet_asynchb_nevergrad_pso.py
@@ -5,13 +5,10 @@
 ]
 
 space = {
+    'data.samples_per_gpu': {{_base_.batch_size}},
     'model': {{_base_.model}},
     'optimizer': {{_base_.optimizer}},
-    'data.samples_per_gpu': {{_base_.batch_size}},
 }
 
 task = dict(type='MMDetection')
-metric = 'val/AP'
-mode = 'max'
-raise_on_failed_trial = False
-num_samples = 256
+tune_cfg = dict(num_samples=8, metric='val/AP', mode='max')
diff --git a/configs/mmseg/mmseg_asynchb_nevergrad_pso.py b/configs/mmseg/mmseg_asynchb_nevergrad_pso.py
index 3d81ecd6..a2e13b36 100644
--- a/configs/mmseg/mmseg_asynchb_nevergrad_pso.py
+++ b/configs/mmseg/mmseg_asynchb_nevergrad_pso.py
@@ -4,10 +4,13 @@
     '../_base_/space/optimizer.py', '../_base_/space/batch_size.py'
 ]
 
-space = dict(
-    data=dict(samples_per_gpu={{_base_.batch_size}}),
-    model={{_base_.model}},
-    optimizer={{_base_.optimizer}})
+space = {
+    'data.samples_per_gpu': {{_base_.batch_size}},
+    'model': {{_base_.model}},
+    'model.decode_head.num_classes': 21,
+    'model.auxiliary_head.num_classes': 21,
+    'optimizer': {{_base_.optimizer}},
+}
 
 task = dict(type='MMSegmentation')
 tune_cfg = dict(num_samples=8, metric='val/mIoU', mode='max')
diff --git a/setup.cfg b/setup.cfg
index bdbd251f..7852e3b4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,3 +1,7 @@
+[flake8]
+per-file-ignores =
+    siatune/mm/tasks/mm*.py: E251,E501
+
 [isort]
 line_length = 79
 multi_line_output = 0
diff --git a/siatune/mm/tasks/mmcls.py b/siatune/mm/tasks/mmcls.py
index 0e5803b7..fdd8dc7a 100644
--- a/siatune/mm/tasks/mmcls.py
+++ b/siatune/mm/tasks/mmcls.py
@@ -3,14 +3,9 @@
 import copy
 import os
 import time
+import warnings
 from os import path as osp
-from typing import Optional, Sequence
-
-import mmcv
-import torch
-import torch.distributed as dist
-from mmcv.runner import get_dist_info
-from mmcv.utils import Config, DictAction, get_git_hash
+from typing import Sequence
 
 from .builder import TASKS
 from .mmtrainbase import MMTrainBasedTask
@@ -18,16 +13,18 @@
 
 @TASKS.register_module()
 class MMClassification(MMTrainBasedTask):
-    """MMClassification Wrapping class for ray tune."""
+    """MMClassification wrapper class for `ray.tune`.
 
-    def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
-        """Define and parse the necessary arguments for the task.
+    It is modified from https://github.com/open-mmlab/mmclassification/blob/v0.23.2/tools/train.py
 
-        Args:
-            args (Sequence[str]): The args.
-        Returns:
-            argparse.Namespace: The parsed args.
-        """
+    Attributes:
+        args (Sequence[str]):
+    """
+
+    VERSION = 'v0.23.2'
+
+    def parse_args(self, task_args: Sequence[str]):
+        from mmcv import DictAction
 
         parser = argparse.ArgumentParser(description='Train a model')
         parser.add_argument('config', help='train config file path')
@@ -39,6 +36,31 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
             '--no-validate',
             action='store_true',
             help='whether not to evaluate the checkpoint during training')
+        group_gpus = parser.add_mutually_exclusive_group()
+        group_gpus.add_argument(
+            '--device', help='device used for training. (Deprecated)')
+        group_gpus.add_argument(
+            '--gpus',
+            type=int,
+            help='(Deprecated, please use --gpu-id) number of gpus to use '
+            '(only applicable to non-distributed training)')
+        group_gpus.add_argument(
+            '--gpu-ids',
+            type=int,
+            nargs='+',
+            help='(Deprecated, please use --gpu-id) ids of gpus to use '
+            '(only applicable to non-distributed training)')
+        group_gpus.add_argument(
+            '--gpu-id',
+            type=int,
+            default=0,
+            help='id of gpu to use '
+            '(only applicable to non-distributed training)')
+        parser.add_argument(
+            '--ipu-replicas',
+            type=int,
+            default=None,
+            help='num of ipu replicas to use')
         parser.add_argument(
             '--seed', type=int, default=None, help='random seed')
         parser.add_argument(
@@ -53,108 +75,55 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
             '--cfg-options',
             nargs='+',
             action=DictAction,
-            help='override some settings in the used config, the key-value '
-            'pair in xxx=yyy format will be merged into config file. If the '
-            'value to be overwritten is a list, it should be like key="[a,b]" '
-            'or key=a,b It also allows nested list/tuple values, e.g. '
-            'key="[(a,b),(c,d)]" Note that the quotation marks are necessary '
-            'and that no white space is allowed.')
-        args = parser.parse_args(args)
-        return args
-
-    def build_model(self,
-                    cfg: Config,
-                    train_cfg: Optional[Config] = None,
-                    test_cfg: Optional[Config] = None) -> torch.nn.Module:
-        """Build the model from configs.
-
-        Args:
-            cfg (Config): The configs.
-            train_cfg (Optional[Config]):
-                The train opt. Defaults to None.
-            test_cfg (Optional[Config]):
-                The Test opt. Defaults to None.
-
-        Returns:
-            torch.nn.Module: The model.
-        """
-
-        from mmcls.models import build_classifier
-        return build_classifier(cfg)
-
-    def build_dataset(
-            self,
-            cfg: Config,
-            default_args: Optional[Config] = None) -> torch.utils.data.Dataset:
-        """Build the dataset from configs.
-
-        Args:
-            cfg (Config): The configs.
-            default_args (Optional[Config]):
-                The default args. Defaults to None.
-
-        Returns:
-            torch.utils.data.Dataset: The dataset.
-        """
-
-        from mmcls.datasets.builder import build_dataset
-        return build_dataset(cfg, default_args)
-
-    def train_model(self,
-                    model: torch.nn.Module,
-                    dataset: torch.utils.data.Dataset,
-                    cfg: Config,
-                    distributed: bool = True,
-                    validate: bool = False,
-                    timestamp: Optional[str] = None,
-                    meta: Optional[dict] = None) -> None:
-        from mmcls.apis.train import train_model
-        """Train the model.
-
-        Args:
-            model (torch.nn.Module): The model.
-            dataset (torch.utils.data.Dataset): The dataset.
-            cfg (Config): The configs.
-            distributed (bool):
-                Whether or not distributed. Defaults to True.
-            validate (bool):
-                Whether or not validate. Defaults to False.
-            timestamp (Optional[str]):
-                The timestamp. Defaults to None.
-            meta (Optional[dict]):
-                The meta. Defaults to None.
-        """
-
-        train_model(
-            model, dataset, cfg, distributed, validate, timestamp, meta=meta)
-        return
-
-    def run(self, *, args: argparse.Namespace, **kwargs) -> None:
+            help='override some settings in the used config, the key-value pair '
+            'in xxx=yyy format will be merged into config file. If the value to '
+            'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+            'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+            'Note that the quotation marks are necessary and that no white space '
+            'is allowed.')
+        parser.add_argument(
+            '--launcher',
+            choices=['none', 'pytorch', 'slurm', 'mpi'],
+            default='none',
+            help='job launcher')
+        parser.add_argument('--local_rank', type=int, default=0)
+        args = parser.parse_args(task_args)
+        if 'LOCAL_RANK' not in os.environ:
+            os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+        return args
+
+    def run(self, args: argparse.Namespace):
         """Run the task.
 
         Args:
             args (argparse.Namespace):
                 The args that received from context manager.
         """
-
+        import mmcv
+        import torch
+        import torch.distributed as dist
         from mmcls import __version__
-        from mmcls.apis import init_random_seed, set_random_seed
-        from mmcls.utils import (collect_env, get_root_logger,
-                                 setup_multi_processes)
-
-        if 'LOCAL_RANK' not in os.environ:
-            os.environ['LOCAL_RANK'] = str(dist.get_rank())
+        from mmcls.apis import init_random_seed, set_random_seed, train_model
+        from mmcls.datasets import build_dataset
+        from mmcls.models import build_classifier
+        from mmcls.utils import (auto_select_device, collect_env,
+                                 get_root_logger, setup_multi_processes)
+        from mmcv import Config
+        from mmcv.runner import get_dist_info, init_dist
 
         cfg = Config.fromfile(args.config)
         if args.cfg_options is not None:
             cfg.merge_from_dict(args.cfg_options)
 
+        # set multi-process settings
+        setup_multi_processes(cfg)
+
         # set cudnn_benchmark
         if cfg.get('cudnn_benchmark', False):
             torch.backends.cudnn.benchmark = True
 
-        # work_dir is determined in this priority:
-        # CLI > segment in file > filename
+        # work_dir is determined in this priority: CLI > segment in file > filename
         if args.work_dir is not None:
             # update configs according to CLI args if args.work_dir is not None
             cfg.work_dir = args.work_dir
         elif cfg.get('work_dir', None) is None:
             # use config filename as default work_dir if cfg.work_dir is None
             cfg.work_dir = osp.join('./work_dirs',
                                     osp.splitext(osp.basename(args.config))[0])
         if args.resume_from is not None:
             cfg.resume_from = args.resume_from
+        if args.gpus is not None:
+            cfg.gpu_ids = range(1)
+            warnings.warn('`--gpus` is deprecated because we only support '
+                          'single GPU mode in non-distributed training. '
+                          'Use `gpus=1` now.')
+        if args.gpu_ids is not None:
+            cfg.gpu_ids = args.gpu_ids[0:1]
+            warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
+                          'Because we only support single GPU mode in '
+                          'non-distributed training. Use the first GPU '
+                          'in `gpu_ids` now.')
+        if args.gpus is None and args.gpu_ids is None:
+            cfg.gpu_ids = [args.gpu_id]
+
+        if args.ipu_replicas is not None:
+            cfg.ipu_replicas = args.ipu_replicas
+            args.device = 'ipu'
 
         # init distributed env first, since logger depends on the dist info.
-        distributed = True
-        # gpu_ids is used to calculate iter when resuming checkpoint
-        _, world_size = get_dist_info()
-        cfg.gpu_ids = range(world_size)
+        if args.launcher == 'none':
+            distributed = False
+        else:
+            distributed = True
+            init_dist(args.launcher, **cfg.dist_params)
+            _, world_size = get_dist_info()
+            cfg.gpu_ids = range(world_size)
 
         # create work_dir
         mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
         # dump config
         cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
         # init the logger before other steps
         timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
         log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
         logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
 
-        # set multi-process settings
-        setup_multi_processes(cfg)
-
         # init the meta dict to record some important information such as
         # environment info and seed, which will be logged
         meta = dict()
         # log env info
         env_info_dict = collect_env()
-        env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
+        env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
         dash_line = '-' * 60 + '\n'
-        logger.info('Environment info:\n' + dash_line + env_info +  # noqa W504
-                    '\n' + dash_line)
+        logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+                    dash_line)
         meta['env_info'] = env_info
 
         # log some basic info
         logger.info(f'Distributed training: {distributed}')
         logger.info(f'Config:\n{cfg.pretty_text}')
 
         # set random seeds
-        seed = init_random_seed(args.seed)
+        cfg.device = args.device or auto_select_device()
+        seed = init_random_seed(args.seed, device=cfg.device)
         seed = seed + dist.get_rank() if args.diff_seed else seed
         logger.info(f'Set random seed to {seed}, '
                     f'deterministic: {args.deterministic}')
         set_random_seed(seed, deterministic=args.deterministic)
         cfg.seed = seed
         meta['seed'] = seed
         meta['exp_name'] = osp.basename(args.config)
 
-        model = self.build_model(
-            cfg.model,
-            train_cfg=cfg.get('train_cfg'),
-            test_cfg=cfg.get('test_cfg'))
+        model = build_classifier(cfg.model)
         model.init_weights()
-        # SyncBN is not support for DP
-        logger.info(model)
 
-        datasets = [self.build_dataset(cfg.data.train)]
+        datasets = [build_dataset(cfg.data.train)]
         if len(cfg.workflow) == 2:
             val_dataset = copy.deepcopy(cfg.data.val)
             val_dataset.pipeline = cfg.data.train.pipeline
-            datasets.append(self.build_dataset(val_dataset))
-        if cfg.checkpoint_config is not None:
-            # save mmcls version, config file content and class names in
-            # checkpoints as meta data
-            cfg.checkpoint_config.meta = dict(
-                mmcls_version=f'{__version__}+{get_git_hash()[:7]}',
+            datasets.append(build_dataset(val_dataset))
+
+        # save mmcls version, config file content and class names in
+        # runner as meta data
+        meta.update(
+            dict(
+                mmcls_version=__version__,
                 config=cfg.pretty_text,
-                CLASSES=datasets[0].CLASSES)
+                CLASSES=datasets[0].CLASSES))
+
         # add an attribute for visualization convenience
         model.CLASSES = datasets[0].CLASSES
-        # passing checkpoint meta for saving best checkpoint
-        meta.update(cfg.checkpoint_config.meta)
-        self.train_model(
+
+        train_model(
             model,
             datasets,
             cfg,
-            distributed=True,
+            distributed=distributed,
             validate=(not args.no_validate),
             timestamp=timestamp,
+            device=cfg.device,
             meta=meta)
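Note: the rewritten `MMClassification` above essentially inlines mmclassification v0.23.2's `tools/train.py` into `run()`, deferring the framework imports into the method body, while `parse_args()` mirrors the upstream CLI. Judging only from the pieces shown in this series, a task wrapper is driven roughly like this (configuration contents are illustrative):

    task = MMClassification()            # registered through @TASKS.register_module()
    trainable = task.create_trainable()  # DataParallelTrainer, as of patch 20
    Tuner.from_cfg(tune_config, trainable).fit()  # as in siatune/apis/tune.py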
diff --git a/siatune/mm/tasks/mmdet.py b/siatune/mm/tasks/mmdet.py
index d84d182e..6504d339 100644
--- a/siatune/mm/tasks/mmdet.py
+++ b/siatune/mm/tasks/mmdet.py
@@ -3,14 +3,9 @@
 import copy
 import os
 import time
+import warnings
 from os import path as osp
-from typing import Optional, Sequence
-
-import mmcv
-import torch
-import torch.distributed as dist
-from mmcv.runner import get_dist_info
-from mmcv.utils import Config, DictAction, get_git_hash
+from typing import Sequence
 
 from .builder import TASKS
 from .mmtrainbase import MMTrainBasedTask
@@ -18,16 +13,18 @@
 @TASKS.register_module()
 class MMDetection(MMTrainBasedTask):
-    """MMDetection Wrapping class for ray tune."""
+    """MMDetection wrapper class for `ray.tune`.
 
-    def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
-        """Define and parse the necessary arguments for the task.
+    It is modified from https://github.com/open-mmlab/mmdetection/blob/v2.25.2/tools/train.py
 
-        Args:
-            args (Sequence[str]): The args.
-        Returns:
-            argparse.Namespace: The parsed args.
-        """
+    Attributes:
+        args (Sequence[str]):
+    """
+
+    VERSION = 'v2.25.2'
+
+    def parse_args(self, task_args: Sequence[str]):
+        from mmcv import DictAction
 
         parser = argparse.ArgumentParser(description='Train a detector')
         parser.add_argument('config', help='train config file path')
@@ -43,6 +40,24 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
             '--no-validate',
             action='store_true',
             help='whether not to evaluate the checkpoint during training')
+        group_gpus = parser.add_mutually_exclusive_group()
+        group_gpus.add_argument(
+            '--gpus',
+            type=int,
+            help='(Deprecated, please use --gpu-id) number of gpus to use '
+            '(only applicable to non-distributed training)')
+        group_gpus.add_argument(
+            '--gpu-ids',
+            type=int,
+            nargs='+',
+            help='(Deprecated, please use --gpu-id) ids of gpus to use '
+            '(only applicable to non-distributed training)')
+        group_gpus.add_argument(
+            '--gpu-id',
+            type=int,
+            default=0,
+            help='id of gpu to use '
+            '(only applicable to non-distributed training)')
         parser.add_argument(
             '--seed', type=int, default=None, help='random seed')
         parser.add_argument(
@@ -57,93 +72,44 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
             '--options',
             nargs='+',
             action=DictAction,
-            help='override some settings in the used config, the '
-            'key-value pair in xxx=yyy format will be merged into config file'
-            ' (deprecate), change to --cfg-options instead.')
+            help='override some settings in the used config, the key-value pair '
+            'in xxx=yyy format will be merged into config file (deprecate), '
+            'change to --cfg-options instead.')
         parser.add_argument(
             '--cfg-options',
             nargs='+',
             action=DictAction,
-            help='override some settings in the used config, the '
-            'key-value pair in xxx=yyy format will be merged into config '
-            'file. If the value to be overwritten is a list, it should be '
-            'like key="[a,b]" or key=a,b It also allows nested list/tuple '
-            'values, e.g. key="[(a,b),(c,d)]" Note that the quotation marks '
-            'are necessary and that no white space is allowed.')
+            help='override some settings in the used config, the key-value pair '
+            'in xxx=yyy format will be merged into config file. If the value to '
+            'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+            'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+            'Note that the quotation marks are necessary and that no white space '
+            'is allowed.')
+        parser.add_argument(
+            '--launcher',
+            choices=['none', 'pytorch', 'slurm', 'mpi'],
+            default='none',
+            help='job launcher')
+        parser.add_argument('--local_rank', type=int, default=0)
         parser.add_argument(
             '--auto-scale-lr',
             action='store_true',
             help='enable automatically scaling LR.')
-        args = parser.parse_args(args)
-        return args
-
-    def build_model(self,
-                    cfg: Config,
-                    train_cfg: Optional[Config] = None,
-                    test_cfg: Optional[Config] = None) -> torch.nn.Module:
-        """Build the model from configs.
-
-        Args:
-            cfg (Config): The configs.
-            train_cfg (Optional[Config]):
-                The train opt. Defaults to None.
-            test_cfg (Optional[Config]):
-                The Test opt. Defaults to None.
-
-        Returns:
-            torch.nn.Module: The model.
-        """
-
-        from mmdet.models.builder import build_detector
-        return build_detector(cfg, train_cfg, test_cfg)
-
-    def build_dataset(
-            self,
-            cfg: Config,
-            default_args: Optional[Config] = None) -> torch.utils.data.Dataset:
-        """Build the dataset from configs.
-
-        Args:
-            cfg (Config): The configs.
-            default_args (Optional[Config]):
-                The default args. Defaults to None.
-
-        Returns:
-            torch.utils.data.Dataset: The dataset.
-        """
-
-        from mmdet.datasets.builder import build_dataset
-        return build_dataset(cfg, default_args)
-
-    def train_model(self,
-                    model: torch.nn.Module,
-                    dataset: torch.utils.data.Dataset,
-                    cfg: Config,
-                    distributed: bool = True,
-                    validate: bool = False,
-                    timestamp: Optional[str] = None,
-                    meta: Optional[dict] = None) -> None:
-        """Train the model.
+        args = parser.parse_args(task_args)
+        if 'LOCAL_RANK' not in os.environ:
+            os.environ['LOCAL_RANK'] = str(args.local_rank)
 
-        Args:
-            model (torch.nn.Module): The model.
-            dataset (torch.utils.data.Dataset): The dataset.
-            cfg (Config): The configs.
-            distributed (bool):
-                Whether or not distributed. Defaults to True.
-            validate (bool):
-                Whether or not validate. Defaults to False.
-            timestamp (Optional[str]):
-                The timestamp. Defaults to None.
-            meta (Optional[dict]):
-                The meta. Defaults to None.
-        """
+        if args.options and args.cfg_options:
+            raise ValueError(
+                '--options and --cfg-options cannot be both '
+                'specified, --options is deprecated in favor of --cfg-options')
+        if args.options:
+            warnings.warn('--options is deprecated in favor of --cfg-options')
+            args.cfg_options = args.options
 
-        from mmdet.apis.train import train_detector
-        train_detector(model, dataset, cfg, distributed, validate, timestamp,
-                       meta)
+        return args
 
-    def run(self, *, args: argparse.Namespace, **kwargs) -> None:
+    def run(self, args: argparse.Namespace):
         """Run the task.
 
         Args:
@@ -151,24 +117,52 @@ def run(self, *, args: argparse.Namespace, **kwargs) -> None:
                The args that received from context manager.
""" + import mmcv + import torch + import torch.distributed as dist + from mmcv import Config + from mmcv.runner import get_dist_info, init_dist + from mmcv.utils import get_git_hash from mmdet import __version__ - from mmdet.apis import init_random_seed, set_random_seed + from mmdet.apis import (init_random_seed, set_random_seed, + train_detector) + from mmdet.datasets import build_dataset + from mmdet.models import build_detector from mmdet.utils import (collect_env, get_device, get_root_logger, - setup_multi_processes) - - if 'LOCAL_RANK' not in os.environ: - os.environ['LOCAL_RANK'] = str(dist.get_rank()) + replace_cfg_vals, setup_multi_processes, + update_data_root) cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMDET_DATASETS + update_data_root(cfg) + if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) + if args.auto_scale_lr: + if 'auto_scale_lr' in cfg and \ + 'enable' in cfg.auto_scale_lr and \ + 'base_batch_size' in cfg.auto_scale_lr: + cfg.auto_scale_lr.enable = True + else: + warnings.warn('Can not find "auto_scale_lr" or ' + '"auto_scale_lr.enable" or ' + '"auto_scale_lr.base_batch_size" in your' + ' configuration file. Please update all the ' + 'configuration files to mmdet >= 2.24.1.') + + # set multi-process settings + setup_multi_processes(cfg) + # set cudnn_benchmark if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True - # work_dir is determined in this priority: - # CLI > segment in file > filename + # work_dir is determined in this priority: CLI > segment in file > filename if args.work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.work_dir @@ -176,16 +170,33 @@ def run(self, *, args: argparse.Namespace, **kwargs) -> None: # use config filename as default work_dir if cfg.work_dir is None cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(args.config))[0]) + if args.resume_from is not None: cfg.resume_from = args.resume_from - cfg.auto_resume = args.auto_resume + if args.gpus is not None: + cfg.gpu_ids = range(1) + warnings.warn('`--gpus` is deprecated because we only support ' + 'single GPU mode in non-distributed training. ' + 'Use `gpus=1` now.') + if args.gpu_ids is not None: + cfg.gpu_ids = args.gpu_ids[0:1] + warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' + 'Because we only support single GPU mode in ' + 'non-distributed training. Use the first GPU ' + 'in `gpu_ids` now.') + if args.gpus is None and args.gpu_ids is None: + cfg.gpu_ids = [args.gpu_id] # init distributed env first, since logger depends on the dist info. 
-        distributed = True
-        # gpu_ids is used to calculate iter when resuming checkpoint
-        _, world_size = get_dist_info()
-        cfg.gpu_ids = range(world_size)
+        if args.launcher == 'none':
+            distributed = False
+        else:
+            distributed = True
+            init_dist(args.launcher, **cfg.dist_params)
+            # re-set gpu_ids with distributed training mode
+            _, world_size = get_dist_info()
+            cfg.gpu_ids = range(world_size)
 
         # create work_dir
         mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
@@ -194,23 +205,19 @@ def run(self, *, args: argparse.Namespace, **kwargs) -> None:
         # init the logger before other steps
         timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
         log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
-        print(cfg)
         logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)
 
-        # set multi-process settings
-        setup_multi_processes(cfg)
-
         # init the meta dict to record some important information such as
         # environment info and seed, which will be logged
         meta = dict()
         # log env info
         env_info_dict = collect_env()
-        env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
+        env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
         dash_line = '-' * 60 + '\n'
-        logger.info('Environment info:\n' + dash_line + env_info + # noqa W504
-                    '\n' + dash_line)
+        logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+                    dash_line)
         meta['env_info'] = env_info
-
+        meta['config'] = cfg.pretty_text
         # log some basic info
         logger.info(f'Distributed training: {distributed}')
         logger.info(f'Config:\n{cfg.pretty_text}')
@@ -226,37 +233,32 @@ def run(self, *, args: argparse.Namespace, **kwargs) -> None:
         meta['seed'] = seed
         meta['exp_name'] = osp.basename(args.config)
 
-        model = self.build_model(
+        model = build_detector(
             cfg.model,
             train_cfg=cfg.get('train_cfg'),
             test_cfg=cfg.get('test_cfg'))
         model.init_weights()
-        # SyncBN is not support for DP
-        logger.info(model)
 
-        datasets = [self.build_dataset(cfg.data.train)]
+        datasets = [build_dataset(cfg.data.train)]
         if len(cfg.workflow) == 2:
+            assert 'val' in [mode for (mode, _) in cfg.workflow]
             val_dataset = copy.deepcopy(cfg.data.val)
-            val_dataset.pipeline = cfg.data.train.pipeline
-            datasets.append(self.build_dataset(val_dataset))
+            val_dataset.pipeline = cfg.data.train.get(
+                'pipeline', cfg.data.train.dataset.get('pipeline'))
+            datasets.append(build_dataset(val_dataset))
         if cfg.checkpoint_config is not None:
             # save mmdet version, config file content and class names in
             # checkpoints as meta data
             cfg.checkpoint_config.meta = dict(
-                mmdet_version=f'{__version__}+{get_git_hash()[:7]}',
-                config=cfg.pretty_text,
-                CLASSES=datasets[0].CLASSES,
-                PALETTE=datasets[0].PALETTE)
+                mmdet_version=__version__ + get_git_hash()[:7],
+                CLASSES=datasets[0].CLASSES)
         # add an attribute for visualization convenience
         model.CLASSES = datasets[0].CLASSES
-        # passing checkpoint meta for saving best checkpoint
-        meta.update(cfg.checkpoint_config.meta)
-        self.train_model(
+        train_detector(
             model,
             datasets,
             cfg,
-            distributed=True,
+            distributed=distributed,
             validate=(not args.no_validate),
             timestamp=timestamp,
             meta=meta)
diff --git a/siatune/mm/tasks/mmseg.py b/siatune/mm/tasks/mmseg.py
index 205a396c..14d2b929 100644
--- a/siatune/mm/tasks/mmseg.py
+++ b/siatune/mm/tasks/mmseg.py
@@ -3,14 +3,9 @@
 import copy
 import os
 import time
+import warnings
 from os import path as osp
-from typing import Optional, Sequence
-
-import mmcv
-import torch
-import torch.distributed as dist
-from mmcv.runner import get_dist_info
-from mmcv.utils import Config, DictAction, get_git_hash
+from typing import Sequence
 
 from .builder import TASKS
 from .mmtrainbase import MMTrainBasedTask
@@ -18,16 +13,18 @@
 @TASKS.register_module()
 class MMSegmentation(MMTrainBasedTask):
-    """MMSegmentation Wrapping class for ray tune."""
+    """MMSegmentation wrapper class for `ray.tune`.
 
-    def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
-        """Define and parse the necessary arguments for the task.
+    It is modified from https://github.com/open-mmlab/mmsegmentation/blob/v0.25.0/tools/train.py
 
-        Args:
-            args (Sequence[str]): The args.
-        Returns:
-            argparse.Namespace: The parsed args.
-        """
+    Attributes:
+        args (Sequence[str]):
+    """
+
+    VERSION = 'v0.25.0'
+
+    def parse_args(self, task_args: Sequence[str]):
+        from mmcv.utils import DictAction
 
         parser = argparse.ArgumentParser(description='Train a segmentor')
         parser.add_argument('config', help='train config file path')
@@ -41,6 +38,24 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
             '--no-validate',
             action='store_true',
             help='whether not to evaluate the checkpoint during training')
+        group_gpus = parser.add_mutually_exclusive_group()
+        group_gpus.add_argument(
+            '--gpus',
+            type=int,
+            help='(Deprecated, please use --gpu-id) number of gpus to use '
+            '(only applicable to non-distributed training)')
+        group_gpus.add_argument(
+            '--gpu-ids',
+            type=int,
+            nargs='+',
+            help='(Deprecated, please use --gpu-id) ids of gpus to use '
+            '(only applicable to non-distributed training)')
+        group_gpus.add_argument(
+            '--gpu-id',
+            type=int,
+            default=0,
+            help='id of gpu to use '
+            '(only applicable to non-distributed training)')
         parser.add_argument(
             '--seed', type=int, default=None, help='random seed')
         parser.add_argument(
@@ -51,102 +66,76 @@ def parse_args(self, args: Sequence[str]) -> argparse.Namespace:
             '--deterministic',
             action='store_true',
             help='whether to set deterministic options for CUDNN backend.')
+        parser.add_argument(
+            '--options',
+            nargs='+',
+            action=DictAction,
+            help=
+            "--options is deprecated in favor of --cfg_options' and it will "
+            'not be supported in version v0.22.0. Override some settings in the '
+            'used config, the key-value pair in xxx=yyy format will be merged '
+            'into config file. If the value to be overwritten is a list, it '
+            'should be like key="[a,b]" or key=a,b It also allows nested '
+            'list/tuple values, e.g. key="[(a,b),(c,d)]" Note that the quotation '
+            'marks are necessary and that no white space is allowed.')
         parser.add_argument(
             '--cfg-options',
             nargs='+',
             action=DictAction,
-            help='override some settings in the used config, the key-value '
-            'pair in xxx=yyy format will be merged into config file. If the '
-            'value to be overwritten is a list, it should be like key="[a,b]" '
-            'or key=a,b It also allows nested list/tuple values, e.g. '
-            'key="[(a,b),(c,d)]" Note that the quotation marks are necessary '
-            'and that no white space is allowed.')
+            help='override some settings in the used config, the key-value pair '
+            'in xxx=yyy format will be merged into config file. If the value to '
+            'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+            'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+            'Note that the quotation marks are necessary and that no white space '
+            'is allowed.')
+        parser.add_argument(
+            '--launcher',
+            choices=['none', 'pytorch', 'slurm', 'mpi'],
+            default='none',
+            help='job launcher')
+        parser.add_argument('--local_rank', type=int, default=0)
         parser.add_argument(
             '--auto-resume',
             action='store_true',
             help='resume from the latest checkpoint automatically.')
-        args = parser.parse_args(args)
-        return args
-
-    def build_model(self,
-                    cfg: Config,
-                    train_cfg: Optional[Config] = None,
-                    test_cfg: Optional[Config] = None) -> torch.nn.Module:
-        """Build the model from configs.
-
-        Args:
-            cfg (Config): The configs.
-            train_cfg (Optional[Config]):
-                The train opt. Defaults to None.
-            test_cfg (Optional[Config]):
-                The Test opt. Defaults to None.
-        Returns:
-            torch.nn.Module: The model.
-        """
-
-        from mmseg.models.builder import build_segmentor
-        return build_segmentor(cfg, train_cfg, test_cfg)
-
-    def build_dataset(
-            self,
-            cfg: Config,
-            default_args: Optional[Config] = None) -> torch.utils.data.Dataset:
-        """Build the dataset from configs.
-
-        Args:
-            cfg (Config): The configs.
-            default_args (Optional[Config]):
-                The default args. Defaults to None.
-        Returns:
-            torch.utils.data.Dataset: The dataset.
-        """
-
-        from mmseg.datasets.builder import build_dataset
-        return build_dataset(cfg, default_args)
-
-    def train_model(self,
-                    model: torch.nn.Module,
-                    dataset: torch.utils.data.Dataset,
-                    cfg: Config,
-                    distributed: bool = True,
-                    validate: bool = False,
-                    timestamp: Optional[str] = None,
-                    meta: Optional[dict] = None) -> None:
-        """Train the model.
-
-        Args:
-            model (torch.nn.Module): The model.
-            dataset (torch.utils.data.Dataset): The dataset.
-            cfg (Config): The configs.
-            distributed (bool):
-                Whether or not distributed. Defaults to True.
-            validate (bool):
-                Whether or not validate. Defaults to False.
-            timestamp (Optional[str]):
-                The timestamp. Defaults to None.
-            meta (Optional[dict]): The meta. Defaults to None.
-        """
+        args = parser.parse_args(task_args)
+        if 'LOCAL_RANK' not in os.environ:
+            os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+        if args.options and args.cfg_options:
+            raise ValueError(
+                '--options and --cfg-options cannot be both '
+                'specified, --options is deprecated in favor of --cfg-options. '
+                '--options will not be supported in version v0.22.0.')
+        if args.options:
+            warnings.warn(
+                '--options is deprecated in favor of --cfg-options. '
+                '--options will not be supported in version v0.22.0.')
+            args.cfg_options = args.options
 
-        from mmseg.apis.train import train_segmentor
-        return train_segmentor(model, dataset, cfg, distributed, validate,
-                               timestamp, meta)
+        return args
 
-    def run(self, *, args, **kwargs) -> None:
+    def run(self, args: argparse.Namespace):
         """Run the task.
 
         Args:
             args (argparse.Namespace):
                 The args that received from context manager.
""" - + import mmcv + import torch + import torch.distributed as dist + from mmcv.cnn.utils import revert_sync_batchnorm + from mmcv.runner import get_dist_info, init_dist + from mmcv.utils import Config, get_git_hash from mmseg import __version__ - from mmseg.apis import init_random_seed, set_random_seed - from mmseg.utils import (collect_env, get_root_logger, + from mmseg.apis import (init_random_seed, set_random_seed, + train_segmentor) + from mmseg.datasets import build_dataset + from mmseg.models import build_segmentor + from mmseg.utils import (collect_env, get_device, get_root_logger, setup_multi_processes) - if 'LOCAL_RANK' not in os.environ: - os.environ['LOCAL_RANK'] = str(dist.get_rank()) - cfg = Config.fromfile(args.config) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) @@ -155,8 +144,7 @@ def run(self, *, args, **kwargs) -> None: if cfg.get('cudnn_benchmark', False): torch.backends.cudnn.benchmark = True - # work_dir is determined in this priority: CLI > - # segment in file > filename + # work_dir is determined in this priority: CLI > segment in file > filename if args.work_dir is not None: # update configs according to CLI args if args.work_dir is not None cfg.work_dir = args.work_dir @@ -168,14 +156,31 @@ def run(self, *, args, **kwargs) -> None: cfg.load_from = args.load_from if args.resume_from is not None: cfg.resume_from = args.resume_from + if args.gpus is not None: + cfg.gpu_ids = range(1) + warnings.warn('`--gpus` is deprecated because we only support ' + 'single GPU mode in non-distributed training. ' + 'Use `gpus=1` now.') + if args.gpu_ids is not None: + cfg.gpu_ids = args.gpu_ids[0:1] + warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' + 'Because we only support single GPU mode in ' + 'non-distributed training. Use the first GPU ' + 'in `gpu_ids` now.') + if args.gpus is None and args.gpu_ids is None: + cfg.gpu_ids = [args.gpu_id] cfg.auto_resume = args.auto_resume # init distributed env first, since logger depends on the dist info. 
-        distributed = True
-        # gpu_ids is used to calculate iter when resuming checkpoint
-        _, world_size = get_dist_info()
-        cfg.gpu_ids = range(world_size)
+        if args.launcher == 'none':
+            distributed = False
+        else:
+            distributed = True
+            init_dist(args.launcher, **cfg.dist_params)
+            # gpu_ids is used to calculate iter when resuming checkpoint
+            _, world_size = get_dist_info()
+            cfg.gpu_ids = range(world_size)
 
         # create work_dir
         mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
@@ -196,8 +201,8 @@ def run(self, *, args, **kwargs) -> None:
         env_info_dict = collect_env()
         env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
         dash_line = '-' * 60 + '\n'
-        logger.info('Environment info:\n' + dash_line + env_info + # noqa W504
-                    '\n' + dash_line)
+        logger.info('Environment info:\n' + dash_line + env_info + '\n' +
+                    dash_line)
         meta['env_info'] = env_info
 
         # log some basic info
@@ -205,7 +210,8 @@ def run(self, *, args, **kwargs) -> None:
         logger.info(f'Config:\n{cfg.pretty_text}')
 
         # set random seeds
-        seed = init_random_seed(args.seed)
+        cfg.device = get_device()
+        seed = init_random_seed(args.seed, device=cfg.device)
         seed = seed + dist.get_rank() if args.diff_seed else seed
         logger.info(f'Set random seed to {seed}, '
                     f'deterministic: {args.deterministic}')
@@ -214,20 +220,27 @@ def run(self, *, args, **kwargs) -> None:
         meta['seed'] = seed
         meta['exp_name'] = osp.basename(args.config)
 
-        model = self.build_model(
+        model = build_segmentor(
             cfg.model,
             train_cfg=cfg.get('train_cfg'),
             test_cfg=cfg.get('test_cfg'))
         model.init_weights()
 
         # SyncBN is not support for DP
+        if not distributed:
+            warnings.warn(
+                'SyncBN is only supported with DDP. To be compatible with DP, '
+                'we convert SyncBN to BN. Please use dist_train.sh which can '
+                'avoid this error.')
+            model = revert_sync_batchnorm(model)
+
         logger.info(model)
 
-        datasets = [self.build_dataset(cfg.data.train)]
+        datasets = [build_dataset(cfg.data.train)]
         if len(cfg.workflow) == 2:
             val_dataset = copy.deepcopy(cfg.data.val)
             val_dataset.pipeline = cfg.data.train.pipeline
-            datasets.append(self.build_dataset(val_dataset))
+            datasets.append(build_dataset(val_dataset))
         if cfg.checkpoint_config is not None:
             # save mmseg version, config file content and class names in
             # checkpoints as meta data
@@ -240,11 +253,11 @@ def run(self, *, args, **kwargs) -> None:
         model.CLASSES = datasets[0].CLASSES
         # passing checkpoint meta for saving best checkpoint
         meta.update(cfg.checkpoint_config.meta)
-        self.train_model(
+        train_segmentor(
             model,
             datasets,
             cfg,
-            distributed=True,
+            distributed=distributed,
             validate=(not args.no_validate),
             timestamp=timestamp,
             meta=meta)
diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index e897a71a..e1f71928 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -1,7 +1,6 @@
 # Copyright (c) SI-Analytics. All rights reserved.
-from abc import ABCMeta, abstractmethod
+from abc import ABCMeta
 
-import mmcv
 import torch
 from ray.air.config import ScalingConfig
 from ray.train.data_parallel_trainer import DataParallelTrainer
@@ -15,52 +14,13 @@
 class MMTrainBasedTask(BaseTask, metaclass=ABCMeta):
     """Wrap the apis of open mm train-based projects."""
 
-    @abstractmethod
-    def build_model(self, cfg: mmcv.Config, **kwargs) -> torch.nn.Module:
-        """Build the model from configs.
-
-        Args:
-            cfg (Config): The configs.
-        Returns:
-            torch.nn.Module: The model.
- """ - pass - - @abstractmethod - def build_dataset(self, cfg: mmcv.Config, - **kwargs) -> torch.utils.data.Dataset: - """Build the dataset from configs. - - Args: - cfg (Config): The configs. - Returns: - torch.utils.data.Dataset: The dataset. - """ - pass - - @abstractmethod - def train_model( - self, - model: torch.nn.Module, - dataset: torch.utils.data.Dataset, - cfg: mmcv.Config, - **kwargs, - ) -> None: - """Train the model. - - Args: - model (torch.nn.Module): The model. - dataset (torch.utils.data.Dataset): The dataset. - cfg (Config): The configs. - """ - pass - def create_trainable(self) -> DataParallelTrainer: - """Get ray trainable task. + """Get a :class:`DataParallelTrainer` instance. Returns: - DataParallelTrainer: The trainable task. + DataParallelTrainer: Trainer to optimize hyperparameter. """ + assert self.num_workers == self.num_gpus_per_worker, ( '`num_workers` must be equal to `num_gpus_per_worker`.') diff --git a/tests/test_mm/test_tasks.py b/tests/test_mm/test_tasks.py index a779dc86..a602f9ad 100644 --- a/tests/test_mm/test_tasks.py +++ b/tests/test_mm/test_tasks.py @@ -1,7 +1,9 @@ import argparse -import os from unittest.mock import patch +import mmcls # noqa: F401 +import mmdet # noqa: F401 +import mmseg # noqa: F401 import pytest import torch from mmcv.utils import Config @@ -162,36 +164,33 @@ def test_discrete_test_function(mock_report): assert isinstance(get_session().get('result'), float) -@patch.object(MMSegmentation, 'train_model') -@patch.object(MMSegmentation, 'build_model') -@patch.object(MMSegmentation, 'build_dataset') -def test_mmseg(*not_used): - os.environ['LOCAL_RANK'] = '0' - - task = MMSegmentation() - task.set_args(['tests/data/config.py']) +@patch('mmcls.apis.train_model') +@patch('mmcls.datasets.build_dataset') +@patch('mmcls.models.build_classifier') +def test_mmcls(*not_used): + task = MMClassification() + task_args = ['tests/data/config.py'] + task.set_args(task_args) task.run(args=task.args) -@patch.object(MMDetection, 'train_model') -@patch.object(MMDetection, 'build_model') -@patch.object(MMDetection, 'build_dataset') +@patch('mmdet.apis.train_detector') +@patch('mmdet.datasets.build_dataset') +@patch('mmdet.models.build_detector') def test_mmdet(*not_used): - os.environ['LOCAL_RANK'] = '0' - task = MMDetection() - task.set_args(['tests/data/config.py']) + task_args = ['tests/data/config.py'] + task.set_args(task_args) task.run(args=task.args) -@patch.object(MMClassification, 'train_model') -@patch.object(MMClassification, 'build_model') -@patch.object(MMClassification, 'build_dataset') -def test_mmcls(*not_used): - os.environ['LOCAL_RANK'] = '0' - - task = MMClassification() - task.set_args(['tests/data/config.py']) +@patch('mmseg.apis.train_segmentor') +@patch('mmseg.datasets.build_dataset') +@patch('mmseg.models.build_segmentor') +def test_mmseg(*not_used): + task = MMSegmentation() + task_args = ['tests/data/config.py'] + task.set_args(task_args) task.run(args=task.args) From d2ff007ccd80b1eabafa7c0c64bf4812cf5440f4 Mon Sep 17 00:00:00 2001 From: Junhwa Song Date: Fri, 16 Dec 2022 19:21:09 +0900 Subject: [PATCH 23/28] Fix minor (#100) * Fix blocking issue at test_tasks.py * Support single GPU tuning * Bump FLAML to v1.0.14 to avoid deprecated warning --- requirements/optional.txt | 2 +- siatune/mm/tasks/mmtrainbase.py | 4 +- tests/test_mm/test_tasks.py | 280 ---------------------- tests/test_mm/test_tasks/test_base.py | 66 +++++ tests/test_mm/test_tasks/test_blackbox.py | 80 +++++++ tests/test_mm/test_tasks/test_mmtask.py | 37 +++ 6 
files changed, 185 insertions(+), 284 deletions(-) delete mode 100644 tests/test_mm/test_tasks.py create mode 100644 tests/test_mm/test_tasks/test_base.py create mode 100644 tests/test_mm/test_tasks/test_blackbox.py create mode 100644 tests/test_mm/test_tasks/test_mmtask.py diff --git a/requirements/optional.txt b/requirements/optional.txt index 5bddb703..4ce60537 100644 --- a/requirements/optional.txt +++ b/requirements/optional.txt @@ -1,5 +1,5 @@ bayesian-optimization==1.2.0 -flaml==0.9.7 +flaml==1.0.14 hyperopt==0.2.5 mlflow==1.23.1 nevergrad==0.4.3.post7 diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py index e1f71928..3f2607f3 100644 --- a/siatune/mm/tasks/mmtrainbase.py +++ b/siatune/mm/tasks/mmtrainbase.py @@ -28,8 +28,6 @@ def create_trainable(self) -> DataParallelTrainer: self.context_aware_run, backend_config=CustomBackendConfig(), scaling_config=ScalingConfig( - trainer_resources=dict( - CPU=self.num_cpus_per_worker, - GPU=self.num_gpus_per_worker), + trainer_resources=dict(CPU=self.num_cpus_per_worker), num_workers=self.num_workers, use_gpu=torch.cuda.is_available())) diff --git a/tests/test_mm/test_tasks.py b/tests/test_mm/test_tasks.py deleted file mode 100644 index a602f9ad..00000000 --- a/tests/test_mm/test_tasks.py +++ /dev/null @@ -1,280 +0,0 @@ -import argparse -from unittest.mock import patch - -import mmcls # noqa: F401 -import mmdet # noqa: F401 -import mmseg # noqa: F401 -import pytest -import torch -from mmcv.utils import Config -from ray import tune -from ray.air import session - -from siatune.mm.tasks import (TASKS, BaseTask, BlackBoxTask, - ContinuousTestFunction, DiscreteTestFunction, - MMClassification, MMDetection, MMSegmentation, - MMTrainBasedTask, build_task_processor) -from siatune.utils.config import dump_cfg - -_session = dict() - - -def report_to_session(*args, **kwargs): - _session = get_session() - _session.update(kwargs) - for arg in args: - if isinstance(arg, dict): - _session.update(arg) - - -def get_session(): - global _session - return _session - - -@patch('ray.tune.report', side_effect=report_to_session) -def test_base_task(mock_report): - with pytest.raises(TypeError): - BaseTask() - - class TestRewriter: - - def __call__(self, context): - context.get('args').test = -1 - return context - - class TestTask(BaseTask): - - def parse_args(self, *args, **kwargs): - return argparse.Namespace(test=1) - - def run(self, *, args, **kwargs): - tune.report(test=args.test) - return args.test - - def create_trainable(self): - return self.context_aware_run - - task = TestTask([TestRewriter()]) - task.set_args('') - assert task.args == argparse.Namespace(test=1) - assert isinstance(task.rewriters, list) - task.context_aware_run({}) - assert get_session().get('test') == -1 - - tune.run(task.create_trainable(), config={}) - - -def test_black_box_task(): - with pytest.raises(TypeError): - BlackBoxTask() - - class TestTask(BlackBoxTask): - - def run(self, *args, **kwargs): - tune.report(test=1) - - task = TestTask() - task.set_args('') - assert task.args == argparse.Namespace() - tune.run(task.create_trainable(), config={}) - - -def test_build_task_processor(): - - class TestTaks(BaseTask): - - def parse_args(self, *args, **kwargs): - pass - - def run(self, *args, **kwargs): - pass - - def create_trainable(self, *args, **kwargs): - pass - - TASKS.register_module(TestTaks) - assert isinstance(build_task_processor(dict(type='TestTaks')), TestTaks) - - -@patch('ray.tune.report', side_effect=report_to_session) -def 
test_continuous_test_function(mock_report): - func = ContinuousTestFunction() - predefined_cont_funcs = [ - 'delayedsphere', - 'sphere', - 'sphere1', - 'sphere2', - 'sphere4', - 'maxdeceptive', - 'sumdeceptive', - 'altcigar', - 'discus', - 'cigar', - 'bentcigar', - 'multipeak', - 'altellipsoid', - 'stepellipsoid', - 'ellipsoid', - 'rastrigin', - 'bucherastrigin', - 'doublelinearslope', - 'stepdoublelinearslope', - 'hm', - 'rosenbrock', - 'ackley', - 'schwefel_1_2', - 'griewank', - 'deceptiveillcond', - 'deceptivepath', - 'deceptivemultimodal', - 'lunacek', - 'genzcornerpeak', - 'minusgenzcornerpeak', - 'genzgaussianpeakintegral', - 'minusgenzgaussianpeakintegral', - 'slope', - 'linear', - 'st0', - 'st1', - 'st10', - 'st100', - ] - - for func_name in predefined_cont_funcs: - dump_cfg( - Config(dict(func=func_name, _variable0=0.0, _variable1=0.0)), - 'test.py') - args = argparse.Namespace(config='test.py') - func.run(args=args) - assert isinstance(get_session().get('result'), float) - - -@patch('ray.tune.report', side_effect=report_to_session) -def test_discrete_test_function(mock_report): - func = DiscreteTestFunction() - - predefined_discrete_funcs = ['onemax', 'leadingones', 'jump'] - for func_name in predefined_discrete_funcs: - dump_cfg( - Config(dict(func=func_name, _variable0=0.0, _variable1=0.0)), - 'test.py') - args = argparse.Namespace(config='test.py') - func.run(args=args) - assert isinstance(get_session().get('result'), float) - - -@patch('mmcls.apis.train_model') -@patch('mmcls.datasets.build_dataset') -@patch('mmcls.models.build_classifier') -def test_mmcls(*not_used): - task = MMClassification() - task_args = ['tests/data/config.py'] - task.set_args(task_args) - task.run(args=task.args) - - -@patch('mmdet.apis.train_detector') -@patch('mmdet.datasets.build_dataset') -@patch('mmdet.models.build_detector') -def test_mmdet(*not_used): - task = MMDetection() - task_args = ['tests/data/config.py'] - task.set_args(task_args) - task.run(args=task.args) - - -@patch('mmseg.apis.train_segmentor') -@patch('mmseg.datasets.build_dataset') -@patch('mmseg.models.build_segmentor') -def test_mmseg(*not_used): - task = MMSegmentation() - task_args = ['tests/data/config.py'] - task.set_args(task_args) - task.run(args=task.args) - - -@patch('ray.air.session.report', side_effect=report_to_session) -def test_mm_train_based_task(mock_report): - with pytest.raises(TypeError): - MMTrainBasedTask() - - class TestTask(MMTrainBasedTask): - - def parse_args(self, args): - parser = argparse.ArgumentParser() - return parser.parse_args(args) - - def build_model(self, cfg): - - class Regression(torch.nn.Module): - - def __init__(self, input_dim, output_dim): - super().__init__() - self.linear = torch.nn.Linear(input_dim, output_dim) - - def forward(self, x): - return self.linear(x) - - return Regression(cfg.input_dim, cfg.output_dim) - - def build_dataset(self, cfg): - - class Dataset(torch.utils.data.Dataset): - - def __init__(self, num_points): - torch.manual_seed(0) - self._x = torch.randn(num_points, 1) - self._y = 2 * self._x + 1 - self.num_points = num_points - - def __getitem__(self, index): - return self._x[index], self._y[index] - - def __len__(self): - return self.num_points - - return Dataset(cfg.num_points) - - def train_model(self, model, dataset, cfg): - criterion = torch.nn.MSELoss() - optimizer = torch.optim.SGD(model.parameters(), lr=cfg.lr) - data_loader = torch.utils.data.DataLoader( - dataset, batch_size=cfg.batch_size) - for _ in range(cfg.num_epochs): - total_loss = 0. 
-                for batch_idx, (data, target) in enumerate(data_loader):
-                    optimizer.zero_grad()
-                    output = model(data)
-                    loss = criterion(output, target)
-                    loss.backward()
-                    optimizer.step()
-                    total_loss += loss.item()
-                session.report(loss=total_loss / (batch_idx + 1))
-
-        def run(self, *, searched_cfg, **kwargs):
-            cfg = searched_cfg.get('cfg')
-            model = self.build_model(cfg.model)
-            dataset = self.build_dataset(cfg.data)
-            self.train_model(model, dataset, cfg.train)
-
-    cfg = Config(
-        dict(
-            model=dict(
-                input_dim=1,
-                output_dim=1,
-            ),
-            data=dict(num_points=128, ),
-            train=dict(
-                lr=0.1,
-                batch_size=32,
-                num_epochs=4,
-            )))
-
-    task = TestTask()
-    task.set_resource()
-    task.context_aware_run(searched_cfg=dict(cfg=cfg))
-    assert 'loss' in get_session()
-
-    trainable = task.create_trainable()
-    tune.Tuner(trainable).fit()
diff --git a/tests/test_mm/test_tasks/test_base.py b/tests/test_mm/test_tasks/test_base.py
new file mode 100644
index 00000000..ee3f19ef
--- /dev/null
+++ b/tests/test_mm/test_tasks/test_base.py
@@ -0,0 +1,66 @@
+import argparse
+
+import pytest
+import ray
+from ray import tune
+from ray.tune.result_grid import ResultGrid
+
+from siatune.mm.tasks import TASKS, BaseTask, build_task_processor
+
+
+@pytest.fixture
+def init_ray():
+    if ray.is_initialized():
+        ray.shutdown()
+    return ray.init(num_cpus=1)
+
+
+def test_base_task(init_ray):
+    with pytest.raises(TypeError):
+        BaseTask()
+
+    class TestTask(BaseTask):
+
+        def parse_args(self, args):
+            parser = argparse.ArgumentParser()
+            parser.add_argument('test')
+            return parser.parse_args(args)
+
+        def run(self, args):
+            tune.report(test=args.test)
+
+        def create_trainable(self):
+            return self.context_aware_run
+
+    class TestRewriter:
+
+        def __call__(self, context):
+            args = context.pop('args')
+            args.test = 'success'
+            return dict(args=args)
+
+    task = TestTask(rewriters=[TestRewriter()])
+    task.set_args(['default'])
+    assert task.args == argparse.Namespace(test='default')
+
+    trainable = task.create_trainable()
+    results = ResultGrid(tune.run(trainable, config={}))
+    assert results[0].metrics['test'] == 'success'
+
+
+def test_build_task_processor():
+
+    @TASKS.register_module()
+    class TestTask(BaseTask):
+
+        def parse_args(self, args):
+            pass
+
+        def run(self, args):
+            pass
+
+        def create_trainable(self):
+            pass
+
+    task = build_task_processor(dict(type='TestTask', rewriters=[]))
+    assert isinstance(task, (BaseTask, TestTask))
diff --git a/tests/test_mm/test_tasks/test_blackbox.py b/tests/test_mm/test_tasks/test_blackbox.py
new file mode 100644
index 00000000..41bdfdad
--- /dev/null
+++ b/tests/test_mm/test_tasks/test_blackbox.py
@@ -0,0 +1,80 @@
+import argparse
+from unittest.mock import patch
+
+from mmcv.utils import Config
+
+from siatune.mm.tasks import ContinuousTestFunction, DiscreteTestFunction
+from siatune.utils.config import dump_cfg
+
+session = dict()
+
+
+def report_to_session(**kwargs):
+    session.update(kwargs)
+
+
+@patch('ray.tune.report', side_effect=report_to_session)
+def test_continuous_test_function(init_ray):
+    func = ContinuousTestFunction()
+    predefined_cont_funcs = [
+        'delayedsphere',
+        'sphere',
+        'sphere1',
+        'sphere2',
+        'sphere4',
+        'maxdeceptive',
+        'sumdeceptive',
+        'altcigar',
+        'discus',
+        'cigar',
+        'bentcigar',
+        'multipeak',
+        'altellipsoid',
+        'stepellipsoid',
+        'ellipsoid',
+        'rastrigin',
+        'bucherastrigin',
+        'doublelinearslope',
+        'stepdoublelinearslope',
+        'hm',
+        'rosenbrock',
+        'ackley',
+        'schwefel_1_2',
+        'griewank',
+        'deceptiveillcond',
+        'deceptivepath',
+        'deceptivemultimodal',
+        'lunacek',
+        'genzcornerpeak',
+        'minusgenzcornerpeak',
+        'genzgaussianpeakintegral',
+        'minusgenzgaussianpeakintegral',
+        'slope',
+        'linear',
+        'st0',
+        'st1',
+        'st10',
+        'st100',
+    ]
+
+    for func_name in predefined_cont_funcs:
+        dump_cfg(
+            Config(dict(func=func_name, _variable0=0.0, _variable1=0.0)),
+            'test.py')
+        args = argparse.Namespace(config='test.py')
+        func.run(args=args)
+        assert isinstance(session['result'], float)
+
+
+@patch('ray.tune.report', side_effect=report_to_session)
+def test_discrete_test_function(init_ray):
+    func = DiscreteTestFunction()
+
+    predefined_discrete_funcs = ['onemax', 'leadingones', 'jump']
+    for func_name in predefined_discrete_funcs:
+        dump_cfg(
+            Config(dict(func=func_name, _variable0=0.0, _variable1=0.0)),
+            'test.py')
+        args = argparse.Namespace(config='test.py')
+        func.run(args=args)
+        assert isinstance(session['result'], float)
diff --git a/tests/test_mm/test_tasks/test_mmtask.py b/tests/test_mm/test_tasks/test_mmtask.py
new file mode 100644
index 00000000..01dda0fa
--- /dev/null
+++ b/tests/test_mm/test_tasks/test_mmtask.py
@@ -0,0 +1,37 @@
+from unittest.mock import patch
+
+import mmcls  # noqa: F401
+import mmdet  # noqa: F401
+import mmseg  # noqa: F401
+
+from siatune.mm.tasks import MMClassification, MMDetection, MMSegmentation
+
+
+@patch('mmcls.apis.train_model')
+@patch('mmcls.datasets.build_dataset')
+@patch('mmcls.models.build_classifier')
+def test_mmcls(*not_used):
+    task = MMClassification()
+    task_args = ['tests/data/config.py']
+    task.set_args(task_args)
+    task.run(args=task.args)
+
+
+@patch('mmdet.apis.train_detector')
+@patch('mmdet.datasets.build_dataset')
+@patch('mmdet.models.build_detector')
+def test_mmdet(*not_used):
+    task = MMDetection()
+    task_args = ['tests/data/config.py']
+    task.set_args(task_args)
+    task.run(args=task.args)
+
+
+@patch('mmseg.apis.train_segmentor')
+@patch('mmseg.datasets.build_dataset')
+@patch('mmseg.models.build_segmentor')
+def test_mmseg(*not_used):
+    task = MMSegmentation()
+    task_args = ['tests/data/config.py']
+    task.set_args(task_args)
+    task.run(args=task.args)

From e63911dde4d3f871f09531d2f920fe651f93cebb Mon Sep 17 00:00:00 2001
From: Junhwa Song
Date: Sat, 17 Dec 2022 00:01:54 +0900
Subject: [PATCH 24/28] Update siatune/mm/tasks/mmtrainbase.py

Signed-off-by: Junhwa Song
---
 siatune/mm/tasks/mmtrainbase.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/siatune/mm/tasks/mmtrainbase.py b/siatune/mm/tasks/mmtrainbase.py
index 3f2607f3..e9c4bb1e 100644
--- a/siatune/mm/tasks/mmtrainbase.py
+++ b/siatune/mm/tasks/mmtrainbase.py
@@ -21,9 +21,6 @@ def create_trainable(self) -> DataParallelTrainer:
             DataParallelTrainer: Trainer to optimize hyperparameter.
""" - assert self.num_workers == self.num_gpus_per_worker, ( - '`num_workers` must be equal to `num_gpus_per_worker`.') - return DataParallelTrainer( self.context_aware_run, backend_config=CustomBackendConfig(), From 9d4f5e68af307453c8f457705711a2870192b3b3 Mon Sep 17 00:00:00 2001 From: KKIEEK Date: Sat, 17 Dec 2022 01:58:25 +0900 Subject: [PATCH 25/28] Fix typo --- tests/test_mm/test_tasks/test_blackbox.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_mm/test_tasks/test_blackbox.py b/tests/test_mm/test_tasks/test_blackbox.py index 41bdfdad..84947d7a 100644 --- a/tests/test_mm/test_tasks/test_blackbox.py +++ b/tests/test_mm/test_tasks/test_blackbox.py @@ -14,7 +14,7 @@ def report_to_session(**kwargs): @patch('ray.tune.report', side_effect=report_to_session) -def test_continuous_test_function(init_ray): +def test_continuous_test_function(*not_used): func = ContinuousTestFunction() predefined_cont_funcs = [ 'delayedsphere', @@ -67,7 +67,7 @@ def test_continuous_test_function(init_ray): @patch('ray.tune.report', side_effect=report_to_session) -def test_discrete_test_function(init_ray): +def test_discrete_test_function(*not_used): func = DiscreteTestFunction() predefined_discrete_funcs = ['onemax', 'leadingones', 'jump'] From cf5a79b8b45fc546b7648c33589b8342581337a9 Mon Sep 17 00:00:00 2001 From: Junhwa Song Date: Sat, 17 Dec 2022 15:39:45 +0900 Subject: [PATCH 26/28] Supplement documentations (#102) --- siatune/mm/tasks/mmcls.py | 9 +++++++- siatune/mm/tasks/mmdet.py | 9 +++++++- siatune/mm/tasks/mmseg.py | 9 +++++++- siatune/ray/tuner.py | 47 +++++++++++++++++++++++---------------- 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/siatune/mm/tasks/mmcls.py b/siatune/mm/tasks/mmcls.py index fdd8dc7a..c77485a1 100644 --- a/siatune/mm/tasks/mmcls.py +++ b/siatune/mm/tasks/mmcls.py @@ -18,7 +18,14 @@ class MMClassification(MMTrainBasedTask): It is modified from https://github.com/open-mmlab/mmclassification/blob/v0.23.2/tools/train.py Attributes: - args (Sequence[str]): + args (argparse.Namespace): The arguments for `tools/train.py` + script file. It is parsed by :method:`parse_args`. + num_workers (int): The number of workers to launch. + num_cpus_per_worker (int): The number of CPUs per worker. + Default to 1. + num_gpus_per_worker (int): The number of GPUs per worker. + Since it must be equal `num_workers` attribute, it is + not used in MMClassification. """ VERSION = 'v0.23.2' diff --git a/siatune/mm/tasks/mmdet.py b/siatune/mm/tasks/mmdet.py index 6504d339..e19448f5 100644 --- a/siatune/mm/tasks/mmdet.py +++ b/siatune/mm/tasks/mmdet.py @@ -18,7 +18,14 @@ class MMDetection(MMTrainBasedTask): It is modified from https://github.com/open-mmlab/mmdetection/blob/v2.25.2/tools/train.py Attributes: - args (Sequence[str]): + args (argparse.Namespace): The arguments for `tools/train.py` + script file. It is parsed by :method:`parse_args`. + num_workers (int): The number of workers to launch. + num_cpus_per_worker (int): The number of CPUs per worker. + Default to 1. + num_gpus_per_worker (int): The number of GPUs per worker. + Since it must be equal `num_workers` attribute, it is + not used in MMDetection. 
""" VERSION = 'v2.25.2' diff --git a/siatune/mm/tasks/mmseg.py b/siatune/mm/tasks/mmseg.py index 14d2b929..db17d687 100644 --- a/siatune/mm/tasks/mmseg.py +++ b/siatune/mm/tasks/mmseg.py @@ -18,7 +18,14 @@ class MMSegmentation(MMTrainBasedTask): It is modified from https://github.com/open-mmlab/mmsegmentation/blob/v0.25.0/tools/train.py Attributes: - args (Sequence[str]): + args (argparse.Namespace): The arguments for `tools/train.py` + script file. It is parsed by :method:`parse_args`. + num_workers (int): The number of workers to launch. + num_cpus_per_worker (int): The number of CPUs per worker. + Default to 1. + num_gpus_per_worker (int): The number of GPUs per worker. + Since it must be equal `num_workers` attribute, it is + not used in MMSegmentation. """ VERSION = 'v0.25.0' diff --git a/siatune/ray/tuner.py b/siatune/ray/tuner.py index e1aaf079..045bbf5b 100644 --- a/siatune/ray/tuner.py +++ b/siatune/ray/tuner.py @@ -1,6 +1,7 @@ # Copyright (c) SI-Analytics. All rights reserved. import copy import os.path as osp +from typing import Any, Callable, Optional, Union from ray.air.config import RunConfig from ray.tune.tune_config import TuneConfig @@ -14,27 +15,35 @@ class Tuner: """Wrapper class of :class:`ray.tune.tuner.Tuner`. Args: - trainable (Callable): - work_dir (str): - param_space (dict, optional): - tune_cfg (dict, optional): - Refer to https://github.com/ray-project/ray/blob/ray-2.1.0/python/ray/tune/tune_config.py for details. # noqa - searcher (dict, optional): - trial_scheduler (dict, optional): - stopper (dict, optional): - callbacks (list, optional): + trainable (Callable): The trainable to be tuned. + work_dir (str): The working directory to save checkpoints. The logs + will be saved in the subdirectory of `work_dir`. + param_space (dict, optional): Search space of the tuning task. + tune_cfg (dict, optional): Tuning algorithm specific configs + except for `search_alg` and `scheduler`. + Refer to :class:`ray.tune.tune_config.TuneConfig` for more info. + searcher (dict, optional): Search algorithm for optimization. + Default to random search. + Refer to :module:`ray.tune.search` for more options. + trial_scheduler (dict, optional): Scheduler for executing the trial. + Default to FIFO scheduler. + Refer to :module:`ray.tune.schedulers` for more options. + stopper (dict, optional): Stop conditions to consider. + Refer to :class:`ray.tune.stopper.Stopper` for more info. + callbacks (dict | list, optional): Callbacks to invoke. + Refer to :class:``ray.tune.callback.Callback` for more info. 
""" def __init__( self, - trainable, - work_dir, - param_space=None, - tune_cfg=None, - searcher=None, - trial_scheduler=None, - stopper=None, - callbacks=None, + trainable: Callable[[dict], Any], + work_dir: str, + param_space: Optional[dict] = None, + tune_cfg: Optional[dict] = None, + searcher: Optional[dict] = None, + trial_scheduler: Optional[dict] = None, + stopper: Optional[dict] = None, + callbacks: Optional[Union[dict, list]] = None, ): work_dir = osp.abspath(work_dir) @@ -73,7 +82,7 @@ def __init__( ) @classmethod - def from_cfg(cls, cfg, trainable): + def from_cfg(cls, cfg: dict, trainable: Callable[[dict], Any]): cfg = copy.deepcopy(cfg) tuner = cls( trainable, @@ -89,7 +98,7 @@ def from_cfg(cls, cfg, trainable): return tuner @classmethod - def resume(cls, path, **kwargs): + def resume(cls, path: str, **kwargs): return cls.restore(path, **kwargs) def fit(self): From ab040696b9d2dbb6bd7a955d7245ec15463915fb Mon Sep 17 00:00:00 2001 From: Junhwa Song Date: Sat, 17 Dec 2022 15:43:07 +0900 Subject: [PATCH 27/28] Update siatune/ray/tuner.py Signed-off-by: Junhwa Song --- siatune/ray/tuner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/siatune/ray/tuner.py b/siatune/ray/tuner.py index 045bbf5b..08fe9a58 100644 --- a/siatune/ray/tuner.py +++ b/siatune/ray/tuner.py @@ -31,7 +31,7 @@ class Tuner: stopper (dict, optional): Stop conditions to consider. Refer to :class:`ray.tune.stopper.Stopper` for more info. callbacks (dict | list, optional): Callbacks to invoke. - Refer to :class:``ray.tune.callback.Callback` for more info. + Refer to :class:`ray.tune.callback.Callback` for more info. """ def __init__( From 9ac7b32d9f52aef10b89f9c2cf99ac78a1cb77c8 Mon Sep 17 00:00:00 2001 From: Junhwa Song Date: Mon, 19 Dec 2022 11:21:51 +0900 Subject: [PATCH 28/28] Support resume (#104) --- siatune/ray/tuner.py | 13 +++++++++---- siatune/run.py | 5 +++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/siatune/ray/tuner.py b/siatune/ray/tuner.py index 08fe9a58..695078b2 100644 --- a/siatune/ray/tuner.py +++ b/siatune/ray/tuner.py @@ -32,6 +32,8 @@ class Tuner: Refer to :class:`ray.tune.stopper.Stopper` for more info. callbacks (dict | list, optional): Callbacks to invoke. Refer to :class:`ray.tune.callback.Callback` for more info. + resume (str, optional): The experiment path to resume. + Default to None. 
""" def __init__( @@ -44,6 +46,7 @@ def __init__( trial_scheduler: Optional[dict] = None, stopper: Optional[dict] = None, callbacks: Optional[Union[dict, list]] = None, + resume: Optional[str] = None, ): work_dir = osp.abspath(work_dir) @@ -66,6 +69,8 @@ def __init__( callbacks = [callbacks] callbacks = [build_callback(callback) for callback in callbacks] + self.resume = resume + self.tuner = RayTuner( trainable, param_space=dict(train_loop_config=param_space), @@ -93,13 +98,13 @@ def from_cfg(cls, cfg: dict, trainable: Callable[[dict], Any]): trial_scheduler=cfg.get('trial_scheduler', None), stopper=cfg.get('stopper', None), callbacks=cfg.get('callbacks', None), + resume=cfg.get('resume', None), ) return tuner - @classmethod - def resume(cls, path: str, **kwargs): - return cls.restore(path, **kwargs) - def fit(self): + if self.resume is not None: + self.tuner = RayTuner.restore(self.resume) + return self.tuner.fit() diff --git a/siatune/run.py b/siatune/run.py index 50ed7708..21647776 100644 --- a/siatune/run.py +++ b/siatune/run.py @@ -21,6 +21,8 @@ def parse_args() -> Namespace: parser.add_argument('tune_config', help='tune config file path') parser.add_argument( '--work-dir', default=None, help='the dir to save logs and models') + parser.add_argument( + '--resume', default=None, help='the experiment path to resume') parser.add_argument( '--address', default=None, @@ -89,6 +91,9 @@ def main() -> None: if hasattr(task_processor.args, 'work_dir'): task_processor.args.work_dir = tune_config.work_dir + if args.resume is not None: + tune_config.resume = args.resume + ray.init( address=args.address, num_cpus=args.num_cpus, num_gpus=args.num_gpus) assert ray.is_initialized()