From 982fa4da6f8b4b355c55cd75769b6e077ce3b9b8 Mon Sep 17 00:00:00 2001 From: "jianlong.qu-ext" Date: Mon, 17 Feb 2025 18:17:55 +0800 Subject: [PATCH 1/3] add mmdet tests --- mmdet/__init__.py | 2 +- mmdet/apis/inference.py | 5 +- mmdet/models/backbones/csp_darknet.py | 11 +- mmdet/models/layers/se_layer.py | 10 +- mmdet/models/losses/focal_loss.py | 2 + .../assigners/iou2d_calculator.py | 2 +- .../assigners/sim_ota_assigner.py | 29 +- .../task_modules/samplers/random_sampler.py | 8 +- .../samplers/score_hlr_sampler.py | 7 + mmdet/models/task_modules/tracking/aflink.py | 7 + mmdet/structures/bbox/bbox_overlaps.py | 2 +- mmdet/utils/benchmark.py | 100 +++++-- mmdet/utils/contextmanagers.py | 236 ++++++++++------ mmdet/utils/memory.py | 7 +- mmdet/utils/profiling.py | 18 +- tests/test_apis/test_inference.py | 58 ++-- .../test_hooks/test_mean_teacher_hook.py | 3 +- tests/test_engine/test_runner/test_loops.py | 3 +- .../test_detectors/test_cornernet.py | 14 +- tests/test_models/test_detectors/test_glip.py | 8 +- .../test_detectors/test_kd_single_stage.py | 17 +- .../test_detectors/test_maskformer.py | 33 ++- .../test_panoptic_two_stage_segmentor.py | 13 +- tests/test_models/test_detectors/test_rpn.py | 27 +- .../test_detectors/test_single_stage.py | 44 ++- .../test_single_stage_instance_seg.py | 37 ++- .../test_detectors/test_two_stage.py | 38 ++- tests/test_models/test_mot/test_byte_track.py | 17 +- tests/test_models/test_mot/test_deep_sort.py | 9 +- tests/test_models/test_mot/test_oc_sort.py | 13 +- tests/test_models/test_mot/test_qdtrack.py | 19 +- tests/test_models/test_mot/test_sort.py | 9 +- .../test_models/test_mot/test_strong_sort.py | 9 +- .../test_necks/test_ct_resnet_neck.py | 11 + .../test_bbox_heads/test_double_bbox_head.py | 7 +- .../test_roi_heads/test_cascade_roi_head.py | 131 ++++++--- .../test_roi_heads/test_dynamic_roi_head.py | 6 +- .../test_roi_heads/test_grid_roi_head.py | 20 +- .../test_roi_heads/test_htc_roi_head.py | 195 ++++++++----- .../test_mask_heads/test_coarse_mask_head.py | 7 +- .../test_mask_heads/test_fcn_mask_head.py | 6 +- .../test_feature_relay_head.py | 6 +- .../test_fused_semantic_head.py | 7 +- .../test_global_context_head.py | 7 +- .../test_mask_heads/test_grid_head.py | 15 +- .../test_mask_heads/test_htc_mask_head.py | 6 +- .../test_mask_heads/test_maskiou_head.py | 13 +- .../test_mask_heads/test_scnet_mask_head.py | 6 +- .../test_scnet_semantic_head.py | 7 +- .../test_mask_scoring_roI_head.py | 259 ++++++++++++------ .../test_multi_instance_roi_head.py | 127 ++++++--- .../test_roi_heads/test_pisa_roi_head.py | 6 +- .../test_point_rend_roi_head.py | 107 +++++--- .../test_roi_heads/test_scnet_roi_head.py | 196 ++++++++----- .../test_roi_heads/test_sparse_roi_head.py | 166 +++++++---- .../test_roi_heads/test_standard_roi_head.py | 179 ++++++++---- .../test_roi_heads/test_trident_roi_head.py | 83 ++++-- .../test_anchor_generator.py | 141 +++++++++- .../test_models/test_vis/test_mask2former.py | 17 +- .../test_vis/test_masktrack_rcnn.py | 19 +- .../test_bbox/test_base_boxes.py | 18 +- tests/test_utils/test_benchmark.py | 5 +- 62 files changed, 1831 insertions(+), 759 deletions(-) diff --git a/mmdet/__init__.py b/mmdet/__init__.py index 3ac884ac8b4..49589e46b26 100644 --- a/mmdet/__init__.py +++ b/mmdet/__init__.py @@ -6,7 +6,7 @@ from .version import __version__, version_info mmcv_minimum_version = '2.0.0rc4' -mmcv_maximum_version = '2.2.0' +mmcv_maximum_version = '2.2.1' mmcv_version = digit_version(mmcv.__version__) mmengine_minimum_version = '0.7.1' diff 
--git a/mmdet/apis/inference.py b/mmdet/apis/inference.py index 7e6f914ecab..e7008d1550e 100644 --- a/mmdet/apis/inference.py +++ b/mmdet/apis/inference.py @@ -22,7 +22,6 @@ from ..structures import DetDataSample, SampleList from ..utils import get_test_pipeline_cfg - def init_detector( config: Union[str, Path, Config], checkpoint: Optional[str] = None, @@ -289,7 +288,7 @@ def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int, test_pipeline = build_test_pipeline(cfg) data = test_pipeline(data) - if not next(model.parameters()).is_cuda: + if not next(model.parameters()).is_cuda and not (next(model.parameters()).device.type == 'musa'): for m in model.modules(): assert not isinstance( m, RoIPool diff --git a/mmdet/models/backbones/csp_darknet.py b/mmdet/models/backbones/csp_darknet.py index a890b486f25..03e43124d45 100644 --- a/mmdet/models/backbones/csp_darknet.py +++ b/mmdet/models/backbones/csp_darknet.py @@ -115,9 +115,15 @@ def __init__(self, def forward(self, x): x = self.conv1(x) - with torch.cuda.amp.autocast(enabled=False): - x = torch.cat( - [x] + [pooling(x) for pooling in self.poolings], dim=1) + if x.device.type == 'musa': + import torch_musa # local import: torch_musa is only present on MUSA installs + with torch_musa.core.amp.autocast(enabled=False): + x = torch.cat( + [x] + [pooling(x) for pooling in self.poolings], dim=1) + else: + with torch.cuda.amp.autocast(enabled=False): + x = torch.cat( + [x] + [pooling(x) for pooling in self.poolings], dim=1) x = self.conv2(x) return x diff --git a/mmdet/models/layers/se_layer.py b/mmdet/models/layers/se_layer.py index 5598dabaf6f..5b7a280809a 100644 --- a/mmdet/models/layers/se_layer.py +++ b/mmdet/models/layers/se_layer.py @@ -154,9 +154,14 @@ def __init__(self, channels: int, init_cfg: OptMultiConfig = None) -> None: self.act = nn.Hardsigmoid(inplace=True) def forward(self, x: Tensor) -> Tensor: """Forward function for ChannelAttention.""" - with torch.cuda.amp.autocast(enabled=False): - out = self.global_avgpool(x) + if x.device.type == 'musa': + import torch_musa # local import: torch_musa is only present on MUSA installs + with torch_musa.core.amp.autocast(enabled=False): + out = self.global_avgpool(x) + else: + with torch.cuda.amp.autocast(enabled=False): + out = self.global_avgpool(x) out = self.fc(out) out = self.act(out) return x * out diff --git a/mmdet/models/losses/focal_loss.py b/mmdet/models/losses/focal_loss.py index 15bef293a59..60f4e9bab10 100644 --- a/mmdet/models/losses/focal_loss.py +++ b/mmdet/models/losses/focal_loss.py @@ -234,6 +234,8 @@ def forward(self, calculate_loss_func = py_sigmoid_focal_loss elif torch.cuda.is_available() and pred.is_cuda: calculate_loss_func = sigmoid_focal_loss + elif pred.device.type == 'musa': + calculate_loss_func = sigmoid_focal_loss else: num_classes = pred.size(1) target = F.one_hot(target, num_classes=num_classes + 1) diff --git a/mmdet/models/task_modules/assigners/iou2d_calculator.py b/mmdet/models/task_modules/assigners/iou2d_calculator.py index b6daa94feb4..facb63a8acb 100644 --- a/mmdet/models/task_modules/assigners/iou2d_calculator.py +++ b/mmdet/models/task_modules/assigners/iou2d_calculator.py @@ -54,7 +54,7 @@ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False): bboxes1 = cast_tensor_type(bboxes1, self.scale, self.dtype) bboxes2 = cast_tensor_type(bboxes2, self.scale, self.dtype) overlaps = bbox_overlaps(bboxes1, bboxes2, mode, is_aligned) - if not overlaps.is_cuda and overlaps.dtype == torch.float16: + if not overlaps.is_cuda and overlaps.device.type != 'musa' and
overlaps.dtype == torch.float16: # resume cpu float32 overlaps = overlaps.float() return overlaps diff --git a/mmdet/models/task_modules/assigners/sim_ota_assigner.py b/mmdet/models/task_modules/assigners/sim_ota_assigner.py index d54a8b91d13..fc397a3514e 100644 --- a/mmdet/models/task_modules/assigners/sim_ota_assigner.py +++ b/mmdet/models/task_modules/assigners/sim_ota_assigner.py @@ -115,13 +115,28 @@ def assign(self, valid_pred_scores = valid_pred_scores.unsqueeze(1).repeat(1, num_gt, 1) # disable AMP autocast and calculate BCE with FP32 to avoid overflow - with torch.cuda.amp.autocast(enabled=False): - cls_cost = ( - F.binary_cross_entropy( - valid_pred_scores.to(dtype=torch.float32), - gt_onehot_label, - reduction='none', - ).sum(-1).to(dtype=valid_pred_scores.dtype)) + try: + import torch_musa + IS_MUSA_AVAILABLE = True + except Exception: + IS_MUSA_AVAILABLE = False + + if IS_MUSA_AVAILABLE: + with torch_musa.core.amp.autocast(enabled=False): + cls_cost = ( + F.binary_cross_entropy( + valid_pred_scores.to(dtype=torch.float32), + gt_onehot_label, + reduction='none', + ).sum(-1).to(dtype=valid_pred_scores.dtype)) + else: + with torch.cuda.amp.autocast(enabled=False): + cls_cost = ( + F.binary_cross_entropy( + valid_pred_scores.to(dtype=torch.float32), + gt_onehot_label, + reduction='none', + ).sum(-1).to(dtype=valid_pred_scores.dtype)) cost_matrix = ( cls_cost * self.cls_weight + iou_cost * self.iou_weight + diff --git a/mmdet/models/task_modules/samplers/random_sampler.py b/mmdet/models/task_modules/samplers/random_sampler.py index fa03665fc36..61a09dc6690 100644 --- a/mmdet/models/task_modules/samplers/random_sampler.py +++ b/mmdet/models/task_modules/samplers/random_sampler.py @@ -53,11 +53,17 @@ def random_choice(self, gallery: Union[Tensor, ndarray, list], Tensor or ndarray: sampled indices. 
""" assert len(gallery) >= num - + try: + import torch_musa + IS_MUSA_AVAILABLE = True + except Exception: + IS_MUSA_AVAILABLE = False is_tensor = isinstance(gallery, torch.Tensor) if not is_tensor: if torch.cuda.is_available(): device = torch.cuda.current_device() + elif IS_MUSA_AVAILABLE: + device = torch.musa.current_device() else: device = 'cpu' gallery = torch.tensor(gallery, dtype=torch.long, device=device) diff --git a/mmdet/models/task_modules/samplers/score_hlr_sampler.py b/mmdet/models/task_modules/samplers/score_hlr_sampler.py index 0227585b923..0d48c2e3840 100644 --- a/mmdet/models/task_modules/samplers/score_hlr_sampler.py +++ b/mmdet/models/task_modules/samplers/score_hlr_sampler.py @@ -89,10 +89,17 @@ def random_choice(gallery: Union[Tensor, ndarray, list], """ assert len(gallery) >= num + try: + import torch_musa + IS_MUSA_AVAILABLE = True + except Exception: + IS_MUSA_AVAILABLE = False is_tensor = isinstance(gallery, torch.Tensor) if not is_tensor: if torch.cuda.is_available(): device = torch.cuda.current_device() + elif IS_MUSA_AVAILABLE: + device = torch.musa.current_device() else: device = 'cpu' gallery = torch.tensor(gallery, dtype=torch.long, device=device) diff --git a/mmdet/models/task_modules/tracking/aflink.py b/mmdet/models/task_modules/tracking/aflink.py index 52461067e37..d9f1b68e779 100644 --- a/mmdet/models/task_modules/tracking/aflink.py +++ b/mmdet/models/task_modules/tracking/aflink.py @@ -158,10 +158,17 @@ def __init__(self, self.confidence_threshold = confidence_threshold self.model = AFLinkModel() + try: + import torch_musa + IS_MUSA_AVAILABLE = True + except Exception: + IS_MUSA_AVAILABLE = False if checkpoint: load_checkpoint(self.model, checkpoint) if torch.cuda.is_available(): self.model.cuda() + elif torch.musa.is_available(): + self.model.musa() self.model.eval() self.device = next(self.model.parameters()).device diff --git a/mmdet/structures/bbox/bbox_overlaps.py b/mmdet/structures/bbox/bbox_overlaps.py index 8e3435d28b3..bf2cbb30c3c 100644 --- a/mmdet/structures/bbox/bbox_overlaps.py +++ b/mmdet/structures/bbox/bbox_overlaps.py @@ -3,7 +3,7 @@ def fp16_clamp(x, min=None, max=None): - if not x.is_cuda and x.dtype == torch.float16: + if not x.is_cuda and x.device.type != 'musa' and x.dtype == torch.float16: # clamp for cpu float16, tensor fp16 has no clamp implementation return x.float().clamp(min, max).half() diff --git a/mmdet/utils/benchmark.py b/mmdet/utils/benchmark.py index 5419b2d175e..e5efc065c7d 100644 --- a/mmdet/utils/benchmark.py +++ b/mmdet/utils/benchmark.py @@ -19,6 +19,7 @@ from torch.nn.parallel import DistributedDataParallel from mmdet.registry import DATASETS, MODELS +from mmengine.device.utils import is_musa_available try: import psutil @@ -193,14 +194,23 @@ def _init_model(self, checkpoint: str, is_fuse_conv_bn: bool) -> nn.Module: if is_fuse_conv_bn: model = fuse_conv_bn(model) - model = model.cuda() - if self.distributed: - model = DistributedDataParallel( - model, - device_ids=[torch.cuda.current_device()], - broadcast_buffers=False, - find_unused_parameters=False) + if is_musa_available(): + model = model.musa() + if self.distributed: + model = DistributedDataParallel( + model, + device_ids=[torch.musa.current_device()], + broadcast_buffers=False, + find_unused_parameters=False) + else : + model = model.cuda() + if self.distributed: + model = DistributedDataParallel( + model, + device_ids=[torch.cuda.current_device()], + broadcast_buffers=False, + find_unused_parameters=False) model.eval() return model @@ -209,37 +219,69 
@@ def run_once(self) -> dict: """Executes the benchmark once.""" pure_inf_time = 0 fps = 0 + if is_musa_available(): + for i, data in enumerate(self.data_loader): - for i, data in enumerate(self.data_loader): + if (i + 1) % self.log_interval == 0: + print_log('==================================', self.logger) - if (i + 1) % self.log_interval == 0: - print_log('==================================', self.logger) + torch.musa.synchronize() + start_time = time.perf_counter() - torch.cuda.synchronize() - start_time = time.perf_counter() + with torch.no_grad(): + self.model.test_step(data) - with torch.no_grad(): - self.model.test_step(data) + torch.musa.synchronize() + elapsed = time.perf_counter() - start_time - torch.cuda.synchronize() - elapsed = time.perf_counter() - start_time + if i >= self.num_warmup: + pure_inf_time += elapsed + if (i + 1) % self.log_interval == 0: + fps = (i + 1 - self.num_warmup) / pure_inf_time + musa_memory = get_max_musa_memory() - if i >= self.num_warmup: - pure_inf_time += elapsed - if (i + 1) % self.log_interval == 0: + print_log( + f'Done image [{i + 1:<3}/{self.max_iter}], ' + f'fps: {fps:.1f} img/s, ' + f'times per image: {1000 / fps:.1f} ms/img, ' + f'musa memory: {musa_memory} MB', self.logger) + print_process_memory(self._process, self.logger) + + if (i + 1) == self.max_iter: fps = (i + 1 - self.num_warmup) / pure_inf_time - cuda_memory = get_max_cuda_memory() + break - print_log( - f'Done image [{i + 1:<3}/{self.max_iter}], ' - f'fps: {fps:.1f} img/s, ' - f'times per image: {1000 / fps:.1f} ms/img, ' - f'cuda memory: {cuda_memory} MB', self.logger) - print_process_memory(self._process, self.logger) + else: + for i, data in enumerate(self.data_loader): - if (i + 1) == self.max_iter: - fps = (i + 1 - self.num_warmup) / pure_inf_time - break + if (i + 1) % self.log_interval == 0: + print_log('==================================', self.logger) + + torch.cuda.synchronize() + start_time = time.perf_counter() + + with torch.no_grad(): + self.model.test_step(data) + + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= self.num_warmup: + pure_inf_time += elapsed + if (i + 1) % self.log_interval == 0: + fps = (i + 1 - self.num_warmup) / pure_inf_time + cuda_memory = get_max_cuda_memory() + + print_log( + f'Done image [{i + 1:<3}/{self.max_iter}], ' + f'fps: {fps:.1f} img/s, ' + f'times per image: {1000 / fps:.1f} ms/img, ' + f'cuda memory: {cuda_memory} MB', self.logger) + print_process_memory(self._process, self.logger) + + if (i + 1) == self.max_iter: + fps = (i + 1 - self.num_warmup) / pure_inf_time + break return {'fps': fps} diff --git a/mmdet/utils/contextmanagers.py b/mmdet/utils/contextmanagers.py index fa12bfcaff1..a56d587faad 100644 --- a/mmdet/utils/contextmanagers.py +++ b/mmdet/utils/contextmanagers.py @@ -7,7 +7,7 @@ from typing import List import torch - +from mmengine.device.utils import is_musa_available logger = logging.getLogger(__name__) DEBUG_COMPLETED_TIME = bool(os.environ.get('DEBUG_COMPLETED_TIME', False)) @@ -20,72 +20,135 @@ async def completed(trace_name='', streams: List[torch.cuda.Stream] = None): """Async context manager that waits for work to complete on given CUDA streams.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): yield return + if is_musa_available(): + stream_before_context_switch = torch.musa.current_stream() + if not streams: + streams = [stream_before_context_switch] + else: + streams = [s if s else stream_before_context_switch for s in 
streams] - stream_before_context_switch = torch.cuda.current_stream() - if not streams: - streams = [stream_before_context_switch] - else: - streams = [s if s else stream_before_context_switch for s in streams] + end_events = [ + torch.musa.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams + ] - end_events = [ - torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams - ] + if DEBUG_COMPLETED_TIME: + start = torch.musa.Event(enable_timing=True) + stream_before_context_switch.record_event(start) + + cpu_start = time.monotonic() + logger.debug('%s %s starting, streams: %s', trace_name, name, streams) + grad_enabled_before = torch.is_grad_enabled() + try: + yield + finally: + current_stream = torch.musa.current_stream() + assert current_stream == stream_before_context_switch - if DEBUG_COMPLETED_TIME: - start = torch.cuda.Event(enable_timing=True) - stream_before_context_switch.record_event(start) + if DEBUG_COMPLETED_TIME: + cpu_end = time.monotonic() + for i, stream in enumerate(streams): + event = end_events[i] + stream.record_event(event) + + grad_enabled_after = torch.is_grad_enabled() + + # observed change of torch.is_grad_enabled() during concurrent run of + # async_test_bboxes code + assert (grad_enabled_before == grad_enabled_after + ), 'Unexpected is_grad_enabled() value change' + + are_done = [e.query() for e in end_events] + logger.debug('%s %s completed: %s streams: %s', trace_name, name, + are_done, streams) + with torch.musa.stream(stream_before_context_switch): + while not all(are_done): + await asyncio.sleep(sleep_interval) + are_done = [e.query() for e in end_events] + logger.debug( + '%s %s completed: %s streams: %s', + trace_name, + name, + are_done, + streams, + ) + + current_stream = torch.musa.current_stream() + assert current_stream == stream_before_context_switch + + if DEBUG_COMPLETED_TIME: + cpu_time = (cpu_end - cpu_start) * 1000 + stream_times_ms = '' + for i, stream in enumerate(streams): + elapsed_time = start.elapsed_time(end_events[i]) + stream_times_ms += f' {stream} {elapsed_time:.2f} ms' + logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, + stream_times_ms) + else: + stream_before_context_switch = torch.cuda.current_stream() + if not streams: + streams = [stream_before_context_switch] + else: + streams = [s if s else stream_before_context_switch for s in streams] - cpu_start = time.monotonic() - logger.debug('%s %s starting, streams: %s', trace_name, name, streams) - grad_enabled_before = torch.is_grad_enabled() - try: - yield - finally: - current_stream = torch.cuda.current_stream() - assert current_stream == stream_before_context_switch + end_events = [ + torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams + ] if DEBUG_COMPLETED_TIME: - cpu_end = time.monotonic() - for i, stream in enumerate(streams): - event = end_events[i] - stream.record_event(event) - - grad_enabled_after = torch.is_grad_enabled() - - # observed change of torch.is_grad_enabled() during concurrent run of - # async_test_bboxes code - assert (grad_enabled_before == grad_enabled_after - ), 'Unexpected is_grad_enabled() value change' - - are_done = [e.query() for e in end_events] - logger.debug('%s %s completed: %s streams: %s', trace_name, name, - are_done, streams) - with torch.cuda.stream(stream_before_context_switch): - while not all(are_done): - await asyncio.sleep(sleep_interval) - are_done = [e.query() for e in end_events] - logger.debug( - '%s %s completed: %s streams: %s', - trace_name, - name, - are_done, - streams, - ) - - 
current_stream = torch.cuda.current_stream() - assert current_stream == stream_before_context_switch + start = torch.cuda.Event(enable_timing=True) + stream_before_context_switch.record_event(start) - if DEBUG_COMPLETED_TIME: - cpu_time = (cpu_end - cpu_start) * 1000 - stream_times_ms = '' + cpu_start = time.monotonic() + logger.debug('%s %s starting, streams: %s', trace_name, name, streams) + grad_enabled_before = torch.is_grad_enabled() + try: + yield + finally: + current_stream = torch.cuda.current_stream() + assert current_stream == stream_before_context_switch + + if DEBUG_COMPLETED_TIME: + cpu_end = time.monotonic() for i, stream in enumerate(streams): - elapsed_time = start.elapsed_time(end_events[i]) - stream_times_ms += f' {stream} {elapsed_time:.2f} ms' - logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, - stream_times_ms) + event = end_events[i] + stream.record_event(event) + + grad_enabled_after = torch.is_grad_enabled() + + # observed change of torch.is_grad_enabled() during concurrent run of + # async_test_bboxes code + assert (grad_enabled_before == grad_enabled_after + ), 'Unexpected is_grad_enabled() value change' + + are_done = [e.query() for e in end_events] + logger.debug('%s %s completed: %s streams: %s', trace_name, name, + are_done, streams) + with torch.cuda.stream(stream_before_context_switch): + while not all(are_done): + await asyncio.sleep(sleep_interval) + are_done = [e.query() for e in end_events] + logger.debug( + '%s %s completed: %s streams: %s', + trace_name, + name, + are_done, + streams, + ) + + current_stream = torch.cuda.current_stream() + assert current_stream == stream_before_context_switch + + if DEBUG_COMPLETED_TIME: + cpu_time = (cpu_end - cpu_start) * 1000 + stream_times_ms = '' + for i, stream in enumerate(streams): + elapsed_time = start.elapsed_time(end_events[i]) + stream_times_ms += f' {stream} {elapsed_time:.2f} ms' + logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, + stream_times_ms) @contextlib.asynccontextmanager @@ -98,25 +161,44 @@ async def concurrent(streamqueue: asyncio.Queue, Queue tasks define the pool of streams used for concurrent execution. 
""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): yield return - - initial_stream = torch.cuda.current_stream() - - with torch.cuda.stream(initial_stream): - stream = await streamqueue.get() - assert isinstance(stream, torch.cuda.Stream) - - try: - with torch.cuda.stream(stream): - logger.debug('%s %s is starting, stream: %s', trace_name, name, - stream) - yield - current = torch.cuda.current_stream() - assert current == stream - logger.debug('%s %s has finished, stream: %s', trace_name, - name, stream) - finally: - streamqueue.task_done() - streamqueue.put_nowait(stream) + if is_musa_available(): + initial_stream = torch.musa.current_stream() + + with torch.musa.stream(initial_stream): + stream = await streamqueue.get() + assert isinstance(stream, torch.musa.Stream) + + try: + with torch.musa.stream(stream): + logger.debug('%s %s is starting, stream: %s', trace_name, name, + stream) + yield + current = torch.musa.current_stream() + assert current == stream + logger.debug('%s %s has finished, stream: %s', trace_name, + name, stream) + finally: + streamqueue.task_done() + streamqueue.put_nowait(stream) + else: + initial_stream = torch.cuda.current_stream() + + with torch.cuda.stream(initial_stream): + stream = await streamqueue.get() + assert isinstance(stream, torch.cuda.Stream) + + try: + with torch.cuda.stream(stream): + logger.debug('%s %s is starting, stream: %s', trace_name, name, + stream) + yield + current = torch.cuda.current_stream() + assert current == stream + logger.debug('%s %s has finished, stream: %s', trace_name, + name, stream) + finally: + streamqueue.task_done() + streamqueue.put_nowait(stream) diff --git a/mmdet/utils/memory.py b/mmdet/utils/memory.py index b6f9cbc7f9e..af29d4df8b0 100644 --- a/mmdet/utils/memory.py +++ b/mmdet/utils/memory.py @@ -6,7 +6,7 @@ import torch from mmengine.logging import MMLogger - +from mmengine.device.utils import is_musa_available def cast_tensor_type(inputs, src_type=None, dst_type=None): """Recursively convert Tensor in inputs from ``src_type`` to ``dst_type``. @@ -143,7 +143,10 @@ def wrapped(*args, **kwargs): return func(*args, **kwargs) # Clear cache and retry - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() with _ignore_torch_cuda_oom(): return func(*args, **kwargs) diff --git a/mmdet/utils/profiling.py b/mmdet/utils/profiling.py index 2f53f456c72..273f241b109 100644 --- a/mmdet/utils/profiling.py +++ b/mmdet/utils/profiling.py @@ -4,7 +4,7 @@ import time import torch - +from mmengine.device.utils import is_musa_available if sys.version_info >= (3, 7): @contextlib.contextmanager @@ -18,13 +18,19 @@ def profile_time(trace_name, Useful as a temporary context manager to find sweet spots of code suitable for async implementation. 
""" - if (not enabled) or not torch.cuda.is_available(): + if (not enabled) or not torch.cuda.is_available() and is_musa_available(): yield return - stream = stream if stream else torch.cuda.current_stream() - end_stream = end_stream if end_stream else stream - start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) + if is_musa_available(): + stream = stream if stream else torch.musa.current_stream() + end_stream = end_stream if end_stream else stream + start = torch.musa.Event(enable_timing=True) + end = torch.musa.Event(enable_timing=True) + else: + stream = stream if stream else torch.cuda.current_stream() + end_stream = end_stream if end_stream else stream + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) stream.record_event(start) try: cpu_start = time.monotonic() diff --git a/tests/test_apis/test_inference.py b/tests/test_apis/test_inference.py index e42f86c64e8..a72710eb5a1 100644 --- a/tests/test_apis/test_inference.py +++ b/tests/test_apis/test_inference.py @@ -8,16 +8,25 @@ from mmdet.apis import inference_detector, init_detector from mmdet.structures import DetDataSample from mmdet.utils import register_all_modules - +from mmengine.device.utils import is_musa_available # TODO: Waiting to fix multiple call error bug register_all_modules() -@pytest.mark.parametrize('config,devices', - [('configs/retinanet/retinanet_r18_fpn_1x_coco.py', - ('cpu', 'cuda'))]) -def test_init_detector(config, devices): - assert all([device in ['cpu', 'cuda'] for device in devices]) +@pytest.mark.parametrize('config', ['configs/retinanet/retinanet_r18_fpn_1x_coco.py']) +@pytest.mark.parametrize('device', [ + 'cpu', + pytest.param( + 'cuda', + marks=pytest.mark.skipif( + not torch.cuda.is_available(), reason='requires cuda support')), + pytest.param( + 'musa', + marks=pytest.mark.skipif( + not is_musa_available(), reason='requires musa support')), +]) +def test_init_detector(config, device): + # assert all([device in ['cpu', 'cuda','musa'] for device in devices]) project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) project_dir = os.path.join(project_dir, '..') @@ -32,28 +41,32 @@ def test_init_detector(config, devices): init_cfg=dict( type='Pretrained', checkpoint='torchvision://resnet18')))) - for device in devices: - if device == 'cuda' and not torch.cuda.is_available(): - pytest.skip('test requires GPU and torch+cuda') - - model = init_detector( - config_file, device=device, cfg_options=cfg_options) + # for device in devices: + # pytest.set_trace() + # if device == 'cuda' and not torch.cuda.is_available(): + # pytest.skip('test requires GPU and torch+cuda') + # elif device == 'musa' and not is_musa_available(): + # print('$$$$$$$$$$$$$$$$$$$$$$$') + # pytest.skip('test requires GPU and torch+musa') + model = init_detector( + config_file, device=device, cfg_options=cfg_options) - # test init_detector with :obj:`Path` - config_path_object = Path(config_file) - model = init_detector(config_path_object, device=device) + # test init_detector with :obj:`Path` + config_path_object = Path(config_file) + model = init_detector(config_path_object, device=device) - # test init_detector with undesirable type - with pytest.raises(TypeError): - config_list = [config_file] - model = init_detector(config_list) # noqa: F841 + # test init_detector with undesirable type + # pytest.set_trace() + with pytest.raises(TypeError): + config_list = [config_file] + model = init_detector(config_list) # noqa: F841 
@pytest.mark.parametrize('config,devices', [('configs/retinanet/retinanet_r18_fpn_1x_coco.py', - ('cpu', 'cuda'))]) + ('cpu', 'cuda','musa'))]) def test_inference_detector(config, devices): - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) project_dir = os.path.join(project_dir, '..') @@ -68,7 +81,8 @@ def test_inference_detector(config, devices): for device in devices: if device == 'cuda' and not torch.cuda.is_available(): pytest.skip('test requires GPU and torch+cuda') - + elif device == 'musa' and not is_musa_available(): + pytest.skip('test requires GPU and torch+musa') model = init_detector(config_file, device=device) result = inference_detector(model, img1) assert isinstance(result, DetDataSample) diff --git a/tests/test_engine/test_hooks/test_mean_teacher_hook.py b/tests/test_engine/test_hooks/test_mean_teacher_hook.py index 41d056e4071..4692234a96e 100644 --- a/tests/test_engine/test_hooks/test_mean_teacher_hook.py +++ b/tests/test_engine/test_hooks/test_mean_teacher_hook.py @@ -11,6 +11,7 @@ from mmengine.registry import MODEL_WRAPPERS from mmengine.runner import Runner from torch.utils.data import Dataset +from mmengine.device.utils import is_musa_available from mmdet.registry import DATASETS from mmdet.utils import register_all_modules @@ -98,7 +99,7 @@ def tearDown(self): self.temp_dir.cleanup() def test_mean_teacher_hook(self): - device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + device = 'cuda:0' if torch.cuda.is_available() else ('musa:0' if is_musa_available() else 'cpu') model = ToyModel2().to(device) runner = Runner( model=model, diff --git a/tests/test_engine/test_runner/test_loops.py b/tests/test_engine/test_runner/test_loops.py index 6bf9cb4795a..e17cbfbfbf7 100644 --- a/tests/test_engine/test_runner/test_loops.py +++ b/tests/test_engine/test_runner/test_loops.py @@ -10,6 +10,7 @@ from mmengine.optim import OptimWrapper from mmengine.runner import Runner from torch.utils.data import Dataset +from mmengine.device.utils import is_musa_available from mmdet.registry import DATASETS from mmdet.utils import register_all_modules @@ -84,7 +85,7 @@ def tearDown(self): self.temp_dir.cleanup() def test_teacher_student_val_loop(self): - device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + device = 'cuda:0' if torch.cuda.is_available() else ('musa:0' if is_musa_available() else 'cpu') model = ToyModel2().to(device) evaluator = Mock() evaluator.evaluate = Mock(return_value=dict(acc=0.5)) diff --git a/tests/test_models/test_detectors/test_cornernet.py b/tests/test_models/test_detectors/test_cornernet.py index 10bc62649f6..0a161ee4abf 100644 --- a/tests/test_models/test_detectors/test_cornernet.py +++ b/tests/test_models/test_detectors/test_cornernet.py @@ -8,7 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules - +from mmengine.device.utils import is_musa_available class TestCornerNet(TestCase): @@ -50,8 +50,8 @@ def test_init(self): self.assertTrue(detector.backbone is not None) self.assertTrue(not hasattr(detector, 'neck')) - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_cornernet_forward_loss_mode(self): from mmdet.registry import MODELS 
detector = MODELS.build(self.model_cfg) @@ -62,8 +62,8 @@ def test_cornernet_forward_loss_mode(self): losses = detector.forward(**data, mode='loss') assert isinstance(losses, dict) - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_cornernet_forward_predict_mode(self): from mmdet.registry import MODELS detector = MODELS.build(self.model_cfg) @@ -79,8 +79,8 @@ def test_cornernet_forward_predict_mode(self): assert len(batch_results) == 2 assert isinstance(batch_results[0], DetDataSample) - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_cornernet_forward_tensor_mode(self): from mmdet.registry import MODELS detector = MODELS.build(self.model_cfg) diff --git a/tests/test_models/test_detectors/test_glip.py b/tests/test_models/test_detectors/test_glip.py index dc38d3142d2..863659040ae 100644 --- a/tests/test_models/test_detectors/test_glip.py +++ b/tests/test_models/test_detectors/test_glip.py @@ -8,6 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestGLIP(TestCase): @@ -37,7 +38,7 @@ def test_glip_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -46,7 +47,10 @@ def test_glip_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() # test custom_entities is True packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], diff --git a/tests/test_models/test_detectors/test_kd_single_stage.py b/tests/test_models/test_detectors/test_kd_single_stage.py index 93d886263a8..a585788b795 100644 --- a/tests/test_models/test_detectors/test_kd_single_stage.py +++ b/tests/test_models/test_detectors/test_kd_single_stage.py @@ -9,6 +9,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestKDSingleStageDetector(TestCase): @@ -28,13 +29,14 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([('ld/ld_r18-gflv1-r101_fpn_1x_coco.py', ('cpu', - 'cuda'))]) + 'cuda', + 'musa'))]) def test_single_stage_forward_train(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -43,6 +45,10 @@ def test_single_stage_forward_train(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if 
device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, True) @@ -57,7 +63,7 @@ def test_single_stage_forward_test(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -66,7 +72,10 @@ def test_single_stage_forward_test(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_detectors/test_maskformer.py b/tests/test_models/test_detectors/test_maskformer.py index 3eeb04bfd55..23516553837 100644 --- a/tests/test_models/test_detectors/test_maskformer.py +++ b/tests/test_models/test_detectors/test_maskformer.py @@ -8,6 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing._utils import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestMaskFormer(unittest.TestCase): @@ -54,13 +55,15 @@ def test_init(self): assert detector.backbone assert detector.panoptic_head - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ),('musa',)]) def test_forward_loss_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( @@ -74,12 +77,14 @@ def test_forward_loss_mode(self, device): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_predict_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( 2, @@ -95,12 +100,14 @@ def test_forward_predict_mode(self, device): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_tensor_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( @@ -162,7 +169,10 @@ 
def test_init(self): ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), - ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') + ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') + + ]) def test_forward_loss_mode(self, device, cfg_path): print(device, cfg_path) @@ -172,6 +182,9 @@ def test_forward_loss_mode(self, device, cfg_path): if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.to(device) packed_inputs = demo_mm_inputs( @@ -189,7 +202,9 @@ def test_forward_loss_mode(self, device, cfg_path): ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), - ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') + ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') ]) def test_forward_predict_mode(self, device, cfg_path): with_semantic = 'panoptic' in cfg_path detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( 2, @@ -216,7 +233,9 @@ def test_forward_predict_mode(self, device, cfg_path): ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), - ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') + ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') ]) def test_forward_tensor_mode(self, device, cfg_path): with_semantic = 'panoptic' in cfg_path detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( diff --git a/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py b/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py index 9234554f7b3..c08f61b2449 100644 --- a/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py +++ b/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py @@ -8,6 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing._utils import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import
is_musa_available class TestTwoStagePanopticSegmentor(unittest.TestCase): @@ -34,13 +35,15 @@ def test_init(self): assert detector.with_semantic_head assert detector.with_panoptic_fusion_head - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_loss_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( @@ -54,12 +57,14 @@ def test_forward_loss_mode(self, device): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_predict_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( 2, @@ -75,12 +80,14 @@ def test_forward_predict_mode(self, device): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_tensor_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( diff --git a/tests/test_models/test_detectors/test_rpn.py b/tests/test_models/test_detectors/test_rpn.py index 60f7492a96b..97cabaa28b9 100644 --- a/tests/test_models/test_detectors/test_rpn.py +++ b/tests/test_models/test_detectors/test_rpn.py @@ -8,6 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestRPN(TestCase): @@ -34,7 +35,7 @@ def test_init(self, cfg_file): detector = MODELS.build(model) self.assertEqual(detector.bbox_head.num_classes, 1) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa'))]) def test_rpn_forward_loss_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -43,7 +44,7 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -52,6 +53,10 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires 
GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, True) @@ -59,7 +64,7 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa'))]) def test_rpn_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -68,7 +73,7 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -77,6 +82,10 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) @@ -87,7 +96,7 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa'))]) def test_rpn_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -96,7 +105,7 @@ def test_rpn_forward_tensor_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -105,7 +114,11 @@ def test_rpn_forward_tensor_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() + packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) batch_results = detector.forward(**data, mode='tensor') diff --git a/tests/test_models/test_detectors/test_single_stage.py b/tests/test_models/test_detectors/test_single_stage.py index 22dbd1a98cb..071a7ce977e 100644 --- a/tests/test_models/test_detectors/test_single_stage.py +++ b/tests/test_models/test_detectors/test_single_stage.py @@ -10,6 +10,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestSingleStageDetector(TestCase): @@ -36,11 +37,11 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda','musa')), ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', 
('cpu', - 'cuda')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda')), + 'cuda','musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda','musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda','musa')), ]) def test_single_stage_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -51,7 +52,7 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -61,6 +62,10 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, True) @@ -68,18 +73,18 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): self.assertIsInstance(losses, dict) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda','musa')), ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda')), + 'cuda','musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda','musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda','musa')), ]) def test_single_stage_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -88,6 +93,10 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) @@ -99,18 +108,19 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): self.assertIsInstance(batch_results[0], DetDataSample) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda','musa')), ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda')), + 'cuda', + 'musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda','musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda','musa')), ]) def test_single_stage_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - 
assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -119,6 +129,10 @@ def test_single_stage_forward_tensor_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_detectors/test_single_stage_instance_seg.py b/tests/test_models/test_detectors/test_single_stage_instance_seg.py index 51530341241..d7927f38912 100644 --- a/tests/test_models/test_detectors/test_single_stage_instance_seg.py +++ b/tests/test_models/test_detectors/test_single_stage_instance_seg.py @@ -8,6 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestSingleStageInstanceSegmentor(TestCase): @@ -33,9 +34,9 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([ - ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda')), - ('solov2/solov2-light_r18_fpn_ms-3x_coco.py', ('cpu', 'cuda')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda')), + ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa')), + ('solov2/solov2-light_r18_fpn_ms-3x_coco.py', ('cpu', 'cuda','musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda','musa')), ]) def test_single_stage_forward_loss_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) @@ -45,7 +46,7 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -55,6 +56,10 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], with_mask=True) @@ -63,8 +68,8 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): self.assertIsInstance(losses, dict) @parameterized.expand([ - ('solo/decoupled-solo-light_r50_fpn_3x_coco.py', ('cpu', 'cuda')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda')), + ('solo/decoupled-solo-light_r50_fpn_3x_coco.py', ('cpu', 'cuda','musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda','musa')), ]) def test_single_stage_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) @@ -74,7 +79,7 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -83,6 +88,10 @@ def test_single_stage_forward_predict_mode(self, cfg_file, 
devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], with_mask=True) @@ -95,16 +104,16 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): self.assertIsInstance(batch_results[0], DetDataSample) @parameterized.expand([ - ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda')), - ('solov2/solov2_r50_fpn_1x_coco.py', ('cpu', 'cuda')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda')), + ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa')), + ('solov2/solov2_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda','musa')), ]) def test_single_stage_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -113,7 +122,11 @@ def test_single_stage_forward_tensor_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() - + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() + packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) batch_results = detector.forward(**data, mode='tensor') diff --git a/tests/test_models/test_detectors/test_two_stage.py b/tests/test_models/test_detectors/test_two_stage.py index 5609c0821dc..e20cd81489e 100644 --- a/tests/test_models/test_detectors/test_two_stage.py +++ b/tests/test_models/test_detectors/test_two_stage.py @@ -8,7 +8,7 @@ from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules - +from mmengine.device.utils import is_musa_available class TestTwoStageBBox(TestCase): @@ -55,9 +55,12 @@ def test_two_stage_forward_loss_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) @@ -81,9 +84,12 @@ def test_two_stage_forward_predict_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) @@ -169,9 +175,12 @@ def test_two_stage_forward_loss_mode(self, cfg_file): from mmdet.registry import MODELS 
detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], with_mask=True) @@ -195,9 +204,12 @@ def test_two_stage_forward_predict_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 256, 256], [3, 255, 260]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_mot/test_byte_track.py b/tests/test_models/test_mot/test_byte_track.py index a48548c7510..5d7c0e70796 100644 --- a/tests/test_models/test_mot/test_byte_track.py +++ b/tests/test_models/test_mot/test_byte_track.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestByteTrack(TestCase): @@ -33,14 +34,14 @@ def test_bytetrack_init(self, cfg_file): @parameterized.expand([ ('bytetrack/bytetrack_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_bytetrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_bytetrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -56,6 +57,10 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = model.data_preprocessor(packed_inputs, True) @@ -64,7 +69,7 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('bytetrack/bytetrack_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -72,7 +77,7 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -86,6 +91,10 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = 
model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_mot/test_deep_sort.py b/tests/test_models/test_mot/test_deep_sort.py index 72dfeb43510..99883b7e9b8 100644 --- a/tests/test_models/test_mot/test_deep_sort.py +++ b/tests/test_models/test_mot/test_deep_sort.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestDeepSORT(TestCase): @@ -31,7 +32,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('deepsort/deepsort_faster-rcnn_r50_fpn_8xb2-4e' - '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -39,7 +40,7 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -49,6 +50,10 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_mot/test_oc_sort.py b/tests/test_models/test_mot/test_oc_sort.py index 5bf29513e00..2f6ad8005df 100644 --- a/tests/test_models/test_mot/test_oc_sort.py +++ b/tests/test_models/test_mot/test_oc_sort.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestByteTrack(TestCase): @@ -33,14 +34,14 @@ def test_bytetrack_init(self, cfg_file): @parameterized.expand([ ('ocsort/ocsort_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_bytetrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_bytetrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -56,6 +57,10 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = model.data_preprocessor(packed_inputs, True) @@ -64,7 +69,7 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('ocsort/ocsort_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda','musa')), 
]) def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -72,7 +77,7 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_mot/test_qdtrack.py b/tests/test_models/test_mot/test_qdtrack.py index 714e022fdec..79987fefd7c 100644 --- a/tests/test_models/test_mot/test_qdtrack.py +++ b/tests/test_models/test_mot/test_qdtrack.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestQDTrack(TestCase): @@ -31,14 +32,14 @@ def test_qdtrack_init(self, cfg_file): @parameterized.expand([ ('qdtrack/qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17' - 'halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + 'halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_qdtrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_qdtrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -49,6 +50,10 @@ def test_qdtrack_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, @@ -64,7 +69,7 @@ def test_qdtrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('qdtrack/qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17' - 'halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + 'halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_qdtrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -72,7 +77,7 @@ def test_qdtrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -82,7 +87,11 @@ def test_qdtrack_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() + packed_inputs = demo_track_inputs( batch_size=1, num_frames=1, image_shapes=(3, 128, 128)) out_data = model.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_mot/test_sort.py b/tests/test_models/test_mot/test_sort.py index ec15a6bdde2..88edd8f8a34 100644 --- a/tests/test_models/test_mot/test_sort.py +++ b/tests/test_models/test_mot/test_sort.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class 
TestDeepSORT(TestCase): @@ -30,7 +31,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('sort/sort_faster-rcnn_r50_fpn_8xb2-4e' - '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -38,7 +39,7 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -48,6 +49,10 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_mot/test_strong_sort.py b/tests/test_models/test_mot/test_strong_sort.py index e0d48a1dbf2..ede520fe725 100644 --- a/tests/test_models/test_mot/test_strong_sort.py +++ b/tests/test_models/test_mot/test_strong_sort.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestDeepSORT(TestCase): @@ -40,7 +41,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('strongsort/strongsort_yolox_x_8xb4-80e_crowdhuman' - '-mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + '-mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), ]) def test_strongsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -48,7 +49,7 @@ def test_strongsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -68,6 +69,10 @@ def test_strongsort_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_necks/test_ct_resnet_neck.py b/tests/test_models/test_necks/test_ct_resnet_neck.py index acc5258f0d2..12b7a31a237 100644 --- a/tests/test_models/test_necks/test_ct_resnet_neck.py +++ b/tests/test_models/test_necks/test_ct_resnet_neck.py @@ -4,6 +4,7 @@ import torch from mmdet.models.necks import CTResNetNeck +from mmengine.device.utils import is_musa_available class TestCTResNetNeck(unittest.TestCase): @@ -51,3 +52,13 @@ def test_forward(self): feat = feat.cuda() out_feat = ct_resnet_neck([feat])[0] self.assertEqual(out_feat.shape, (1, num_filters[-1], 16, 16)) + elif is_musa_available(): + # test dcn + ct_resnet_neck = CTResNetNeck( + in_channels=in_channels, + num_deconv_filters=num_filters, + num_deconv_kernels=num_kernels) + ct_resnet_neck = ct_resnet_neck.musa() + feat = feat.musa() + out_feat = ct_resnet_neck([feat])[0] + self.assertEqual(out_feat.shape, 
(1, num_filters[-1], 16, 16)) diff --git a/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py b/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py index 95aa02ee6bd..5f9139c8281 100644 --- a/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py +++ b/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py @@ -6,16 +6,19 @@ from parameterized import parameterized from mmdet.models.roi_heads.bbox_heads import DoubleConvFCBBoxHead +from mmengine.device.utils import is_musa_available class TestDoubleBboxHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') double_bbox_head = DoubleConvFCBBoxHead( num_convs=4, num_fcs=2, diff --git a/tests/test_models/test_roi_heads/test_cascade_roi_head.py b/tests/test_models/test_roi_heads/test_cascade_roi_head.py index 5918b0067f1..425a8eec7f5 100644 --- a/tests/test_models/test_roi_heads/test_cascade_roi_head.py +++ b/tests/test_models/test_roi_heads/test_cascade_roi_head.py @@ -8,7 +8,7 @@ from mmdet.models.roi_heads import StandardRoIHead # noqa from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg - +from mmengine.device.utils import is_musa_available class TestCascadeRoIHead(TestCase): @@ -26,9 +26,9 @@ def test_init(self, cfg_file): ['cascade_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py']) def test_cascade_roi_head_loss(self, cfg_file): """Tests standard roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 img_metas = [{ 'img_shape': (s, s, 3), @@ -36,45 +36,90 @@ def test_cascade_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
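The CUDA and MUSA branches in the RoI-head hunks below are duplicates except for the device name. A sketch of an equivalent single-body shape, reusing the test-local names (`roi_head`, `roi_head_cfg`, `s`, `img_shape_list`) and assuming `.to(device)` behaves like the explicit `.cuda()`/`.musa()` calls:

# Pick the device string once, then share one body for both backends.
device = 'musa' if is_musa_available() else 'cuda'
roi_head = roi_head.to(device)
feats = tuple(
    torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)), device=device)
    for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)))
proposal_list = demo_mm_proposals(img_shape_list, 100, device=device)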
+ proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) diff --git a/tests/test_models/test_roi_heads/test_dynamic_roi_head.py b/tests/test_models/test_roi_heads/test_dynamic_roi_head.py index 8b4b44de699..b1105ef32a8 100644 --- a/tests/test_models/test_roi_heads/test_dynamic_roi_head.py +++ b/tests/test_models/test_roi_heads/test_dynamic_roi_head.py @@ -8,6 +8,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestDynamicRoIHead(TestCase): @@ -21,12 +22,15 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_dynamic_roi_head_loss(self, device): """Tests trident roi head predict.""" if not torch.cuda.is_available() and device == 'cuda': # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') + elif not is_musa_available() and device == 'musa': + # RoI pooling only support in GPU + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 diff --git a/tests/test_models/test_roi_heads/test_grid_roi_head.py b/tests/test_models/test_roi_heads/test_grid_roi_head.py index fc2988760c8..ab8c4f76f40 100644 --- a/tests/test_models/test_roi_heads/test_grid_roi_head.py +++ b/tests/test_models/test_roi_heads/test_grid_roi_head.py @@ -8,6 +8,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestGridRoIHead(TestCase): @@ -21,12 +22,15 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_grid_roi_head_loss(self, device): """Tests trident roi head predict.""" if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) @@ -71,13 +75,16 @@ def test_grid_roi_head_loss(self, device): 'loss_grid', out, 'grid loss should be passed when there are no true boxes') - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_grid_roi_head_predict(self, device): """Tests trident roi head predict.""" if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 @@ -99,13 +106,16 @@ 
def test_grid_roi_head_predict(self, device): image_shapes=image_shapes, num_proposals=100, device=device) roi_head.predict(feats, proposals_list, batch_data_samples) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_grid_roi_head_forward(self, device): """Tests trident roi head forward.""" if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 diff --git a/tests/test_models/test_roi_heads/test_htc_roi_head.py b/tests/test_models/test_roi_heads/test_htc_roi_head.py index 37bb92f5787..4dc7ad31e47 100644 --- a/tests/test_models/test_roi_heads/test_htc_roi_head.py +++ b/tests/test_models/test_roi_heads/test_htc_roi_head.py @@ -8,6 +8,7 @@ from mmdet.models.roi_heads import HybridTaskCascadeRoIHead # noqa from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg +from mmengine.device.utils import is_musa_available class TestHTCRoIHead(TestCase): @@ -25,7 +26,7 @@ def test_init(self, cfg_file): @parameterized.expand(['htc/htc_r50_fpn_1x_coco.py']) def test_htc_roi_head_loss(self, cfg_file): """Tests htc roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -35,54 +36,100 @@ def test_htc_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - with_semantic=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + with_semantic=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # When there is no truth, the cls loss should be nonzero but - 
# there should be no box and mask loss. - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - with_semantic=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. + proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + with_semantic=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + with_semantic=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss.
+ proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + with_semantic=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) @parameterized.expand(['htc/htc_r50_fpn_1x_coco.py']) def test_htc_roi_head_predict(self, cfg_file): - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -92,23 +139,45 @@ def test_htc_roi_head_predict(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + results = roi_head.predict( + feats, proposal_list, batch_data_samples, rescale=True) + self.assertEqual(results[0].masks.shape[-2:], (s, s)) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - results = roi_head.predict( - feats, proposal_list, batch_data_samples, rescale=True) - self.assertEqual(results[0].masks.shape[-2:], (s, s)) + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + results = roi_head.predict( + feats, proposal_list, batch_data_samples, rescale=True) + self.assertEqual(results[0].masks.shape[-2:], (s, s)) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py index ffadc19ff2b..8e9e2721744 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py @@ -4,6 +4,7 @@ from parameterized import parameterized from mmdet.models.roi_heads.mask_heads import CoarseMaskHead +from mmengine.device.utils import 
is_musa_available class TestCoarseMaskHead(unittest.TestCase): @@ -15,12 +16,14 @@ def test_init(self): with self.assertRaises(AssertionError): CoarseMaskHead(downsample_factor=0.5) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') x = torch.rand((1, 32, 7, 7)).to(device) mask_head = CoarseMaskHead( downsample_factor=2, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py index e0b4ee9362b..c2ae0b81373 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py @@ -8,15 +8,19 @@ from parameterized import parameterized from mmdet.models.roi_heads.mask_heads import FCNMaskHead +from mmengine.device.utils import is_musa_available class TestFCNMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_get_seg_masks(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') num_classes = 6 mask_head = FCNMaskHead( num_convs=1, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py index 4a182b842d9..e41fabbc8c2 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py @@ -7,15 +7,19 @@ from torch import Tensor from mmdet.models.roi_heads.mask_heads import FeatureRelayHead +from mmengine.device.utils import is_musa_available class TestFeatureRelayHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') mask_head = FeatureRelayHead(in_channels=10, out_conv_channels=10) x = torch.rand((1, 10)) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py index 7f912d797eb..951276e83d3 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py @@ -7,16 +7,19 @@ from torch import Tensor from mmdet.models.roi_heads.mask_heads import FusedSemanticHead +from mmengine.device.utils import is_musa_available class TestFusedSemanticHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') semantic_head = FusedSemanticHead( num_ins=5, 
fusion_level=1, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py index 32e85093501..df7e19530a3 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py @@ -7,16 +7,19 @@ from torch import Tensor from mmdet.models.roi_heads.mask_heads import GlobalContextHead +from mmengine.device.utils import is_musa_available class TestGlobalContextHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') head = GlobalContextHead( num_convs=1, in_channels=4, conv_out_channels=4, num_classes=10) feats = [ diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py index 7a583cd4e61..f7f7c9faff9 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py @@ -11,16 +11,20 @@ from mmdet.models.utils import unpack_gt_instances from mmdet.testing import (demo_mm_inputs, demo_mm_proposals, demo_mm_sampling_results) +from mmengine.device.utils import is_musa_available class TestGridHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_grid_head_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + grid_head = GridHead() grid_head.to(device=device) @@ -54,12 +58,15 @@ def test_grid_head_loss(self, device): grid_head.loss(grid_pred, sample_idx, sampling_results, train_cfg) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_mask_iou_head_predict_by_feat(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + grid_head = GridHead() grid_head.to(device=device) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py index 6927e618da4..1c301bc92e6 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py @@ -7,15 +7,19 @@ from torch import Tensor from mmdet.models.roi_heads.mask_heads import HTCMaskHead +from mmengine.device.utils import is_musa_available class TestHTCMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') num_classes = 6 mask_head = HTCMaskHead( with_conv_res=True, diff --git
a/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py index 548147861d5..81034df8a68 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py @@ -12,16 +12,19 @@ from mmdet.structures.mask import mask_target from mmdet.testing import (demo_mm_inputs, demo_mm_proposals, demo_mm_sampling_results) +from mmengine.device.utils import is_musa_available class TestMaskIoUHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_mask_iou_head_loss_and_target(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') mask_iou_head = MaskIoUHead(num_classes=4) mask_iou_head.to(device=device) @@ -67,12 +70,14 @@ def test_mask_iou_head_loss_and_target(self, device): mask_targets, sampling_results, batch_gt_instances, train_cfg) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_mask_iou_head_predict_by_feat(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') mask_iou_head = MaskIoUHead(num_classes=4) mask_iou_head.to(device=device) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py index 4df9dc59e9b..009f9b9ec69 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py @@ -7,15 +7,19 @@ from torch import Tensor from mmdet.models.roi_heads.mask_heads import SCNetMaskHead +from mmengine.device.utils import is_musa_available class TestSCNetMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') num_classes = 6 mask_head = SCNetMaskHead( conv_to_res=True, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py index 84f787bb7f4..c1a2f78b275 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py @@ -7,16 +7,19 @@ from torch import Tensor from mmdet.models.roi_heads.mask_heads import SCNetSemanticHead +from mmengine.device.utils import is_musa_available class TestSCNetSemanticHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') semantic_head = SCNetSemanticHead( num_ins=5, fusion_level=1, diff 
--git a/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py b/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py index 458eb302b00..ba26c887b95 100644 --- a/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py +++ b/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py @@ -7,6 +7,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestMaskScoringRoiHead(TestCase): @@ -24,101 +25,187 @@ def test_init(self): def test_mask_scoring_roi_head_loss(self): """Tests trident roi head predict.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head = MODELS.build(self.roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - loss_cls = out['loss_cls'] - loss_bbox = out['loss_bbox'] - loss_mask = out['loss_mask'] - self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') - self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') - self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') - - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - empty_cls_loss = out['loss_cls'] - empty_bbox_loss = out['loss_bbox'] - empty_mask_loss = out['loss_mask'] - self.assertGreater(empty_cls_loss.sum(), 0, - 'cls loss should be non-zero') - self.assertEqual( - empty_bbox_loss.sum(), 0, - 'there should be no box loss when there are no true boxes') - self.assertEqual( - empty_mask_loss.sum(), 0, - 'there should be no mask loss when there are no true boxes') + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, 'mask loss should be 
non-zero') + + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') + + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') def test_mask_scoring_roi_head_predict(self): """Tests trident roi head predict.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head = MODELS.build(self.roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - roi_head.predict(feats, proposals_list, batch_data_samples) + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s 
// (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + roi_head.predict(feats, proposals_list, batch_data_samples) + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + roi_head.predict(feats, proposals_list, batch_data_samples) def test_mask_scoring_roi_head_forward(self): """Tests trident roi head forward.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head = MODELS.build(self.roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - roi_head.forward(feats, proposals_list) + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + roi_head.forward(feats, proposals_list) + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + roi_head.forward(feats, proposals_list) diff --git a/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py b/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py index df7734c5a95..00103bf9bcf 100644 --- a/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py +++ b/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py @@ -8,6 +8,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available register_all_modules() @@ -81,49 +82,91 @@ def test_init(self): def test_standard_roi_head_loss(self): """Tests multi instance roi head loss when truth is empty and non- empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and
torch+cuda+musa') s = 256 roi_head_cfg = _fake_roi_head() roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) - - # When truth is non-empty then emd loss should be nonzero for - # random inputs - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[1], - num_classes=4, - with_mask=False, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - loss = out['loss_rcnn_emd'] - self.assertGreater(loss.sum(), 0, 'loss should be non-zero') - - # When there is no truth, the emd loss should be zero. - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - out = roi_head.loss(feats, proposals_list, batch_data_samples) - empty_loss = out['loss_rcnn_emd'] - self.assertEqual( - empty_loss.sum(), 0, - 'there should be no emd loss when there are no true boxes') + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + # When truth is non-empty then emd loss should be nonzero for + # random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=False, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss = out['loss_rcnn_emd'] + self.assertGreater(loss.sum(), 0, 'loss should be non-zero') + + # When there is no truth, the emd loss should be zero. 
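For reference, the two fixtures driving these assertions take the keyword set shown in the surrounding hunks; a standalone usage sketch (MUSA assumed available):

from mmdet.testing import demo_mm_inputs, demo_mm_proposals

image_shapes = [(3, 256, 256)]
# num_items=[1] packs one GT instance per image; num_items=[0] packs the
# empty-GT case whose box/mask losses are asserted to vanish.
batch_data_samples = demo_mm_inputs(
    batch_size=1,
    image_shapes=image_shapes,
    num_items=[1],
    num_classes=4,
    with_mask=True,
    device='musa')['data_samples']
proposals_list = demo_mm_proposals(
    image_shapes=image_shapes, num_proposals=100, device='musa')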
+ batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_loss = out['loss_rcnn_emd'] + self.assertEqual( + empty_loss.sum(), 0, + 'there should be no emd loss when there are no true boxes') + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then emd loss should be nonzero for + # random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=False, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss = out['loss_rcnn_emd'] + self.assertGreater(loss.sum(), 0, 'loss should be non-zero') + + # When there is no truth, the emd loss should be zero. + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_loss = out['loss_rcnn_emd'] + self.assertEqual( + empty_loss.sum(), 0, + 'there should be no emd loss when there are no true boxes') diff --git a/tests/test_models/test_roi_heads/test_pisa_roi_head.py b/tests/test_models/test_roi_heads/test_pisa_roi_head.py index 5820c3977c8..5f4686be6b7 100644 --- a/tests/test_models/test_roi_heads/test_pisa_roi_head.py +++ b/tests/test_models/test_roi_heads/test_pisa_roi_head.py @@ -8,6 +8,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestPISARoIHead(TestCase): @@ -21,12 +22,15 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda','musa']) def test_pisa_roi_head(self, device): """Tests trident roi head predict.""" if not torch.cuda.is_available() and device == 'cuda': # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') + elif not is_musa_available() and device == 'musa': + # RoI pooling only support in GPU + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 diff --git a/tests/test_models/test_roi_heads/test_point_rend_roi_head.py b/tests/test_models/test_roi_heads/test_point_rend_roi_head.py index dce8bf498ea..1021ee452d1 100644 --- a/tests/test_models/test_roi_heads/test_point_rend_roi_head.py +++ b/tests/test_models/test_roi_heads/test_point_rend_roi_head.py @@ -8,6 +8,7 @@ from mmdet.models.roi_heads import PointRendRoIHead # noqa from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg +from mmengine.device.utils import is_musa_available 
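`is_musa_available` ships only with recent mmengine releases; if older mmengine versions must stay supported, a guarded import along these lines (an assumption, not part of this patch) keeps the test modules importable:

# Assumed fallback for mmengine builds that predate the MUSA helpers.
try:
    from mmengine.device.utils import is_musa_available
except ImportError:
    def is_musa_available() -> bool:
        return False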
 class TestHTCRoIHead(TestCase):
@@ -26,7 +27,7 @@ def test_init(self, cfg_file):
         ['point_rend/point-rend_r50-caffe_fpn_ms-1x_coco.py'])
     def test_point_rend_roi_head_loss(self, cfg_file):
         """Tests htc roi head loss when truth is empty and non-empty."""
-        if not torch.cuda.is_available():
+        if not torch.cuda.is_available() and not is_musa_available():
             # RoI pooling only support in GPU
             return unittest.skip('test requires GPU and torch+cuda')
         s = 256
@@ -36,42 +37,80 @@ def test_point_rend_roi_head_loss(self, cfg_file):
         }]
         roi_head_cfg = get_roi_head_cfg(cfg_file)
         roi_head = MODELS.build(roi_head_cfg)
-        roi_head = roi_head.cuda()
-        feats = []
-        for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
-            feats.append(
-                torch.rand(1, 256, s // (2**(i + 2)),
-                           s // (2**(i + 2))).to(device='cuda'))
-        feats = tuple(feats)
+        if is_musa_available():
+            roi_head = roi_head.musa()
+            feats = []
+            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+                feats.append(
+                    torch.rand(1, 256, s // (2**(i + 2)),
+                               s // (2**(i + 2))).to(device='musa'))
+            feats = tuple(feats)
 
-        # When truth is non-empty then both cls, box, and mask loss
-        # should be nonzero for random inputs
-        img_shape_list = [img_meta['img_shape'] for img_meta in img_metas]
-        proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
-        batch_data_samples = demo_mm_inputs(
-            batch_size=1,
-            image_shapes=[(3, s, s)],
-            num_items=[1],
-            num_classes=4,
-            with_mask=True,
-            device='cuda')['data_samples']
-        out = roi_head.loss(feats, proposal_list, batch_data_samples)
-        for name, value in out.items():
-            if 'loss' in name:
-                self.assertGreaterEqual(
-                    value.sum(), 0, msg='loss should be non-zero')
+            # When truth is non-empty then both cls, box, and mask loss
+            # should be nonzero for random inputs
+            img_shape_list = [img_meta['img_shape'] for img_meta in img_metas]
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[1],
+                num_classes=4,
+                with_mask=True,
+                device='musa')['data_samples']
+            out = roi_head.loss(feats, proposal_list, batch_data_samples)
+            for name, value in out.items():
+                if 'loss' in name:
+                    self.assertGreaterEqual(
+                        value.sum(), 0, msg='loss should be non-zero')
 
-        # Positive rois must not be empty
-        proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
-        batch_data_samples = demo_mm_inputs(
-            batch_size=1,
-            image_shapes=[(3, s, s)],
-            num_items=[0],
-            num_classes=4,
-            with_mask=True,
-            device='cuda')['data_samples']
-        with self.assertRaises(AssertionError):
+            # Positive rois must not be empty
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[0],
+                num_classes=4,
+                with_mask=True,
+                device='musa')['data_samples']
+            with self.assertRaises(AssertionError):
+                out = roi_head.loss(feats, proposal_list, batch_data_samples)
+        else:
+            roi_head = roi_head.cuda()
+            feats = []
+            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+                feats.append(
+                    torch.rand(1, 256, s // (2**(i + 2)),
+                               s // (2**(i + 2))).to(device='cuda'))
+            feats = tuple(feats)
+
+            # When truth is non-empty then both cls, box, and mask loss
+            # should be nonzero for random inputs
+            img_shape_list = [img_meta['img_shape'] for img_meta in img_metas]
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[1],
+                num_classes=4,
+                with_mask=True,
+                device='cuda')['data_samples']
             out = roi_head.loss(feats, proposal_list, batch_data_samples)
+            for name, value in out.items():
+                if 'loss' in name:
+                    self.assertGreaterEqual(
+                        value.sum(), 0, msg='loss should be non-zero')
+
+            # Positive rois must not be empty
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[0],
+                num_classes=4,
+                with_mask=True,
+                device='cuda')['data_samples']
+            with self.assertRaises(AssertionError):
+                out = roi_head.loss(feats, proposal_list, batch_data_samples)
 
     @parameterized.expand(
         ['point_rend/point-rend_r50-caffe_fpn_ms-1x_coco.py'])
diff --git a/tests/test_models/test_roi_heads/test_scnet_roi_head.py b/tests/test_models/test_roi_heads/test_scnet_roi_head.py
index 9f14530ba7b..c881dffcd82 100644
--- a/tests/test_models/test_roi_heads/test_scnet_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_scnet_roi_head.py
@@ -8,6 +8,7 @@
 from mmdet.models.roi_heads import SCNetRoIHead  # noqa
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
+from mmengine.device.utils import is_musa_available
 
 
 class TestSCNetRoIHead(TestCase):
@@ -27,7 +28,7 @@ def test_init(self, cfg_file):
     @parameterized.expand(['scnet/scnet_r50_fpn_1x_coco.py'])
     def test_scnet_roi_head_loss(self, cfg_file):
         """Tests htc roi head loss when truth is empty and non-empty."""
-        if not torch.cuda.is_available():
+        if not torch.cuda.is_available() and not is_musa_available():
             # RoI pooling only support in GPU
             return unittest.skip('test requires GPU and torch+cuda')
         s = 256
@@ -37,54 +38,101 @@ def test_scnet_roi_head_loss(self, cfg_file):
         }]
         roi_head_cfg = get_roi_head_cfg(cfg_file)
         roi_head = MODELS.build(roi_head_cfg)
-        roi_head = roi_head.cuda()
-        feats = []
-        for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
-            feats.append(
-                torch.rand(1, 256, s // (2**(i + 2)),
-                           s // (2**(i + 2))).to(device='cuda'))
-        feats = tuple(feats)
+        if is_musa_available():
+            roi_head = roi_head.musa()
+            feats = []
+            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+                feats.append(
+                    torch.rand(1, 256, s // (2**(i + 2)),
+                               s // (2**(i + 2))).to(device='musa'))
+            feats = tuple(feats)
 
-        # When truth is non-empty then both cls, box, and mask loss
-        # should be nonzero for random inputs
-        img_shape_list = [(3, s, s) for _ in img_metas]
-        proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
-        batch_data_samples = demo_mm_inputs(
-            batch_size=1,
-            image_shapes=[(3, s, s)],
-            num_items=[1],
-            num_classes=4,
-            with_mask=True,
-            with_semantic=True,
-            device='cuda')['data_samples']
-        out = roi_head.loss(feats, proposal_list, batch_data_samples)
-        for name, value in out.items():
-            if 'loss' in name:
-                self.assertGreaterEqual(
-                    value.sum(), 0, msg='loss should be non-zero')
+            # When truth is non-empty then both cls, box, and mask loss
+            # should be nonzero for random inputs
+            img_shape_list = [(3, s, s) for _ in img_metas]
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[1],
+                num_classes=4,
+                with_mask=True,
+                with_semantic=True,
+                device='musa')['data_samples']
+            out = roi_head.loss(feats, proposal_list, batch_data_samples)
+            for name, value in out.items():
+                if 'loss' in name:
+                    self.assertGreaterEqual(
+                        value.sum(), 0, msg='loss should be non-zero')
 
-        # When there is no truth, the cls loss should be nonzero but
-        # there should be no box and mask loss.
-        proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
-        batch_data_samples = demo_mm_inputs(
-            batch_size=1,
-            image_shapes=[(3, s, s)],
-            num_items=[0],
-            num_classes=4,
-            with_mask=True,
-            with_semantic=True,
-            device='cuda')['data_samples']
-        out = roi_head.loss(feats, proposal_list, batch_data_samples)
-        for name, value in out.items():
-            if 'loss_cls' in name:
-                self.assertGreaterEqual(
-                    value.sum(), 0, msg='loss should be non-zero')
-            elif 'loss_bbox' in name or 'loss_mask' in name:
-                self.assertEqual(value.sum(), 0)
+            # When there is no truth, the cls loss should be nonzero but
+            # there should be no box and mask loss.
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[0],
+                num_classes=4,
+                with_mask=True,
+                with_semantic=True,
+                device='musa')['data_samples']
+            out = roi_head.loss(feats, proposal_list, batch_data_samples)
+            for name, value in out.items():
+                if 'loss_cls' in name:
+                    self.assertGreaterEqual(
+                        value.sum(), 0, msg='loss should be non-zero')
+                elif 'loss_bbox' in name or 'loss_mask' in name:
+                    self.assertEqual(value.sum(), 0)
+
+        else:
+            roi_head = roi_head.cuda()
+            feats = []
+            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+                feats.append(
+                    torch.rand(1, 256, s // (2**(i + 2)),
+                               s // (2**(i + 2))).to(device='cuda'))
+            feats = tuple(feats)
+
+            # When truth is non-empty then both cls, box, and mask loss
+            # should be nonzero for random inputs
+            img_shape_list = [(3, s, s) for _ in img_metas]
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[1],
+                num_classes=4,
+                with_mask=True,
+                with_semantic=True,
+                device='cuda')['data_samples']
+            out = roi_head.loss(feats, proposal_list, batch_data_samples)
+            for name, value in out.items():
+                if 'loss' in name:
+                    self.assertGreaterEqual(
+                        value.sum(), 0, msg='loss should be non-zero')
+
+            # When there is no truth, the cls loss should be nonzero but
+            # there should be no box and mask loss.
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[0],
+                num_classes=4,
+                with_mask=True,
+                with_semantic=True,
+                device='cuda')['data_samples']
+            out = roi_head.loss(feats, proposal_list, batch_data_samples)
+            for name, value in out.items():
+                if 'loss_cls' in name:
+                    self.assertGreaterEqual(
+                        value.sum(), 0, msg='loss should be non-zero')
+                elif 'loss_bbox' in name or 'loss_mask' in name:
+                    self.assertEqual(value.sum(), 0)
 
     @parameterized.expand(['scnet/scnet_r50_fpn_1x_coco.py'])
     def test_scnet_roi_head_predict(self, cfg_file):
-        if not torch.cuda.is_available():
+        if not torch.cuda.is_available() and not is_musa_available():
             # RoI pooling only support in GPU
             return unittest.skip('test requires GPU and torch+cuda')
         s = 256
@@ -94,23 +142,45 @@ def test_scnet_roi_head_predict(self, cfg_file):
         }]
         roi_head_cfg = get_roi_head_cfg(cfg_file)
         roi_head = MODELS.build(roi_head_cfg)
-        roi_head = roi_head.cuda()
-        feats = []
-        for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
-            feats.append(
-                torch.rand(1, 256, s // (2**(i + 2)),
-                           s // (2**(i + 2))).to(device='cuda'))
-        feats = tuple(feats)
+        if is_musa_available():
+            roi_head = roi_head.musa()
+            feats = []
+            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+                feats.append(
+                    torch.rand(1, 256, s // (2**(i + 2)),
+                               s // (2**(i + 2))).to(device='musa'))
+            feats = tuple(feats)
+
+            img_shape_list = [(3, s, s) for _ in img_metas]
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[1],
+                num_classes=4,
+                with_mask=True,
+                device='musa')['data_samples']
+            results = roi_head.predict(
+                feats, proposal_list, batch_data_samples, rescale=True)
+            self.assertEqual(results[0].masks.shape[-2:], (s, s))
+        else:
+            roi_head = roi_head.cuda()
+            feats = []
+            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+                feats.append(
+                    torch.rand(1, 256, s // (2**(i + 2)),
+                               s // (2**(i + 2))).to(device='cuda'))
+            feats = tuple(feats)
 
-        img_shape_list = [(3, s, s) for _ in img_metas]
-        proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
-        batch_data_samples = demo_mm_inputs(
-            batch_size=1,
-            image_shapes=[(3, s, s)],
-            num_items=[1],
-            num_classes=4,
-            with_mask=True,
-            device='cuda')['data_samples']
-        results = roi_head.predict(
-            feats, proposal_list, batch_data_samples, rescale=True)
-        self.assertEqual(results[0].masks.shape[-2:], (s, s))
+            img_shape_list = [(3, s, s) for _ in img_metas]
+            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            batch_data_samples = demo_mm_inputs(
+                batch_size=1,
+                image_shapes=[(3, s, s)],
+                num_items=[1],
+                num_classes=4,
+                with_mask=True,
+                device='cuda')['data_samples']
+            results = roi_head.predict(
+                feats, proposal_list, batch_data_samples, rescale=True)
+            self.assertEqual(results[0].masks.shape[-2:], (s, s))
diff --git a/tests/test_models/test_roi_heads/test_sparse_roi_head.py b/tests/test_models/test_roi_heads/test_sparse_roi_head.py
index 1182786c0e0..4335800a9a4 100644
--- a/tests/test_models/test_roi_heads/test_sparse_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_sparse_roi_head.py
@@ -9,6 +9,7 @@
 from mmdet.models.roi_heads import StandardRoIHead  # noqa
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
+from mmengine.device.utils import
is_musa_available class TestCascadeRoIHead(TestCase): @@ -26,9 +27,9 @@ def test_init(self, cfg_file): @parameterized.expand(['queryinst/queryinst_r50_fpn_1x_coco.py']) def test_cascade_roi_head_loss(self, cfg_file): """Tests standard roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 img_metas = [{ 'img_shape': (s, s, 3), @@ -36,57 +37,114 @@ def test_cascade_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - # add import elements into proposal - init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone() - for proposal in proposal_list: - proposal.features = init_proposal_features - proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, - s]]).repeat(100, 1) - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, 256).musa().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - # add import elements into proposal - init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone() - for proposal in proposal_list: - proposal.features = init_proposal_features - proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, - s]]).repeat(100, 1) - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. + proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, 256).musa().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) diff --git a/tests/test_models/test_roi_heads/test_standard_roi_head.py b/tests/test_models/test_roi_heads/test_standard_roi_head.py index 5ae95e28440..7661a7c16f5 100644 --- a/tests/test_models/test_roi_heads/test_standard_roi_head.py +++ b/tests/test_models/test_roi_heads/test_standard_roi_head.py @@ -9,6 +9,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available register_all_modules() @@ -141,66 +142,126 @@ def test_init(self): @parameterized.expand([(False, ), (True, )]) def test_standard_roi_head_loss(self, with_shared_head): """Tests standard roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 roi_head_cfg = _fake_roi_head(with_shared_head=with_shared_head) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - if not with_shared_head: - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - else: - feats.append( - torch.rand(1, 1024, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) - - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - loss_cls = out['loss_cls'] - loss_bbox = out['loss_bbox'] - loss_mask = out['loss_mask'] - self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') - self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') - self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') - - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - out = roi_head.loss(feats, proposals_list, batch_data_samples) - empty_cls_loss = out['loss_cls'] - empty_bbox_loss = out['loss_bbox'] - empty_mask_loss = out['loss_mask'] - self.assertGreater(empty_cls_loss.sum(), 0, - 'cls loss should be non-zero') - self.assertEqual( - empty_bbox_loss.sum(), 0, - 'there should be no box loss when there are no true boxes') - self.assertEqual( - empty_mask_loss.sum(), 0, - 'there should be no mask loss when there are no true boxes') + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + if not with_shared_head: + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + else: + feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') + + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + if not with_shared_head: + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + else: + feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') diff --git a/tests/test_models/test_roi_heads/test_trident_roi_head.py b/tests/test_models/test_roi_heads/test_trident_roi_head.py index a173b01066c..c749fb31fcd 100644 --- a/tests/test_models/test_roi_heads/test_trident_roi_head.py +++ b/tests/test_models/test_roi_heads/test_trident_roi_head.py @@ -8,6 +8,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules +from mmengine.device.utils import is_musa_available class TestTridentRoIHead(TestCase): @@ -25,34 +26,62 @@ def test_init(self): def test_trident_roi_head_predict(self): """Tests trident roi head predict.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head_cfg = copy.deepcopy(self.roi_head_cfg) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1024, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - # When `test_branch_idx == 1` - roi_head.predict(feats, proposals_list, batch_data_samples) - # When `test_branch_idx == -1` - roi_head_cfg.test_branch_idx = -1 - roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - roi_head.predict(feats, proposals_list, batch_data_samples) + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + # When `test_branch_idx == 1` + roi_head.predict(feats, proposals_list, batch_data_samples) + # When `test_branch_idx == -1` + roi_head_cfg.test_branch_idx = -1 + roi_head = MODELS.build(roi_head_cfg) + roi_head = roi_head.musa() + roi_head.predict(feats, proposals_list, batch_data_samples) + + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + 
feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + # When `test_branch_idx == 1` + roi_head.predict(feats, proposals_list, batch_data_samples) + # When `test_branch_idx == -1` + roi_head_cfg.test_branch_idx = -1 + roi_head = MODELS.build(roi_head_cfg) + roi_head = roi_head.cuda() + roi_head.predict(feats, proposals_list, batch_data_samples) diff --git a/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py b/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py index db0b60717bc..6a42e525354 100644 --- a/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py +++ b/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py @@ -7,6 +7,7 @@ """ import pytest import torch +from mmengine.device.utils import is_musa_available def test_standard_points_generator(): @@ -102,6 +103,49 @@ def test_standard_points_generator(): assert (priors_half_offset[0][0] - priors[0][0]).sum() == 4 * 0.5 * 2 assert (priors_half_offset[1][0] - priors[1][0]).sum() == 10 * 0.5 * 2 + elif is_musa_available(): + anchor_generator_cfg = dict( + type='MlvlPointGenerator', strides=[4, 8], offset=0) + anchor_generator = build_prior_generator(anchor_generator_cfg) + assert anchor_generator is not None + # Square strides + mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0) + mlvl_points_half_stride_generator = MlvlPointGenerator( + strides=[4, 10], offset=0.5) + assert mlvl_points.num_levels == 2 + + # assert self.num_levels == len(featmap_sizes) + with pytest.raises(AssertionError): + mlvl_points.grid_priors(featmap_sizes=[(2, 2)], device='musa') + priors = mlvl_points.grid_priors( + featmap_sizes=[(2, 2), (4, 8)], device='musa') + priors_with_stride = mlvl_points.grid_priors( + featmap_sizes=[(2, 2), (4, 8)], with_stride=True, device='musa') + assert len(priors) == 2 + + # assert last dimension is (coord_x, coord_y, stride_w, stride_h). 
+        assert priors_with_stride[0].size(1) == 4
+        assert priors_with_stride[0][0][2] == 4
+        assert priors_with_stride[0][0][3] == 4
+        assert priors_with_stride[1][0][2] == 10
+        assert priors_with_stride[1][0][3] == 10
+
+        stride_4_feat_2_2 = priors[0]
+        assert (stride_4_feat_2_2[1] - stride_4_feat_2_2[0]).sum() == 4
+        assert stride_4_feat_2_2.size(0) == 4
+        assert stride_4_feat_2_2.size(1) == 2
+
+        stride_10_feat_4_8 = priors[1]
+        assert (stride_10_feat_4_8[1] - stride_10_feat_4_8[0]).sum() == 10
+        assert stride_10_feat_4_8.size(0) == 4 * 8
+        assert stride_10_feat_4_8.size(1) == 2
+
+        # assert the offset of 0.5 * stride
+        priors_half_offset = mlvl_points_half_stride_generator.grid_priors(
+            featmap_sizes=[(2, 2), (4, 8)], device='musa')
+
+        assert (priors_half_offset[0][0] - priors[0][0]).sum() == 4 * 0.5 * 2
+        assert (priors_half_offset[1][0] - priors[1][0]).sum() == 10 * 0.5 * 2
 
 
 def test_sparse_prior():
@@ -118,7 +162,7 @@ def test_sparse_prior():
         level_idx=0,
         device='cpu')
 
-    assert not sparse_prior.is_cuda
+    assert sparse_prior.device.type == 'cpu'
     assert (sparse_prior == grid_anchors[0][prior_indexs]).all()
     sparse_prior = mlvl_points.sparse_priors(
         prior_idxs=prior_indexs,
@@ -270,6 +314,91 @@ def test_sparse_prior():
             featmap_size=featmap_sizes[i],
             device='cuda')
         assert (sparse_yolo_anchors == yolo_anchors[i][prior_indexs]).all()
+    elif is_musa_available():
+        mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0)
+        prior_indexs = torch.Tensor([0, 3, 4, 5, 6, 7, 1, 2, 4, 5, 6,
+                                     9]).long().musa()
+
+        featmap_sizes = [(6, 8), (6, 4)]
+        grid_anchors = mlvl_points.grid_priors(
+            featmap_sizes=featmap_sizes, with_stride=False, device='musa')
+        sparse_prior = mlvl_points.sparse_priors(
+            prior_idxs=prior_indexs,
+            featmap_size=featmap_sizes[0],
+            level_idx=0,
+            device='musa')
+        assert (sparse_prior == grid_anchors[0][prior_indexs]).all()
+        sparse_prior = mlvl_points.sparse_priors(
+            prior_idxs=prior_indexs,
+            featmap_size=featmap_sizes[1],
+            level_idx=1,
+            device='musa')
+        assert (sparse_prior == grid_anchors[1][prior_indexs]).all()
+        assert sparse_prior.is_musa
+        mlvl_anchors = AnchorGenerator(
+            strides=[16, 32],
+            ratios=[1., 2.5],
+            scales=[1., 5.],
+            base_sizes=[4, 8])
+        prior_indexs = torch.Tensor([4, 5, 6, 7, 0, 2, 50, 4, 5, 6,
+                                     9]).long().to('musa')
+
+        featmap_sizes = [(13, 5), (16, 4)]
+        grid_anchors = mlvl_anchors.grid_priors(
+            featmap_sizes=featmap_sizes, device='musa')
+        sparse_prior = mlvl_anchors.sparse_priors(
+            prior_idxs=prior_indexs,
+            featmap_size=featmap_sizes[0],
+            level_idx=0,
+            device='musa')
+        assert (sparse_prior == grid_anchors[0][prior_indexs]).all()
+        sparse_prior = mlvl_anchors.sparse_priors(
+            prior_idxs=prior_indexs,
+            featmap_size=featmap_sizes[1],
+            level_idx=1,
+            device='musa')
+        assert (sparse_prior == grid_anchors[1][prior_indexs]).all()
+
+        # for ssd
+        from mmdet.models.task_modules.prior_generators import \
+            SSDAnchorGenerator
+        featmap_sizes = [(38, 38), (19, 19), (10, 10)]
+        anchor_generator = SSDAnchorGenerator(
+            scale_major=False,
+            input_size=300,
+            basesize_ratio_range=(0.15, 0.9),
+            strides=[8, 16, 32],
+            ratios=[[2], [2, 3], [2, 3]])
+        ssd_anchors = anchor_generator.grid_anchors(
+            featmap_sizes, device='musa')
+        for i in range(len(featmap_sizes)):
+            sparse_ssd_anchors = anchor_generator.sparse_priors(
+                prior_idxs=prior_indexs,
+                level_idx=i,
+                featmap_size=featmap_sizes[i],
+                device='musa')
+            assert (sparse_ssd_anchors == ssd_anchors[i][prior_indexs]).all()
+
+        # for yolo
+        from
mmdet.models.task_modules.prior_generators import \ + YOLOAnchorGenerator + featmap_sizes = [(38, 38), (19, 19), (10, 10)] + anchor_generator = YOLOAnchorGenerator( + strides=[32, 16, 8], + base_sizes=[ + [(116, 90), (156, 198), (373, 326)], + [(30, 61), (62, 45), (59, 119)], + [(10, 13), (16, 30), (33, 23)], + ]) + yolo_anchors = anchor_generator.grid_anchors( + featmap_sizes, device='musa') + for i in range(len(featmap_sizes)): + sparse_yolo_anchors = anchor_generator.sparse_priors( + prior_idxs=prior_indexs, + level_idx=i, + featmap_size=featmap_sizes[i], + device='musa') + assert (sparse_yolo_anchors == yolo_anchors[i][prior_indexs]).all() def test_standard_anchor_generator(): @@ -313,6 +442,8 @@ def test_ssd_anchor_generator(): from mmdet.models.task_modules import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -475,6 +606,8 @@ def test_anchor_generator_with_tuples(): from mmdet.models.task_modules import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -510,6 +643,8 @@ def test_yolo_anchor_generator(): from mmdet.models.task_modules import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -553,6 +688,8 @@ def test_retina_anchor(): from mmdet.registry import MODELS if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -652,6 +789,8 @@ def test_guided_anchor(): from mmdet.registry import MODELS if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' # head configs modified from diff --git a/tests/test_models/test_vis/test_mask2former.py b/tests/test_models/test_vis/test_mask2former.py index c8d3474e9ca..fe900e36232 100644 --- a/tests/test_models/test_vis/test_mask2former.py +++ b/tests/test_models/test_vis/test_mask2former.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestMask2Former(TestCase): @@ -30,14 +31,14 @@ def test_mask2former_init(self, cfg_file): @parameterized.expand([ ('mask2former_vis/mask2former_r50_8xb2-8e_youtubevis2021.py', - ('cpu', 'cuda')), + ('cpu', 'cuda','musa')), ]) def test_mask2former_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_mask2former_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -48,6 +49,10 @@ def test_mask2former_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, @@ -63,7 +68,7 @@ def test_mask2former_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('mask2former_vis/mask2former_r50_8xb2-8e_youtubevis2021.py', - ('cpu', 'cuda')), + ('cpu', 'cuda','musa')), ]) def test_mask2former_forward_predict_mode(self, cfg_file, devices): message_hub = 
MessageHub.get_instance( @@ -71,7 +76,7 @@ def test_mask2former_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -81,6 +86,10 @@ def test_mask2former_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_vis/test_masktrack_rcnn.py b/tests/test_models/test_vis/test_masktrack_rcnn.py index fb94391f4d1..38e02835a6c 100644 --- a/tests/test_models/test_vis/test_masktrack_rcnn.py +++ b/tests/test_models/test_vis/test_masktrack_rcnn.py @@ -10,6 +10,7 @@ from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg +from mmengine.device.utils import is_musa_available class TestMaskTrackRCNN(TestCase): @@ -32,14 +33,14 @@ def test_mask_track_rcnn_init(self, cfg_file): @parameterized.expand([ ( 'masktrack_rcnn/masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019.py', # noqa: E501 - ('cpu', 'cuda')), + ('cpu', 'cuda','musa')), ]) def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_mask_track_rcnn_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -50,7 +51,10 @@ def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, num_frames=2, @@ -66,7 +70,7 @@ def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ( 'masktrack_rcnn/masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019.py', # noqa: E501 - ('cpu', 'cuda')), + ('cpu', 'cuda','musa')), ]) def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -74,7 +78,7 @@ def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda','musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -84,7 +88,10 @@ def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, num_frames=1, diff --git a/tests/test_structures/test_bbox/test_base_boxes.py b/tests/test_structures/test_bbox/test_base_boxes.py index 
651eeefe120..8ce025cc7e2 100644
--- a/tests/test_structures/test_bbox/test_base_boxes.py
+++ b/tests/test_structures/test_bbox/test_base_boxes.py
@@ -5,6 +5,7 @@
 from mmengine.testing import assert_allclose
 
 from .utils import ToyBaseBoxes
+from mmengine.device.utils import is_musa_available
 
 
 class TestBaseBoxes(TestCase):
@@ -19,7 +20,9 @@ def test_init(self):
         if torch.cuda.is_available():
             boxes = ToyBaseBoxes(box_tensor, device='cuda')
             self.assertTrue(boxes.tensor.is_cuda)
-
+        elif is_musa_available():
+            boxes = ToyBaseBoxes(box_tensor, device='musa')
+            self.assertTrue(boxes.tensor.is_musa)
         with self.assertRaises(AssertionError):
             box_tensor = torch.rand((4, ))
             boxes = ToyBaseBoxes(box_tensor)
@@ -147,15 +150,25 @@ def test_tensor_like_functions(self):
         if torch.cuda.is_available():
             new_boxes = boxes.to(device='cuda')
             self.assertTrue(new_boxes.tensor.is_cuda)
+        elif is_musa_available():
+            new_boxes = boxes.to(device='musa')
+            self.assertTrue(new_boxes.tensor.is_musa)
         # cpu
         if torch.cuda.is_available():
             new_boxes = boxes.to(device='cuda')
             new_boxes = new_boxes.cpu()
             self.assertFalse(new_boxes.tensor.is_cuda)
+        elif is_musa_available():
+            new_boxes = boxes.to(device='musa')
+            new_boxes = new_boxes.cpu()
+            self.assertFalse(new_boxes.tensor.is_musa)
         # cuda
         if torch.cuda.is_available():
             new_boxes = boxes.cuda()
             self.assertTrue(new_boxes.tensor.is_cuda)
+        elif is_musa_available():
+            new_boxes = boxes.to('musa')
+            self.assertTrue(new_boxes.tensor.is_musa)
         # clone
         boxes.clone()
         # detach
@@ -274,3 +287,6 @@ def test_misc(self):
         if torch.cuda.is_available():
             new_boxes = boxes.fake_boxes((3, 4, 4), device='cuda')
             self.assertTrue(new_boxes.tensor.is_cuda)
+        if is_musa_available():
+            new_boxes = boxes.fake_boxes((3, 4, 4), device='musa')
+            self.assertTrue(new_boxes.tensor.is_musa)
diff --git a/tests/test_utils/test_benchmark.py b/tests/test_utils/test_benchmark.py
index 939a7eca4e5..0a305f521e5 100644
--- a/tests/test_utils/test_benchmark.py
+++ b/tests/test_utils/test_benchmark.py
@@ -13,6 +13,7 @@
 from mmdet.utils import register_all_modules
 from mmdet.utils.benchmark import (DataLoaderBenchmark, DatasetBenchmark,
                                    InferenceBenchmark)
+from mmengine.device.utils import is_musa_available
 
 
 @MODELS.register_module()
@@ -83,8 +84,8 @@ def setUp(self) -> None:
         self.max_iter = 10
         self.log_interval = 5
 
-    @unittest.skipIf(not torch.cuda.is_available(),
-                     'test requires GPU and torch+cuda')
+    @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(),
+                     'test requires GPU and torch+cuda+musa')
     def test_init_and_run(self):
         checkpoint_path = os.path.join(tempfile.gettempdir(), 'checkpoint.pth')
         torch.save(ToyDetector().state_dict(), checkpoint_path)

From ca5eae06238f6b37c82b937b207f64e436b16fbc Mon Sep 17 00:00:00 2001
From: "jianlong.qu-ext"
Date: Tue, 18 Feb 2025 10:28:26 +0800
Subject: [PATCH 2/3] add code

---
 mmdet/apis/inference.py                       |  8 ++--
 .../assigners/sim_ota_assigner.py             |  2 +-
 .../task_modules/samplers/random_sampler.py   |  2 +-
 .../samplers/score_hlr_sampler.py             |  2 +-
 mmdet/models/task_modules/tracking/aflink.py  |  2 +-
 mmdet/utils/benchmark.py                      | 11 +++--
 mmdet/utils/contextmanagers.py                | 31 ++++++++-----
 mmdet/utils/memory.py                         |  3 +-
 mmdet/utils/profiling.py                      |  4 +-
 tests/test_apis/test_inference.py             | 25 +++++-----
 .../test_hooks/test_mean_teacher_hook.py      |  5 +-
 tests/test_engine/test_runner/test_loops.py   |  5 +-
 .../test_detectors/test_cornernet.py          |  3 +-
 tests/test_models/test_detectors/test_glip.py |  4 +-
 .../test_detectors/test_kd_single_stage.py    | 11 ++---
.../test_detectors/test_maskformer.py | 7 ++- .../test_panoptic_two_stage_segmentor.py | 2 +- tests/test_models/test_detectors/test_rpn.py | 19 ++++---- .../test_detectors/test_single_stage.py | 40 ++++++++-------- .../test_single_stage_instance_seg.py | 27 +++++------ .../test_detectors/test_two_stage.py | 3 +- tests/test_models/test_mot/test_byte_track.py | 10 ++-- tests/test_models/test_mot/test_deep_sort.py | 6 +-- tests/test_models/test_mot/test_oc_sort.py | 10 ++-- tests/test_models/test_mot/test_qdtrack.py | 12 ++--- tests/test_models/test_mot/test_sort.py | 6 +-- .../test_models/test_mot/test_strong_sort.py | 6 +-- .../test_necks/test_ct_resnet_neck.py | 2 +- .../test_bbox_heads/test_double_bbox_head.py | 4 +- .../test_roi_heads/test_cascade_roi_head.py | 27 +++++++---- .../test_roi_heads/test_dynamic_roi_head.py | 4 +- .../test_roi_heads/test_grid_roi_head.py | 12 ++--- .../test_roi_heads/test_htc_roi_head.py | 40 ++++++++++------ .../test_mask_heads/test_coarse_mask_head.py | 4 +- .../test_mask_heads/test_fcn_mask_head.py | 4 +- .../test_feature_relay_head.py | 4 +- .../test_fused_semantic_head.py | 4 +- .../test_global_context_head.py | 4 +- .../test_mask_heads/test_grid_head.py | 10 ++-- .../test_mask_heads/test_htc_mask_head.py | 4 +- .../test_mask_heads/test_maskiou_head.py | 6 +-- .../test_mask_heads/test_scnet_mask_head.py | 4 +- .../test_scnet_semantic_head.py | 4 +- .../test_mask_scoring_roI_head.py | 36 +++++++++------ .../test_multi_instance_roi_head.py | 6 +-- .../test_roi_heads/test_pisa_roi_head.py | 4 +- .../test_point_rend_roi_head.py | 24 ++++++---- .../test_roi_heads/test_scnet_roi_head.py | 40 ++++++++++------ .../test_roi_heads/test_sparse_roi_head.py | 46 +++++++++++-------- .../test_roi_heads/test_standard_roi_head.py | 32 +++++++------ .../test_roi_heads/test_trident_roi_head.py | 6 +-- .../test_models/test_vis/test_mask2former.py | 10 ++-- .../test_vis/test_masktrack_rcnn.py | 10 ++-- .../test_bbox/test_base_boxes.py | 2 +- tests/test_utils/test_benchmark.py | 5 +- 55 files changed, 351 insertions(+), 273 deletions(-) diff --git a/mmdet/apis/inference.py b/mmdet/apis/inference.py index e7008d1550e..d3d8ae0a7e2 100644 --- a/mmdet/apis/inference.py +++ b/mmdet/apis/inference.py @@ -22,6 +22,7 @@ from ..structures import DetDataSample, SampleList from ..utils import get_test_pipeline_cfg + def init_detector( config: Union[str, Path, Config], checkpoint: Optional[str] = None, @@ -288,9 +289,10 @@ def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int, test_pipeline = build_test_pipeline(cfg) data = test_pipeline(data) -# if not next(model.parameters()).is_cuda: - if not next(model.parameters()).is_cuda and not (next(model.parameters()).device.type =='musa'): - + # if not next(model.parameters()).is_cuda: + if not next(model.parameters()).is_cuda and not (next( + model.parameters()).device.type == 'musa'): + for m in model.modules(): assert not isinstance( m, RoIPool diff --git a/mmdet/models/task_modules/assigners/sim_ota_assigner.py b/mmdet/models/task_modules/assigners/sim_ota_assigner.py index fc397a3514e..b592c8ae4a5 100644 --- a/mmdet/models/task_modules/assigners/sim_ota_assigner.py +++ b/mmdet/models/task_modules/assigners/sim_ota_assigner.py @@ -116,7 +116,7 @@ def assign(self, valid_pred_scores = valid_pred_scores.unsqueeze(1).repeat(1, num_gt, 1) # disable AMP autocast and calculate BCE with FP32 to avoid overflow try: - import torch_musa + import torch_musa IS_MUSA_AVAILABLE = True except Exception: IS_MUSA_AVAILABLE = False diff --git 
a/mmdet/models/task_modules/samplers/random_sampler.py b/mmdet/models/task_modules/samplers/random_sampler.py index 61a09dc6690..11fa789cce0 100644 --- a/mmdet/models/task_modules/samplers/random_sampler.py +++ b/mmdet/models/task_modules/samplers/random_sampler.py @@ -54,7 +54,7 @@ def random_choice(self, gallery: Union[Tensor, ndarray, list], """ assert len(gallery) >= num try: - import torch_musa + import torch_musa IS_MUSA_AVAILABLE = True except Exception: IS_MUSA_AVAILABLE = False diff --git a/mmdet/models/task_modules/samplers/score_hlr_sampler.py b/mmdet/models/task_modules/samplers/score_hlr_sampler.py index 0d48c2e3840..e36458ba72f 100644 --- a/mmdet/models/task_modules/samplers/score_hlr_sampler.py +++ b/mmdet/models/task_modules/samplers/score_hlr_sampler.py @@ -90,7 +90,7 @@ def random_choice(gallery: Union[Tensor, ndarray, list], assert len(gallery) >= num try: - import torch_musa + import torch_musa IS_MUSA_AVAILABLE = True except Exception: IS_MUSA_AVAILABLE = False diff --git a/mmdet/models/task_modules/tracking/aflink.py b/mmdet/models/task_modules/tracking/aflink.py index d9f1b68e779..88fd9a6f482 100644 --- a/mmdet/models/task_modules/tracking/aflink.py +++ b/mmdet/models/task_modules/tracking/aflink.py @@ -159,7 +159,7 @@ def __init__(self, self.model = AFLinkModel() try: - import torch_musa + import torch_musa IS_MUSA_AVAILABLE = True except Exception: IS_MUSA_AVAILABLE = False diff --git a/mmdet/utils/benchmark.py b/mmdet/utils/benchmark.py index e5efc065c7d..293432094e6 100644 --- a/mmdet/utils/benchmark.py +++ b/mmdet/utils/benchmark.py @@ -13,13 +13,13 @@ from mmengine import MMLogger from mmengine.config import Config from mmengine.device import get_max_cuda_memory +from mmengine.device.utils import is_musa_available from mmengine.dist import get_world_size from mmengine.runner import Runner, load_checkpoint from mmengine.utils.dl_utils import set_multi_processing from torch.nn.parallel import DistributedDataParallel from mmdet.registry import DATASETS, MODELS -from mmengine.device.utils import is_musa_available try: import psutil @@ -194,7 +194,6 @@ def _init_model(self, checkpoint: str, is_fuse_conv_bn: bool) -> nn.Module: if is_fuse_conv_bn: model = fuse_conv_bn(model) - if is_musa_available(): model = model.musa() if self.distributed: @@ -203,7 +202,7 @@ def _init_model(self, checkpoint: str, is_fuse_conv_bn: bool) -> nn.Module: device_ids=[torch.musa.current_device()], broadcast_buffers=False, find_unused_parameters=False) - else : + else: model = model.cuda() if self.distributed: model = DistributedDataParallel( @@ -223,7 +222,8 @@ def run_once(self) -> dict: for i, data in enumerate(self.data_loader): if (i + 1) % self.log_interval == 0: - print_log('==================================', self.logger) + print_log('==================================', + self.logger) torch.musa.synchronize() start_time = time.perf_counter() @@ -255,7 +255,8 @@ def run_once(self) -> dict: for i, data in enumerate(self.data_loader): if (i + 1) % self.log_interval == 0: - print_log('==================================', self.logger) + print_log('==================================', + self.logger) torch.cuda.synchronize() start_time = time.perf_counter() diff --git a/mmdet/utils/contextmanagers.py b/mmdet/utils/contextmanagers.py index a56d587faad..2c2c8dd363c 100644 --- a/mmdet/utils/contextmanagers.py +++ b/mmdet/utils/contextmanagers.py @@ -8,6 +8,7 @@ import torch from mmengine.device.utils import is_musa_available + logger = logging.getLogger(__name__) DEBUG_COMPLETED_TIME = 
bool(os.environ.get('DEBUG_COMPLETED_TIME', False)) @@ -28,10 +29,13 @@ async def completed(trace_name='', if not streams: streams = [stream_before_context_switch] else: - streams = [s if s else stream_before_context_switch for s in streams] + streams = [ + s if s else stream_before_context_switch for s in streams + ] end_events = [ - torch.musa.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams + torch.musa.Event(enable_timing=DEBUG_COMPLETED_TIME) + for _ in streams ] if DEBUG_COMPLETED_TIME: @@ -62,7 +66,7 @@ async def completed(trace_name='', are_done = [e.query() for e in end_events] logger.debug('%s %s completed: %s streams: %s', trace_name, name, - are_done, streams) + are_done, streams) with torch.musa.stream(stream_before_context_switch): while not all(are_done): await asyncio.sleep(sleep_interval) @@ -91,10 +95,13 @@ async def completed(trace_name='', if not streams: streams = [stream_before_context_switch] else: - streams = [s if s else stream_before_context_switch for s in streams] + streams = [ + s if s else stream_before_context_switch for s in streams + ] end_events = [ - torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams + torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) + for _ in streams ] if DEBUG_COMPLETED_TIME: @@ -125,7 +132,7 @@ async def completed(trace_name='', are_done = [e.query() for e in end_events] logger.debug('%s %s completed: %s streams: %s', trace_name, name, - are_done, streams) + are_done, streams) with torch.cuda.stream(stream_before_context_switch): while not all(are_done): await asyncio.sleep(sleep_interval) @@ -173,13 +180,13 @@ async def concurrent(streamqueue: asyncio.Queue, try: with torch.musa.stream(stream): - logger.debug('%s %s is starting, stream: %s', trace_name, name, - stream) + logger.debug('%s %s is starting, stream: %s', trace_name, + name, stream) yield current = torch.musa.current_stream() assert current == stream logger.debug('%s %s has finished, stream: %s', trace_name, - name, stream) + name, stream) finally: streamqueue.task_done() streamqueue.put_nowait(stream) @@ -192,13 +199,13 @@ async def concurrent(streamqueue: asyncio.Queue, try: with torch.cuda.stream(stream): - logger.debug('%s %s is starting, stream: %s', trace_name, name, - stream) + logger.debug('%s %s is starting, stream: %s', trace_name, + name, stream) yield current = torch.cuda.current_stream() assert current == stream logger.debug('%s %s has finished, stream: %s', trace_name, - name, stream) + name, stream) finally: streamqueue.task_done() streamqueue.put_nowait(stream) diff --git a/mmdet/utils/memory.py b/mmdet/utils/memory.py index af29d4df8b0..9d3eed56e5b 100644 --- a/mmdet/utils/memory.py +++ b/mmdet/utils/memory.py @@ -5,8 +5,9 @@ from functools import wraps import torch -from mmengine.logging import MMLogger from mmengine.device.utils import is_musa_available +from mmengine.logging import MMLogger + def cast_tensor_type(inputs, src_type=None, dst_type=None): """Recursively convert Tensor in inputs from ``src_type`` to ``dst_type``. diff --git a/mmdet/utils/profiling.py b/mmdet/utils/profiling.py index 273f241b109..deda08a5738 100644 --- a/mmdet/utils/profiling.py +++ b/mmdet/utils/profiling.py @@ -5,6 +5,7 @@ import torch from mmengine.device.utils import is_musa_available + if sys.version_info >= (3, 7): @contextlib.contextmanager @@ -18,7 +19,8 @@ def profile_time(trace_name, Useful as a temporary context manager to find sweet spots of code suitable for async implementation. 
""" - if (not enabled) or not torch.cuda.is_available() and is_musa_available(): + if (not enabled + ) or not torch.cuda.is_available() and is_musa_available(): yield return if is_musa_available(): diff --git a/tests/test_apis/test_inference.py b/tests/test_apis/test_inference.py index a72710eb5a1..547ea274c70 100644 --- a/tests/test_apis/test_inference.py +++ b/tests/test_apis/test_inference.py @@ -4,16 +4,18 @@ import numpy as np import pytest import torch +from mmengine.device.utils import is_musa_available from mmdet.apis import inference_detector, init_detector from mmdet.structures import DetDataSample from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available + # TODO: Waiting to fix multiple call error bug register_all_modules() -@pytest.mark.parametrize('config', ['configs/retinanet/retinanet_r18_fpn_1x_coco.py']) +@pytest.mark.parametrize('config', + ['configs/retinanet/retinanet_r18_fpn_1x_coco.py']) @pytest.mark.parametrize('device', [ 'cpu', pytest.param( @@ -42,14 +44,13 @@ def test_init_detector(config, device): type='Pretrained', checkpoint='torchvision://resnet18')))) # for device in devices: - # pytest.set_trace() - # if device == 'cuda' and not torch.cuda.is_available(): - # pytest.skip('test requires GPU and torch+cuda') - # elif device == 'musa' and not is_musa_available(): - # print('$$$$$$$$$$$$$$$$$$$$$$$') - # pytest.skip('test requires GPU and torch+musa') - model = init_detector( - config_file, device=device, cfg_options=cfg_options) + # pytest.set_trace() + # if device == 'cuda' and not torch.cuda.is_available(): + # pytest.skip('test requires GPU and torch+cuda') + # elif device == 'musa' and not is_musa_available(): + # print('$$$$$$$$$$$$$$$$$$$$$$$') + # pytest.skip('test requires GPU and torch+musa') + model = init_detector(config_file, device=device, cfg_options=cfg_options) # test init_detector with :obj:`Path` config_path_object = Path(config_file) @@ -64,9 +65,9 @@ def test_init_detector(config, device): @pytest.mark.parametrize('config,devices', [('configs/retinanet/retinanet_r18_fpn_1x_coco.py', - ('cpu', 'cuda','musa'))]) + ('cpu', 'cuda', 'musa'))]) def test_inference_detector(config, devices): - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) project_dir = os.path.join(project_dir, '..') diff --git a/tests/test_engine/test_hooks/test_mean_teacher_hook.py b/tests/test_engine/test_hooks/test_mean_teacher_hook.py index 4692234a96e..b57a0b9d2af 100644 --- a/tests/test_engine/test_hooks/test_mean_teacher_hook.py +++ b/tests/test_engine/test_hooks/test_mean_teacher_hook.py @@ -5,13 +5,13 @@ import torch import torch.nn as nn +from mmengine.device.utils import is_musa_available from mmengine.evaluator import BaseMetric from mmengine.model import BaseModel from mmengine.optim import OptimWrapper from mmengine.registry import MODEL_WRAPPERS from mmengine.runner import Runner from torch.utils.data import Dataset -from mmengine.device.utils import is_musa_available from mmdet.registry import DATASETS from mmdet.utils import register_all_modules @@ -99,7 +99,8 @@ def tearDown(self): self.temp_dir.cleanup() def test_mean_teacher_hook(self): - device = 'cuda:0' if torch.cuda.is_available() else ('musa:0' if is_musa_available() else 'cpu') + device = 'cuda:0' if torch.cuda.is_available() else ( + 'musa:0' if is_musa_available() else 'cpu') model = 
ToyModel2().to(device) runner = Runner( model=model, diff --git a/tests/test_engine/test_runner/test_loops.py b/tests/test_engine/test_runner/test_loops.py index e17cbfbfbf7..d5f7e778947 100644 --- a/tests/test_engine/test_runner/test_loops.py +++ b/tests/test_engine/test_runner/test_loops.py @@ -5,12 +5,12 @@ import torch import torch.nn as nn +from mmengine.device.utils import is_musa_available from mmengine.evaluator import Evaluator from mmengine.model import BaseModel from mmengine.optim import OptimWrapper from mmengine.runner import Runner from torch.utils.data import Dataset -from mmengine.device.utils import is_musa_available from mmdet.registry import DATASETS from mmdet.utils import register_all_modules @@ -85,7 +85,8 @@ def tearDown(self): self.temp_dir.cleanup() def test_teacher_student_val_loop(self): - device = 'cuda:0' if torch.cuda.is_available() else ('musa:0' if is_musa_available() else 'cpu') + device = 'cuda:0' if torch.cuda.is_available() else ( + 'musa:0' if is_musa_available() else 'cpu') model = ToyModel2().to(device) evaluator = Mock() evaluator.evaluate = Mock(return_value=dict(acc=0.5)) diff --git a/tests/test_models/test_detectors/test_cornernet.py b/tests/test_models/test_detectors/test_cornernet.py index 0a161ee4abf..9a12b4f24ab 100644 --- a/tests/test_models/test_detectors/test_cornernet.py +++ b/tests/test_models/test_detectors/test_cornernet.py @@ -4,11 +4,12 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available + class TestCornerNet(TestCase): diff --git a/tests/test_models/test_detectors/test_glip.py b/tests/test_models/test_detectors/test_glip.py index 863659040ae..a0581b8050f 100644 --- a/tests/test_models/test_detectors/test_glip.py +++ b/tests/test_models/test_detectors/test_glip.py @@ -3,12 +3,12 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestGLIP(TestCase): @@ -38,7 +38,7 @@ def test_glip_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) diff --git a/tests/test_models/test_detectors/test_kd_single_stage.py b/tests/test_models/test_detectors/test_kd_single_stage.py index a585788b795..a02eb2a45d0 100644 --- a/tests/test_models/test_detectors/test_kd_single_stage.py +++ b/tests/test_models/test_detectors/test_kd_single_stage.py @@ -3,13 +3,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet import * # noqa from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestKDSingleStageDetector(TestCase): @@ -28,15 +28,14 @@ def test_init(self, cfg_file): self.assertTrue(detector.neck) 
self.assertTrue(detector.bbox_head) - @parameterized.expand([('ld/ld_r18-gflv1-r101_fpn_1x_coco.py', ('cpu', - 'cuda', - 'musa'))]) + @parameterized.expand([('ld/ld_r18-gflv1-r101_fpn_1x_coco.py', + ('cpu', 'cuda', 'musa'))]) def test_single_stage_forward_train(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -63,7 +62,7 @@ def test_single_stage_forward_test(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) diff --git a/tests/test_models/test_detectors/test_maskformer.py b/tests/test_models/test_detectors/test_maskformer.py index 23516553837..e0101d54ac0 100644 --- a/tests/test_models/test_detectors/test_maskformer.py +++ b/tests/test_models/test_detectors/test_maskformer.py @@ -2,13 +2,13 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS from mmdet.structures import DetDataSample from mmdet.testing._utils import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestMaskFormer(unittest.TestCase): @@ -55,7 +55,7 @@ def test_init(self): assert detector.backbone assert detector.panoptic_head - @parameterized.expand([('cpu', ), ('cuda', ),('musa',)]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_loss_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) @@ -84,7 +84,7 @@ def test_forward_predict_mode(self, device): if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') elif device == 'musa' and not is_musa_available(): - return unittest.skip('test requires GPU and torch+musa') + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( 2, @@ -172,7 +172,6 @@ def test_init(self): ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') - ]) def test_forward_loss_mode(self, device, cfg_path): print(device, cfg_path) diff --git a/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py b/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py index c08f61b2449..85dc70bd85b 100644 --- a/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py +++ b/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py @@ -2,13 +2,13 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS from mmdet.structures import DetDataSample from mmdet.testing._utils import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestTwoStagePanopticSegmentor(unittest.TestCase): diff --git a/tests/test_models/test_detectors/test_rpn.py 
b/tests/test_models/test_detectors/test_rpn.py index 97cabaa28b9..45bdcb4800c 100644 --- a/tests/test_models/test_detectors/test_rpn.py +++ b/tests/test_models/test_detectors/test_rpn.py @@ -3,12 +3,12 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestRPN(TestCase): @@ -35,7 +35,8 @@ def test_init(self, cfg_file): detector = MODELS.build(model) self.assertEqual(detector.bbox_head.num_classes, 1) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', + 'musa'))]) def test_rpn_forward_loss_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -44,7 +45,7 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -64,7 +65,8 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', + 'musa'))]) def test_rpn_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -73,7 +75,7 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -96,7 +98,8 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', + 'musa'))]) def test_rpn_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -105,7 +108,7 @@ def test_rpn_forward_tensor_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -118,7 +121,7 @@ def test_rpn_forward_tensor_mode(self, cfg_file, devices): if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') detector = detector.musa() - + packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) batch_results = detector.forward(**data, mode='tensor') diff --git a/tests/test_models/test_detectors/test_single_stage.py b/tests/test_models/test_detectors/test_single_stage.py index 071a7ce977e..26f2488e075 100644 --- a/tests/test_models/test_detectors/test_single_stage.py +++ 
b/tests/test_models/test_detectors/test_single_stage.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from parameterized import parameterized from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestSingleStageDetector(TestCase): @@ -37,11 +37,12 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda','musa')), - ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda','musa')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda','musa')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda','musa')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda', 'musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda', + 'musa')), ]) def test_single_stage_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -52,7 +53,7 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -73,18 +74,19 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): self.assertIsInstance(losses, dict) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda','musa')), - ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda','musa')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda','musa')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda','musa')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda', 'musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda', + 'musa')), ]) def test_single_stage_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -108,19 +110,19 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): self.assertIsInstance(batch_results[0], DetDataSample) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda','musa')), - ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda', + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', 'cuda', 'musa')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda','musa')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda','musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda', 'musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda', + 'musa')), ]) def 
test_single_stage_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) diff --git a/tests/test_models/test_detectors/test_single_stage_instance_seg.py b/tests/test_models/test_detectors/test_single_stage_instance_seg.py index d7927f38912..5fc40a03050 100644 --- a/tests/test_models/test_detectors/test_single_stage_instance_seg.py +++ b/tests/test_models/test_detectors/test_single_stage_instance_seg.py @@ -3,12 +3,12 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestSingleStageInstanceSegmentor(TestCase): @@ -34,9 +34,9 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([ - ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa')), - ('solov2/solov2-light_r18_fpn_ms-3x_coco.py', ('cpu', 'cuda','musa')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda','musa')), + ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('solov2/solov2-light_r18_fpn_ms-3x_coco.py', ('cpu', 'cuda', 'musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda', 'musa')), ]) def test_single_stage_forward_loss_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) @@ -46,7 +46,7 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -68,8 +68,9 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): self.assertIsInstance(losses, dict) @parameterized.expand([ - ('solo/decoupled-solo-light_r50_fpn_3x_coco.py', ('cpu', 'cuda','musa')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda','musa')), + ('solo/decoupled-solo-light_r50_fpn_3x_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda', 'musa')), ]) def test_single_stage_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) @@ -79,7 +80,7 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -104,16 +105,16 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): self.assertIsInstance(batch_results[0], DetDataSample) @parameterized.expand([ - ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa')), - ('solov2/solov2_r50_fpn_1x_coco.py', ('cpu', 'cuda','musa')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda','musa')), + ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('solov2/solov2_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda', 'musa')), ]) def test_single_stage_forward_tensor_mode(self, 
cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -126,7 +127,7 @@ def test_single_stage_forward_tensor_mode(self, cfg_file, devices): if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') detector = detector.musa() - + packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) batch_results = detector.forward(**data, mode='tensor') diff --git a/tests/test_models/test_detectors/test_two_stage.py b/tests/test_models/test_detectors/test_two_stage.py index e20cd81489e..4f42b93caf8 100644 --- a/tests/test_models/test_detectors/test_two_stage.py +++ b/tests/test_models/test_detectors/test_two_stage.py @@ -3,12 +3,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available + class TestTwoStageBBox(TestCase): diff --git a/tests/test_models/test_mot/test_byte_track.py b/tests/test_models/test_mot/test_byte_track.py index 5d7c0e70796..67477eb4fcf 100644 --- a/tests/test_models/test_mot/test_byte_track.py +++ b/tests/test_models/test_mot/test_byte_track.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_track_inputs, get_detector_cfg -from mmengine.device.utils import is_musa_available class TestByteTrack(TestCase): @@ -34,14 +34,14 @@ def test_bytetrack_init(self, cfg_file): @parameterized.expand([ ('bytetrack/bytetrack_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda','musa')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_bytetrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -69,7 +69,7 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('bytetrack/bytetrack_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda','musa')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -77,7 +77,7 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_mot/test_deep_sort.py 
b/tests/test_models/test_mot/test_deep_sort.py index 99883b7e9b8..028942a8235 100644 --- a/tests/test_models/test_mot/test_deep_sort.py +++ b/tests/test_models/test_mot/test_deep_sort.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg -from mmengine.device.utils import is_musa_available class TestDeepSORT(TestCase): @@ -32,7 +32,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('deepsort/deepsort_faster-rcnn_r50_fpn_8xb2-4e' - '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), + '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -40,7 +40,7 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_mot/test_oc_sort.py b/tests/test_models/test_mot/test_oc_sort.py index 2f6ad8005df..8026d2f4e16 100644 --- a/tests/test_models/test_mot/test_oc_sort.py +++ b/tests/test_models/test_mot/test_oc_sort.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_track_inputs, get_detector_cfg -from mmengine.device.utils import is_musa_available class TestByteTrack(TestCase): @@ -34,14 +34,14 @@ def test_bytetrack_init(self, cfg_file): @parameterized.expand([ ('ocsort/ocsort_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda','musa')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_bytetrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -69,7 +69,7 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('ocsort/ocsort_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda','musa')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -77,7 +77,7 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_mot/test_qdtrack.py b/tests/test_models/test_mot/test_qdtrack.py index 
79987fefd7c..b14a83d6a4c 100644 --- a/tests/test_models/test_mot/test_qdtrack.py +++ b/tests/test_models/test_mot/test_qdtrack.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg -from mmengine.device.utils import is_musa_available class TestQDTrack(TestCase): @@ -32,14 +32,14 @@ def test_qdtrack_init(self, cfg_file): @parameterized.expand([ ('qdtrack/qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17' - 'halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), + 'halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_qdtrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_qdtrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -69,7 +69,7 @@ def test_qdtrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('qdtrack/qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17' - 'halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), + 'halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_qdtrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -77,7 +77,7 @@ def test_qdtrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -91,7 +91,7 @@ def test_qdtrack_forward_predict_mode(self, cfg_file, devices): if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') model = model.musa() - + packed_inputs = demo_track_inputs( batch_size=1, num_frames=1, image_shapes=(3, 128, 128)) out_data = model.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_mot/test_sort.py b/tests/test_models/test_mot/test_sort.py index 88edd8f8a34..87a91180a26 100644 --- a/tests/test_models/test_mot/test_sort.py +++ b/tests/test_models/test_mot/test_sort.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg -from mmengine.device.utils import is_musa_available class TestDeepSORT(TestCase): @@ -31,7 +31,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('sort/sort_faster-rcnn_r50_fpn_8xb2-4e' - '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), + '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -39,7 +39,7 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert 
all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_mot/test_strong_sort.py b/tests/test_models/test_mot/test_strong_sort.py index ede520fe725..ea1202f0fee 100644 --- a/tests/test_models/test_mot/test_strong_sort.py +++ b/tests/test_models/test_mot/test_strong_sort.py @@ -4,13 +4,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_track_inputs, get_detector_cfg -from mmengine.device.utils import is_musa_available class TestDeepSORT(TestCase): @@ -41,7 +41,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('strongsort/strongsort_yolox_x_8xb4-80e_crowdhuman' - '-mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda','musa')), + '-mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_strongsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -49,7 +49,7 @@ def test_strongsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda','musa'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_necks/test_ct_resnet_neck.py b/tests/test_models/test_necks/test_ct_resnet_neck.py index 12b7a31a237..35cd9d3a140 100644 --- a/tests/test_models/test_necks/test_ct_resnet_neck.py +++ b/tests/test_models/test_necks/test_ct_resnet_neck.py @@ -2,9 +2,9 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from mmdet.models.necks import CTResNetNeck -from mmengine.device.utils import is_musa_available class TestCTResNetNeck(unittest.TestCase): diff --git a/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py b/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py index 5f9139c8281..ca272a00783 100644 --- a/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py +++ b/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py @@ -3,15 +3,15 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads.bbox_heads import DoubleConvFCBBoxHead -from mmengine.device.utils import is_musa_available class TestDoubleBboxHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_cascade_roi_head.py b/tests/test_models/test_roi_heads/test_cascade_roi_head.py index 425a8eec7f5..6f89996e90c 100644 --- a/tests/test_models/test_roi_heads/test_cascade_roi_head.py +++ b/tests/test_models/test_roi_heads/test_cascade_roi_head.py @@ -3,12 +3,13 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import StandardRoIHead # noqa from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg -from mmengine.device.utils import 
is_musa_available + class TestCascadeRoIHead(TestCase): @@ -36,20 +37,22 @@ def test_cascade_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - + if is_musa_available(): roi_head = roi_head.musa() feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): feats.append( torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='musa')) + s // (2**(i + 2))).to(device='musa')) feats = tuple(feats) # When truth is non-empty then both cls, box, and mask loss # should be nonzero for random inputs img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -65,7 +68,8 @@ def test_cascade_roi_head_loss(self, cfg_file): # When there is no truth, the cls loss should be nonzero but # there should be no box and mask loss. - proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -83,16 +87,18 @@ def test_cascade_roi_head_loss(self, cfg_file): else: roi_head = roi_head.cuda() feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): feats.append( torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) + s // (2**(i + 2))).to(device='cuda')) feats = tuple(feats) # When truth is non-empty then both cls, box, and mask loss # should be nonzero for random inputs img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -108,7 +114,8 @@ def test_cascade_roi_head_loss(self, cfg_file): # When there is no truth, the cls loss should be nonzero but # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], diff --git a/tests/test_models/test_roi_heads/test_dynamic_roi_head.py b/tests/test_models/test_roi_heads/test_dynamic_roi_head.py index b1105ef32a8..cd6a35ebd56 100644 --- a/tests/test_models/test_roi_heads/test_dynamic_roi_head.py +++ b/tests/test_models/test_roi_heads/test_dynamic_roi_head.py @@ -3,12 +3,12 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestDynamicRoIHead(TestCase): @@ -22,7 +22,7 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_dynamic_roi_head_loss(self, device): """Tests trident roi head predict.""" if not torch.cuda.is_available() and device == 'cuda': diff --git a/tests/test_models/test_roi_heads/test_grid_roi_head.py b/tests/test_models/test_roi_heads/test_grid_roi_head.py index ab8c4f76f40..0a53bf3aa28 100644 --- a/tests/test_models/test_roi_heads/test_grid_roi_head.py +++ b/tests/test_models/test_roi_heads/test_grid_roi_head.py @@ -3,12 +3,12 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg from mmdet.utils import register_all_modules -from mmengine.device.utils import is_musa_available class TestGridRoIHead(TestCase): @@ -22,7 +22,7 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_roi_head_loss(self, device): """Tests trident roi head predict.""" if device == 'cuda': @@ -75,7 +75,7 @@ def test_grid_roi_head_loss(self, device): 'loss_grid', out, 'grid loss should be passed when there are no true boxes') - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_roi_head_predict(self, device): """Tests trident roi head predict.""" if device == 'cuda': @@ -84,7 +84,7 @@ def test_grid_roi_head_predict(self, device): elif device == 'musa': if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') - + roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 @@ -106,7 +106,7 @@ def test_grid_roi_head_predict(self, device): image_shapes=image_shapes, num_proposals=100, device=device) roi_head.predict(feats, proposals_list, batch_data_samples) - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_roi_head_forward(self, device): """Tests trident roi head forward.""" if device == 'cuda': @@ -115,7 +115,7 @@ def test_grid_roi_head_forward(self, device): elif device == 'musa': if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') - + roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 diff --git 
a/tests/test_models/test_roi_heads/test_htc_roi_head.py b/tests/test_models/test_roi_heads/test_htc_roi_head.py index 4dc7ad31e47..15db0b8c6b1 100644 --- a/tests/test_models/test_roi_heads/test_htc_roi_head.py +++ b/tests/test_models/test_roi_heads/test_htc_roi_head.py @@ -3,12 +3,12 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import HybridTaskCascadeRoIHead # noqa from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg -from mmengine.device.utils import is_musa_available class TestHTCRoIHead(TestCase): @@ -39,16 +39,18 @@ def test_htc_roi_head_loss(self, cfg_file): if is_musa_available(): roi_head = roi_head.musa() feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): feats.append( torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='musa')) + s // (2**(i + 2))).to(device='musa')) feats = tuple(feats) # When truth is non-empty then both cls, box, and mask loss # should be nonzero for random inputs img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -65,7 +67,8 @@ def test_htc_roi_head_loss(self, cfg_file): # When there is no truth, the cls loss should be nonzero but # there should be no box and mask loss. - proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -84,16 +87,18 @@ def test_htc_roi_head_loss(self, cfg_file): else: roi_head = roi_head.musa() feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): feats.append( torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) + s // (2**(i + 2))).to(device='cuda')) feats = tuple(feats) # When truth is non-empty then both cls, box, and mask loss # should be nonzero for random inputs img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -110,7 +115,8 @@ def test_htc_roi_head_loss(self, cfg_file): # When there is no truth, the cls loss should be nonzero but # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -142,14 +148,16 @@ def test_htc_roi_head_predict(self, cfg_file): if is_musa_available(): roi_head = roi_head.musa() feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): feats.append( torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='musa')) + s // (2**(i + 2))).to(device='musa')) feats = tuple(feats) img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], @@ -163,14 +171,16 @@ def test_htc_roi_head_predict(self, cfg_file): else: roi_head = roi_head.cuda() feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): feats.append( torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) + s // (2**(i + 2))).to(device='cuda')) feats = tuple(feats) img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') batch_data_samples = demo_mm_inputs( batch_size=1, image_shapes=[(3, s, s)], diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py index 8e9e2721744..886e40c0b2b 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py @@ -1,10 +1,10 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads.mask_heads import CoarseMaskHead -from mmengine.device.utils import is_musa_available class TestCoarseMaskHead(unittest.TestCase): @@ -16,7 +16,7 @@ def test_init(self): with self.assertRaises(AssertionError): CoarseMaskHead(downsample_factor=0.5) - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py index c2ae0b81373..91511c37206 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py @@ -4,16 +4,16 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmengine.structures import InstanceData from parameterized import parameterized from mmdet.models.roi_heads.mask_heads import FCNMaskHead -from mmengine.device.utils import is_musa_available class TestFCNMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_get_seg_masks(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py 
b/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py index e41fabbc8c2..b3c931b93b9 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py @@ -3,16 +3,16 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor from mmdet.models.roi_heads.mask_heads import FeatureRelayHead -from mmengine.device.utils import is_musa_available class TestFeatureRelayHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py index 951276e83d3..fca49e43e03 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py @@ -3,16 +3,16 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor from mmdet.models.roi_heads.mask_heads import FusedSemanticHead -from mmengine.device.utils import is_musa_available class TestFusedSemanticHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py index df7e19530a3..1a407149a3c 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py @@ -3,16 +3,16 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor from mmdet.models.roi_heads.mask_heads import GlobalContextHead -from mmengine.device.utils import is_musa_available class TestGlobalContextHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py index f7f7c9faff9..a2837954fc0 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmengine.structures import InstanceData from parameterized import parameterized @@ -11,12 +12,11 @@ from mmdet.models.utils import unpack_gt_instances from mmdet.testing import (demo_mm_inputs, demo_mm_proposals, demo_mm_sampling_results) -from mmengine.device.utils import is_musa_available class TestGridHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_head_loss(self, device): if device == 'cuda': if not 
torch.cuda.is_available(): @@ -24,7 +24,7 @@ def test_grid_head_loss(self, device): elif device == 'musa': if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') - + grid_head = GridHead() grid_head.to(device=device) @@ -58,7 +58,7 @@ def test_grid_head_loss(self, device): grid_head.loss(grid_pred, sample_idx, sampling_results, train_cfg) - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_mask_iou_head_predict_by_feat(self, device): if device == 'cuda': if not torch.cuda.is_available(): @@ -66,7 +66,7 @@ def test_mask_iou_head_predict_by_feat(self, device): if device == 'musa': if not is_musa_available(): return unittest.skip('test requires GPU and torch+musa') - + grid_head = GridHead() grid_head.to(device=device) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py index 1c301bc92e6..652f3f9e80c 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py @@ -3,16 +3,16 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor from mmdet.models.roi_heads.mask_heads import HTCMaskHead -from mmengine.device.utils import is_musa_available class TestHTCMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py index 81034df8a68..be3f7b81c34 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmengine.structures import InstanceData from parameterized import parameterized @@ -12,12 +13,11 @@ from mmdet.structures.mask import mask_target from mmdet.testing import (demo_mm_inputs, demo_mm_proposals, demo_mm_sampling_results) -from mmengine.device.utils import is_musa_available class TestMaskIoUHead(TestCase): - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_mask_iou_head_loss_and_target(self, device): if device == 'cuda': if not torch.cuda.is_available(): @@ -70,7 +70,7 @@ def test_mask_iou_head_loss_and_target(self, device): mask_targets, sampling_results, batch_gt_instances, train_cfg) - @parameterized.expand(['cpu', 'cuda','musa']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_mask_iou_head_predict_by_feat(self, device): if device == 'cuda': if not torch.cuda.is_available(): diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py index 009f9b9ec69..252e66ea875 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py @@ -3,16 +3,16 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor from 
mmdet.models.roi_heads.mask_heads import SCNetMaskHead
-from mmengine.device.utils import is_musa_available
 
 
 class TestSCNetMaskHead(TestCase):
 
-    @parameterized.expand(['cpu', 'cuda','musa'])
+    @parameterized.expand(['cpu', 'cuda', 'musa'])
     def test_forward(self, device):
         if device == 'cuda':
             if not torch.cuda.is_available():
diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py
index c1a2f78b275..492954fb042 100644
--- a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py
+++ b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py
@@ -3,16 +3,16 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 from torch import Tensor
 
 from mmdet.models.roi_heads.mask_heads import SCNetSemanticHead
-from mmengine.device.utils import is_musa_available
 
 
 class TestSCNetSemanticHead(TestCase):
 
-    @parameterized.expand(['cpu', 'cuda','musa'])
+    @parameterized.expand(['cpu', 'cuda', 'musa'])
     def test_forward_loss(self, device):
         if device == 'cuda':
             if not torch.cuda.is_available():
diff --git a/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py b/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py
index ba26c887b95..6fb02cca77d 100644
--- a/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py
+++ b/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py
@@ -3,11 +3,11 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
 from mmdet.utils import register_all_modules
-from mmengine.device.utils import is_musa_available
 
 
 class TestMaskScoringRoiHead(TestCase):
@@ -36,7 +36,7 @@ def test_mask_scoring_roi_head_loss(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
 
             image_shapes = [(3, s, s)]
             batch_data_samples = demo_mm_inputs(
@@ -53,9 +53,12 @@ def test_mask_scoring_roi_head_loss(self):
             loss_cls = out['loss_cls']
             loss_bbox = out['loss_bbox']
             loss_mask = out['loss_mask']
-            self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero')
-            self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero')
-            self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero')
+            self.assertGreater(loss_cls.sum(), 0,
+                               'cls loss should be non-zero')
+            self.assertGreater(loss_bbox.sum(), 0,
+                               'box loss should be non-zero')
+            self.assertGreater(loss_mask.sum(), 0,
+                               'mask loss should be non-zero')
 
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
@@ -72,7 +75,7 @@ def test_mask_scoring_roi_head_loss(self):
             empty_bbox_loss = out['loss_bbox']
             empty_mask_loss = out['loss_mask']
             self.assertGreater(empty_cls_loss.sum(), 0,
-                               'cls loss should be non-zero')
+                               'cls loss should be non-zero')
             self.assertEqual(
                 empty_bbox_loss.sum(), 0,
                 'there should be no box loss when there are no true boxes')
@@ -86,7 +89,7 @@ def test_mask_scoring_roi_head_loss(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
 
             image_shapes = [(3, s, s)]
             batch_data_samples = demo_mm_inputs(
@@ -103,9 +106,12 @@ def test_mask_scoring_roi_head_loss(self):
             loss_cls = out['loss_cls']
             loss_bbox = out['loss_bbox']
             loss_mask = out['loss_mask']
-            self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero')
-            self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero')
-            self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero')
+            self.assertGreater(loss_cls.sum(), 0,
+                               'cls loss should be non-zero')
+            self.assertGreater(loss_bbox.sum(), 0,
+                               'box loss should be non-zero')
+            self.assertGreater(loss_mask.sum(), 0,
+                               'mask loss should be non-zero')
 
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
@@ -122,7 +128,7 @@ def test_mask_scoring_roi_head_loss(self):
             empty_bbox_loss = out['loss_bbox']
             empty_mask_loss = out['loss_mask']
             self.assertGreater(empty_cls_loss.sum(), 0,
-                               'cls loss should be non-zero')
+                               'cls loss should be non-zero')
             self.assertEqual(
                 empty_bbox_loss.sum(), 0,
                 'there should be no box loss when there are no true boxes')
@@ -143,7 +149,7 @@ def test_mask_scoring_roi_head_predict(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
 
             image_shapes = [(3, s, s)]
             batch_data_samples = demo_mm_inputs(
@@ -163,7 +169,7 @@ def test_mask_scoring_roi_head_predict(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
 
             image_shapes = [(3, s, s)]
             batch_data_samples = demo_mm_inputs(
@@ -190,7 +196,7 @@ def test_mask_scoring_roi_head_forward(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
 
             image_shapes = [(3, s, s)]
             proposals_list = demo_mm_proposals(
@@ -203,7 +209,7 @@ def test_mask_scoring_roi_head_forward(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
 
             image_shapes = [(3, s, s)]
             proposals_list = demo_mm_proposals(
diff --git a/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py b/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py
index 00103bf9bcf..2aeddf4f77e 100644
--- a/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py
@@ -4,11 +4,11 @@
 
 import torch
 from mmengine.config import Config
+from mmengine.device.utils import is_musa_available
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals
 from mmdet.utils import register_all_modules
-from mmengine.device.utils import is_musa_available
 
 register_all_modules()
 
@@ -94,7 +94,7 @@ def test_standard_roi_head_loss(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 1, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
             feats = tuple(feats)
 
             # When truth is non-empty then emd loss should be nonzero for
@@ -135,7 +135,7 @@ def test_standard_roi_head_loss(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 1, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
             feats = tuple(feats)
 
             # When truth is non-empty then emd loss should be nonzero for
diff --git a/tests/test_models/test_roi_heads/test_pisa_roi_head.py b/tests/test_models/test_roi_heads/test_pisa_roi_head.py
index 5f4686be6b7..fc1d48fe478 100644
--- a/tests/test_models/test_roi_heads/test_pisa_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_pisa_roi_head.py
@@ -3,12 +3,12 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
 from mmdet.utils import register_all_modules
-from mmengine.device.utils import is_musa_available
 
 
 class TestPISARoIHead(TestCase):
@@ -22,7 +22,7 @@ def test_init(self):
         roi_head = MODELS.build(self.roi_head_cfg)
         self.assertTrue(roi_head.with_bbox)
 
-    @parameterized.expand(['cpu', 'cuda','musa'])
+    @parameterized.expand(['cpu', 'cuda', 'musa'])
     def test_pisa_roi_head(self, device):
         """Tests trident roi head predict."""
         if not torch.cuda.is_available() and device == 'cuda':
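The ROI-head tests in this series all select a device the same way: prefer MUSA when it is available, fall back to CUDA otherwise, and build one random feature map per FPN level on that device. For reference, a minimal sketch of that shared pattern (assuming only mmengine's is_musa_available and an already-built roi_head; the build_feats name is illustrative, not part of mmdet):

    import torch
    from mmengine.device.utils import is_musa_available

    def build_feats(roi_head, s=256, channels=256):
        # Prefer MUSA, fall back to CUDA, exactly as the tests do.
        device = 'musa' if is_musa_available() else 'cuda'
        feats = []
        for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
            # One random map per level; spatial size halves at each level.
            feats.append(
                torch.rand(1, channels, s // (2**(i + 2)),
                           s // (2**(i + 2))).to(device=device))
        return tuple(feats)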
diff --git a/tests/test_models/test_roi_heads/test_point_rend_roi_head.py b/tests/test_models/test_roi_heads/test_point_rend_roi_head.py
index 1021ee452d1..410edcf58db 100644
--- a/tests/test_models/test_roi_heads/test_point_rend_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_point_rend_roi_head.py
@@ -3,12 +3,12 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 
 from mmdet.models.roi_heads import PointRendRoIHead  # noqa
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
-from mmengine.device.utils import is_musa_available
 
 
 class TestHTCRoIHead(TestCase):
@@ -40,16 +40,18 @@ def test_point_rend_roi_head_loss(self, cfg_file):
         if is_musa_available():
             roi_head = roi_head.musa()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
             feats = tuple(feats)
 
             # When truth is non-empty then both cls, box, and mask loss
             # should be nonzero for random inputs
             img_shape_list = [img_meta['img_shape'] for img_meta in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -64,7 +66,8 @@ def test_point_rend_roi_head_loss(self, cfg_file):
                     value.sum(), 0, msg='loss should be non-zero')
 
             # Positive rois must not be empty
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -77,16 +80,18 @@ def test_point_rend_roi_head_loss(self, cfg_file):
         else:
             roi_head = roi_head.cuda()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
             feats = tuple(feats)
 
             # When truth is non-empty then both cls, box, and mask loss
             # should be nonzero for random inputs
             img_shape_list = [img_meta['img_shape'] for img_meta in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -101,7 +106,8 @@ def test_point_rend_roi_head_loss(self, cfg_file):
                     value.sum(), 0, msg='loss should be non-zero')
 
             # Positive rois must not be empty
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
diff --git a/tests/test_models/test_roi_heads/test_scnet_roi_head.py b/tests/test_models/test_roi_heads/test_scnet_roi_head.py
index c881dffcd82..b133caaf673 100644
--- a/tests/test_models/test_roi_heads/test_scnet_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_scnet_roi_head.py
@@ -3,12 +3,12 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 
 from mmdet.models.roi_heads import SCNetRoIHead  # noqa
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
-from mmengine.device.utils import is_musa_available
 
 
 class TestSCNetRoIHead(TestCase):
@@ -41,16 +41,18 @@ def test_scnet_roi_head_loss(self, cfg_file):
         if is_musa_available():
-            roi_head = roi_head.cuda()
+            roi_head = roi_head.musa()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='musa'))
             feats = tuple(feats)
 
             # When truth is non-empty then both cls, box, and mask loss
             # should be nonzero for random inputs
             img_shape_list = [(3, s, s) for _ in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -67,7 +69,8 @@ def test_scnet_roi_head_loss(self, cfg_file):
 
             # When there is no truth, the cls loss should be nonzero but
             # there should be no box and mask loss.
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -87,16 +90,18 @@ def test_scnet_roi_head_loss(self, cfg_file):
         else:
             roi_head = roi_head.cuda()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
             feats = tuple(feats)
 
             # When truth is non-empty then both cls, box, and mask loss
             # should be nonzero for random inputs
             img_shape_list = [(3, s, s) for _ in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -113,7 +118,8 @@ def test_scnet_roi_head_loss(self, cfg_file):
 
             # When there is no truth, the cls loss should be nonzero but
             # there should be no box and mask loss.
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -145,14 +151,16 @@ def test_scnet_roi_head_predict(self, cfg_file):
         if is_musa_available():
             roi_head = roi_head.musa()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
             feats = tuple(feats)
 
             img_shape_list = [(3, s, s) for _ in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -166,14 +174,16 @@ def test_scnet_roi_head_predict(self, cfg_file):
         else:
             roi_head = roi_head.cuda()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 256, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
             feats = tuple(feats)
 
             img_shape_list = [(3, s, s) for _ in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
diff --git a/tests/test_models/test_roi_heads/test_sparse_roi_head.py b/tests/test_models/test_roi_heads/test_sparse_roi_head.py
index 4335800a9a4..132e44ddcbb 100644
--- a/tests/test_models/test_roi_heads/test_sparse_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_sparse_roi_head.py
@@ -4,12 +4,12 @@
 
 import torch
 import torch.nn as nn
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 
 from mmdet.models.roi_heads import StandardRoIHead  # noqa
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
-from mmengine.device.utils import is_musa_available
 
 
 class TestCascadeRoIHead(TestCase):
@@ -40,22 +40,25 @@ def test_cascade_roi_head_loss(self, cfg_file):
         if is_musa_available():
             roi_head = roi_head.musa()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 1, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
             feats = tuple(feats)
 
             # When truth is non-empty then both cls, box, and mask loss
             # should be nonzero for random inputs
             img_shape_list = [(3, s, s) for _ in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             # add import elements into proposal
-            init_proposal_features = nn.Embedding(100, 256).musa().weight.clone()
+            init_proposal_features = nn.Embedding(100,
+                                                  256).musa().weight.clone()
             for proposal in proposal_list:
                 proposal.features = init_proposal_features
                 proposal.imgs_whwh = feats[0].new_tensor([[s, s, s,
-                                                           s]]).repeat(100, 1)
+                                                           s]]).repeat(100, 1)
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -71,13 +74,15 @@ def test_cascade_roi_head_loss(self, cfg_file):
 
             # When there is no truth, the cls loss should be nonzero but
             # there should be no box and mask loss.
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='musa')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='musa')
             # add import elements into proposal
-            init_proposal_features = nn.Embedding(100, 256).musa().weight.clone()
+            init_proposal_features = nn.Embedding(100,
+                                                  256).musa().weight.clone()
             for proposal in proposal_list:
                 proposal.features = init_proposal_features
                 proposal.imgs_whwh = feats[0].new_tensor([[s, s, s,
-                                                           s]]).repeat(100, 1)
+                                                           s]]).repeat(100, 1)
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -92,26 +97,29 @@ def test_cascade_roi_head_loss(self, cfg_file):
                     value.sum(), 0, msg='loss should be non-zero')
                 elif 'loss_bbox' in name or 'loss_mask' in name:
                     self.assertEqual(value.sum(), 0)
-
+
         else:
             roi_head = roi_head.cuda()
             feats = []
-            for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
+            for i in range(
+                    len(roi_head_cfg.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 1, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
             feats = tuple(feats)
 
             # When truth is non-empty then both cls, box, and mask loss
             # should be nonzero for random inputs
             img_shape_list = [(3, s, s) for _ in img_metas]
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             # add import elements into proposal
-            init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone()
+            init_proposal_features = nn.Embedding(100,
+                                                  256).cuda().weight.clone()
             for proposal in proposal_list:
                 proposal.features = init_proposal_features
                 proposal.imgs_whwh = feats[0].new_tensor([[s, s, s,
-                                                           s]]).repeat(100, 1)
+                                                           s]]).repeat(100, 1)
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
@@ -127,13 +135,15 @@ def test_cascade_roi_head_loss(self, cfg_file):
 
             # When there is no truth, the cls loss should be nonzero but
             # there should be no box and mask loss.
-            proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda')
+            proposal_list = demo_mm_proposals(
+                img_shape_list, 100, device='cuda')
             # add import elements into proposal
-            init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone()
+            init_proposal_features = nn.Embedding(100,
+                                                  256).cuda().weight.clone()
             for proposal in proposal_list:
                 proposal.features = init_proposal_features
                 proposal.imgs_whwh = feats[0].new_tensor([[s, s, s,
-                                                           s]]).repeat(100, 1)
+                                                           s]]).repeat(100, 1)
             batch_data_samples = demo_mm_inputs(
                 batch_size=1,
                 image_shapes=[(3, s, s)],
diff --git a/tests/test_models/test_roi_heads/test_standard_roi_head.py b/tests/test_models/test_roi_heads/test_standard_roi_head.py
index 7661a7c16f5..6bd7db1f719 100644
--- a/tests/test_models/test_roi_heads/test_standard_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_standard_roi_head.py
@@ -4,12 +4,12 @@
 
 import torch
 from mmengine.config import Config
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals
 from mmdet.utils import register_all_modules
-from mmengine.device.utils import is_musa_available
 
 register_all_modules()
 
@@ -155,11 +155,11 @@ def test_standard_roi_head_loss(self, with_shared_head):
             if not with_shared_head:
                 feats.append(
                     torch.rand(1, 1, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
             else:
                 feats.append(
                     torch.rand(1, 1024, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
         feats = tuple(feats)
 
         # When truth is non-empty then both cls, box, and mask loss
@@ -179,9 +179,12 @@ def test_standard_roi_head_loss(self, with_shared_head):
             loss_cls = out['loss_cls']
             loss_bbox = out['loss_bbox']
             loss_mask = out['loss_mask']
-            self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero')
-            self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero')
-            self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero')
+            self.assertGreater(loss_cls.sum(), 0,
+                               'cls loss should be non-zero')
+            self.assertGreater(loss_bbox.sum(), 0,
+                               'box loss should be non-zero')
+            self.assertGreater(loss_mask.sum(), 0,
+                               'mask loss should be non-zero')
 
             # When there is no truth, the cls loss should be nonzero but
             # there should be no box and mask loss.
@@ -199,7 +202,7 @@ def test_standard_roi_head_loss(self, with_shared_head):
             empty_bbox_loss = out['loss_bbox']
             empty_mask_loss = out['loss_mask']
             self.assertGreater(empty_cls_loss.sum(), 0,
-                               'cls loss should be non-zero')
+                               'cls loss should be non-zero')
             self.assertEqual(
                 empty_bbox_loss.sum(), 0,
                 'there should be no box loss when there are no true boxes')
@@ -214,11 +217,11 @@ def test_standard_roi_head_loss(self, with_shared_head):
             if not with_shared_head:
                 feats.append(
                     torch.rand(1, 1, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
             else:
                 feats.append(
                     torch.rand(1, 1024, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
         feats = tuple(feats)
 
         # When truth is non-empty then both cls, box, and mask loss
@@ -238,9 +241,12 @@ def test_standard_roi_head_loss(self, with_shared_head):
             loss_cls = out['loss_cls']
             loss_bbox = out['loss_bbox']
             loss_mask = out['loss_mask']
-            self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero')
-            self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero')
-            self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero')
+            self.assertGreater(loss_cls.sum(), 0,
+                               'cls loss should be non-zero')
+            self.assertGreater(loss_bbox.sum(), 0,
+                               'box loss should be non-zero')
+            self.assertGreater(loss_mask.sum(), 0,
+                               'mask loss should be non-zero')
 
             # When there is no truth, the cls loss should be nonzero but
             # there should be no box and mask loss.
@@ -258,7 +264,7 @@ def test_standard_roi_head_loss(self, with_shared_head):
             empty_bbox_loss = out['loss_bbox']
             empty_mask_loss = out['loss_mask']
             self.assertGreater(empty_cls_loss.sum(), 0,
-                               'cls loss should be non-zero')
+                               'cls loss should be non-zero')
             self.assertEqual(
                 empty_bbox_loss.sum(), 0,
                 'there should be no box loss when there are no true boxes')
diff --git a/tests/test_models/test_roi_heads/test_trident_roi_head.py b/tests/test_models/test_roi_heads/test_trident_roi_head.py
index c749fb31fcd..2759ff98476 100644
--- a/tests/test_models/test_roi_heads/test_trident_roi_head.py
+++ b/tests/test_models/test_roi_heads/test_trident_roi_head.py
@@ -4,11 +4,11 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg
 from mmdet.utils import register_all_modules
-from mmengine.device.utils import is_musa_available
 
 
 class TestTridentRoIHead(TestCase):
@@ -39,7 +39,7 @@ def test_trident_roi_head_predict(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 1024, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='musa'))
+                               s // (2**(i + 2))).to(device='musa'))
 
             image_shapes = [(3, s, s)]
             batch_data_samples = demo_mm_inputs(
@@ -66,7 +66,7 @@ def test_trident_roi_head_predict(self):
             for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)):
                 feats.append(
                     torch.rand(1, 1024, s // (2**(i + 2)),
-                               s // (2**(i + 2))).to(device='cuda'))
+                               s // (2**(i + 2))).to(device='cuda'))
 
             image_shapes = [(3, s, s)]
             batch_data_samples = demo_mm_inputs(
diff --git a/tests/test_models/test_vis/test_mask2former.py b/tests/test_models/test_vis/test_mask2former.py
index fe900e36232..adc2160ccb4 100644
--- a/tests/test_models/test_vis/test_mask2former.py
+++ b/tests/test_models/test_vis/test_mask2former.py
@@ -4,13 +4,13 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 from mmengine.logging import MessageHub
 from mmengine.registry import init_default_scope
 from parameterized import parameterized
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_track_inputs, get_detector_cfg
-from mmengine.device.utils import is_musa_available
 
 
 class TestMask2Former(TestCase):
@@ -31,14 +31,14 @@ def test_mask2former_init(self, cfg_file):
 
     @parameterized.expand([
         ('mask2former_vis/mask2former_r50_8xb2-8e_youtubevis2021.py',
-         ('cpu', 'cuda','musa')),
+         ('cpu', 'cuda', 'musa')),
     ])
     def test_mask2former_forward_loss_mode(self, cfg_file, devices):
         message_hub = MessageHub.get_instance(
             f'test_mask2former_forward_loss_mode-{time.time()}')
         message_hub.update_info('iter', 0)
         message_hub.update_info('epoch', 0)
-        assert all([device in ['cpu', 'cuda','musa'] for device in devices])
+        assert all([device in ['cpu', 'cuda', 'musa'] for device in devices])
 
         for device in devices:
             _model = get_detector_cfg(cfg_file)
@@ -68,7 +68,7 @@ def test_mask2former_forward_loss_mode(self, cfg_file, devices):
 
     @parameterized.expand([
         ('mask2former_vis/mask2former_r50_8xb2-8e_youtubevis2021.py',
-         ('cpu', 'cuda','musa')),
+         ('cpu', 'cuda', 'musa')),
     ])
     def test_mask2former_forward_predict_mode(self, cfg_file, devices):
         message_hub = MessageHub.get_instance(
@@ -76,7 +76,7 @@ def test_mask2former_forward_predict_mode(self, cfg_file, devices):
         message_hub.update_info('iter', 0)
         message_hub.update_info('epoch', 0)
 
-        assert all([device in ['cpu', 'cuda','musa'] for device in devices])
+        assert all([device in ['cpu', 'cuda', 'musa'] for device in devices])
 
         for device in devices:
             _model = get_detector_cfg(cfg_file)
diff --git a/tests/test_models/test_vis/test_masktrack_rcnn.py b/tests/test_models/test_vis/test_masktrack_rcnn.py
index 38e02835a6c..67fd95da48a 100644
--- a/tests/test_models/test_vis/test_masktrack_rcnn.py
+++ b/tests/test_models/test_vis/test_masktrack_rcnn.py
@@ -4,13 +4,13 @@
 from unittest import TestCase
 
 import torch
+from mmengine.device.utils import is_musa_available
 from mmengine.logging import MessageHub
 from mmengine.registry import init_default_scope
 from parameterized import parameterized
 
 from mmdet.registry import MODELS
 from mmdet.testing import demo_track_inputs, get_detector_cfg
-from mmengine.device.utils import is_musa_available
 
 
 class TestMaskTrackRCNN(TestCase):
@@ -33,14 +33,14 @@ def test_mask_track_rcnn_init(self, cfg_file):
     @parameterized.expand([
         (
             'masktrack_rcnn/masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019.py',  # noqa: E501
-            ('cpu', 'cuda','musa')),
+            ('cpu', 'cuda', 'musa')),
     ])
     def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices):
         message_hub = MessageHub.get_instance(
             f'test_mask_track_rcnn_forward_loss_mode-{time.time()}')
         message_hub.update_info('iter', 0)
         message_hub.update_info('epoch', 0)
-        assert all([device in ['cpu', 'cuda','musa'] for device in devices])
+        assert all([device in ['cpu', 'cuda', 'musa'] for device in devices])
 
         for device in devices:
             _model = get_detector_cfg(cfg_file)
@@ -70,7 +70,7 @@ def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices):
     @parameterized.expand([
         (
            'masktrack_rcnn/masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019.py',  # noqa: E501
-            ('cpu', 'cuda','musa')),
+            ('cpu', 'cuda', 'musa')),
     ])
     def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices):
         message_hub = MessageHub.get_instance(
@@ -78,7 +78,7 @@ def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices):
         message_hub.update_info('iter', 0)
         message_hub.update_info('epoch', 0)
 
-        assert all([device in ['cpu', 'cuda','musa'] for device in devices])
+        assert all([device in ['cpu', 'cuda', 'musa'] for device in devices])
 
         for device in devices:
             _model = get_detector_cfg(cfg_file)
diff --git a/tests/test_structures/test_bbox/test_base_boxes.py b/tests/test_structures/test_bbox/test_base_boxes.py
index 8ce025cc7e2..79127e00c99 100644
--- a/tests/test_structures/test_bbox/test_base_boxes.py
+++ b/tests/test_structures/test_bbox/test_base_boxes.py
@@ -2,10 +2,10 @@
 
 import numpy as np
 import torch
+from mmengine.device.utils import is_musa_available
 from mmengine.testing import assert_allclose
 
 from .utils import ToyBaseBoxes
-from mmengine.device.utils import is_musa_available
 
 
 class TestBaseBoxes(TestCase):
diff --git a/tests/test_utils/test_benchmark.py b/tests/test_utils/test_benchmark.py
index 0a305f521e5..6c52322203b 100644
--- a/tests/test_utils/test_benchmark.py
+++ b/tests/test_utils/test_benchmark.py
@@ -6,6 +6,7 @@
 import torch
 from mmengine import Config, MMLogger
 from mmengine.dataset import Compose
+from mmengine.device.utils import is_musa_available
 from mmengine.model import BaseModel
 from torch.utils.data import Dataset
 
@@ -13,7 +14,6 @@
 from mmdet.utils import register_all_modules
 from mmdet.utils.benchmark import (DataLoaderBenchmark, DatasetBenchmark,
                                    InferenceBenchmark)
-from mmengine.device.utils import is_musa_available
 
 
 @MODELS.register_module()
@@ -84,7 +84,8 @@ def setUp(self) -> None:
         self.max_iter = 10
         self.log_interval = 5
 
-    @unittest.skipIf(not torch.cuda.is_available() and not torch.cuda.is_available(),
+    @unittest.skipIf(not torch.cuda.is_available()
+                     and not is_musa_available(),
                      'test requires GPU and torch+cuda+musa')
     def test_init_and_run(self):
         checkpoint_path = os.path.join(tempfile.gettempdir(), 'checkpoint.pth')
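Every test file in the patch above repeats the same cpu/cuda/musa parameterization, which is why the `'cuda','musa'` comma fixes recur so often. A self-contained skeleton of that pattern, for reference (the test class and the tensor check are illustrative only, not taken from mmdet):

    import unittest

    import torch
    from mmengine.device.utils import is_musa_available
    from parameterized import parameterized


    class TestDeviceParameterized(unittest.TestCase):

        @parameterized.expand(['cpu', 'cuda', 'musa'])
        def test_forward(self, device):
            # Skip rather than fail on machines without the backend.
            if device == 'cuda' and not torch.cuda.is_available():
                self.skipTest('test requires GPU and torch+cuda')
            if device == 'musa' and not is_musa_available():
                self.skipTest('test requires GPU and torch+musa')
            x = torch.rand(1, 3, 32, 32).to(device)
            self.assertEqual(x.device.type, device)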
From e5c4b4fbd6483861c71026f589b55de8cc40f48c Mon Sep 17 00:00:00 2001
From: "jianlong.qu-ext" 
Date: Wed, 19 Feb 2025 14:37:45 +0800
Subject: [PATCH 3/3] delete comments

---
 mmdet/__init__.py                                        | 2 +-
 mmdet/apis/inference.py                                  | 1 -
 mmdet/models/layers/se_layer.py                          | 1 +
 mmdet/models/task_modules/assigners/iou2d_calculator.py  | 3 ++-
 mmdet/models/task_modules/tracking/aflink.py             | 5 -----
 mmdet/utils/contextmanagers.py                           | 8 ++++----
 tests/test_apis/test_inference.py                        | 8 --------
 7 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/mmdet/__init__.py b/mmdet/__init__.py
index 49589e46b26..3ac884ac8b4 100644
--- a/mmdet/__init__.py
+++ b/mmdet/__init__.py
@@ -6,7 +6,7 @@
 from .version import __version__, version_info
 
 mmcv_minimum_version = '2.0.0rc4'
-mmcv_maximum_version = '2.2.1'
+mmcv_maximum_version = '2.2.0'
 mmcv_version = digit_version(mmcv.__version__)
 
 mmengine_minimum_version = '0.7.1'
diff --git a/mmdet/apis/inference.py b/mmdet/apis/inference.py
index d3d8ae0a7e2..df7e051b6c3 100644
--- a/mmdet/apis/inference.py
+++ b/mmdet/apis/inference.py
@@ -289,7 +289,6 @@ def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
         test_pipeline = build_test_pipeline(cfg)
     data = test_pipeline(data)
 
-    # if not next(model.parameters()).is_cuda:
     if not next(model.parameters()).is_cuda and not (next(
             model.parameters()).device.type == 'musa'):
diff --git a/mmdet/models/layers/se_layer.py b/mmdet/models/layers/se_layer.py
index 5b7a280809a..8bb14b86ce4 100644
--- a/mmdet/models/layers/se_layer.py
+++ b/mmdet/models/layers/se_layer.py
@@ -154,6 +154,7 @@ def __init__(self, channels: int, init_cfg: OptMultiConfig = None) -> None:
         self.act = nn.Hardsigmoid(inplace=True)
 
     def forward(self, x: Tensor) -> Tensor:
+        """Forward function for ChannelAttention."""
         if x.device.type == 'musa':
             with torch_musa.core.amp.autocast(enabled=False):
                 out = self.global_avgpool(x)
diff --git a/mmdet/models/task_modules/assigners/iou2d_calculator.py b/mmdet/models/task_modules/assigners/iou2d_calculator.py
index facb63a8acb..e4fc586ed8e 100644
--- a/mmdet/models/task_modules/assigners/iou2d_calculator.py
+++ b/mmdet/models/task_modules/assigners/iou2d_calculator.py
@@ -54,7 +54,8 @@ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
         bboxes1 = cast_tensor_type(bboxes1, self.scale, self.dtype)
         bboxes2 = cast_tensor_type(bboxes2, self.scale, self.dtype)
         overlaps = bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)
-        if not overlaps.is_cuda and overlaps.device.type != 'musa' and overlaps.dtype == torch.float16:
+        if (not overlaps.is_cuda and overlaps.device.type != 'musa'
+                and overlaps.dtype == torch.float16):
             # resume cpu float32
             overlaps = overlaps.float()
         return overlaps
diff --git a/mmdet/models/task_modules/tracking/aflink.py b/mmdet/models/task_modules/tracking/aflink.py
index 88fd9a6f482..0f83d26ca11 100644
--- a/mmdet/models/task_modules/tracking/aflink.py
+++ b/mmdet/models/task_modules/tracking/aflink.py
@@ -158,11 +158,6 @@ def __init__(self,
         self.confidence_threshold = confidence_threshold
 
         self.model = AFLinkModel()
-        try:
-            import torch_musa
-            IS_MUSA_AVAILABLE = True
-        except Exception:
-            IS_MUSA_AVAILABLE = False
         if checkpoint:
             load_checkpoint(self.model, checkpoint)
         if torch.cuda.is_available():
diff --git a/mmdet/utils/contextmanagers.py b/mmdet/utils/contextmanagers.py
index 2c2c8dd363c..3d01b7602be 100644
--- a/mmdet/utils/contextmanagers.py
+++ b/mmdet/utils/contextmanagers.py
@@ -59,8 +59,8 @@ async def completed(trace_name='',
 
     grad_enabled_after = torch.is_grad_enabled()
 
-    # observed change of torch.is_grad_enabled() during concurrent run of
-    # async_test_bboxes code
+    # observed change of torch.is_grad_enabled() during concurrent
+    # run of async_test_bboxes code
     assert (grad_enabled_before == grad_enabled_after
             ), 'Unexpected is_grad_enabled() value change'
 
@@ -125,8 +125,8 @@ async def completed(trace_name='',
 
    grad_enabled_after = torch.is_grad_enabled()
 
-    # observed change of torch.is_grad_enabled() during concurrent run of
-    # async_test_bboxes code
+    # observed change of torch.is_grad_enabled() during concurrent
+    # run of async_test_bboxes code
     assert (grad_enabled_before == grad_enabled_after
             ), 'Unexpected is_grad_enabled() value change'
 
diff --git a/tests/test_apis/test_inference.py b/tests/test_apis/test_inference.py
index 547ea274c70..5d977928cf9 100644
--- a/tests/test_apis/test_inference.py
+++ b/tests/test_apis/test_inference.py
@@ -28,7 +28,6 @@
         not is_musa_available(), reason='requires musa support')),
 ])
 def test_init_detector(config, device):
-    # assert all([device in ['cpu', 'cuda','musa'] for device in devices])
     project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
     project_dir = os.path.join(project_dir, '..')
 
@@ -43,13 +42,6 @@ def test_init_detector(config, device):
             init_cfg=dict(
                 type='Pretrained', checkpoint='torchvision://resnet18'))))
 
-    # for device in devices:
-    #     pytest.set_trace()
-    #     if device == 'cuda' and not torch.cuda.is_available():
-    #         pytest.skip('test requires GPU and torch+cuda')
-    #     elif device == 'musa' and not is_musa_available():
-    #         print('$$$$$$$$$$$$$$$$$$$$$$$')
-    #         pytest.skip('test requires GPU and torch+musa')
     model = init_detector(config_file, device=device, cfg_options=cfg_options)
 
     # test init_detector with :obj:`Path`
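The device checks this last patch tidies up (in inference.py and iou2d_calculator.py) both encode the same rule: a tensor counts as being on an accelerator if it is a CUDA tensor or its device type is 'musa'. A minimal sketch of that rule as a standalone helper (the function name is illustrative, not an mmdet API):

    import torch

    def on_accelerator(t: torch.Tensor) -> bool:
        # CUDA tensors report .is_cuda; MUSA tensors report device.type 'musa'.
        return t.is_cuda or t.device.type == 'musa'

    # Mirrors the fp16 fallback in iou2d_calculator.py:
    #   if not on_accelerator(overlaps) and overlaps.dtype == torch.float16:
    #       overlaps = overlaps.float()  # resume cpu float32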