diff --git a/mmdet/apis/inference.py b/mmdet/apis/inference.py
index 7e6f914ecab..df7e051b6c3 100644
--- a/mmdet/apis/inference.py
+++ b/mmdet/apis/inference.py
@@ -289,7 +289,8 @@ def inference_mot(model: nn.Module, img: np.ndarray, frame_id: int,
     test_pipeline = build_test_pipeline(cfg)
     data = test_pipeline(data)
 
-    if not next(model.parameters()).is_cuda:
+    if (not next(model.parameters()).is_cuda
+            and next(model.parameters()).device.type != 'musa'):
         for m in model.modules():
             assert not isinstance(
                 m, RoIPool
diff --git a/mmdet/models/backbones/csp_darknet.py b/mmdet/models/backbones/csp_darknet.py
index a890b486f25..03e43124d45 100644
--- a/mmdet/models/backbones/csp_darknet.py
+++ b/mmdet/models/backbones/csp_darknet.py
@@ -115,9 +115,14 @@ def __init__(self,
 
     def forward(self, x):
         x = self.conv1(x)
-        with torch.cuda.amp.autocast(enabled=False):
-            x = torch.cat(
-                [x] + [pooling(x) for pooling in self.poolings], dim=1)
+        if x.device.type == 'musa':
+            with torch_musa.core.amp.autocast(enabled=False):
+                x = torch.cat(
+                    [x] + [pooling(x) for pooling in self.poolings], dim=1)
+        else:
+            with torch.cuda.amp.autocast(enabled=False):
+                x = torch.cat(
+                    [x] + [pooling(x) for pooling in self.poolings], dim=1)
         x = self.conv2(x)
         return x
 
diff --git a/mmdet/models/layers/se_layer.py b/mmdet/models/layers/se_layer.py
index 5598dabaf6f..8bb14b86ce4 100644
--- a/mmdet/models/layers/se_layer.py
+++ b/mmdet/models/layers/se_layer.py
@@ -155,8 +155,12 @@ def __init__(self, channels: int, init_cfg: OptMultiConfig = None) -> None:
 
     def forward(self, x: Tensor) -> Tensor:
         """Forward function for ChannelAttention."""
-        with torch.cuda.amp.autocast(enabled=False):
-            out = self.global_avgpool(x)
+        if x.device.type == 'musa':
+            with torch_musa.core.amp.autocast(enabled=False):
+                out = self.global_avgpool(x)
+        else:
+            with torch.cuda.amp.autocast(enabled=False):
+                out = self.global_avgpool(x)
         out = self.fc(out)
         out = self.act(out)
         return x * out
diff --git a/mmdet/models/losses/focal_loss.py b/mmdet/models/losses/focal_loss.py
index 15bef293a59..60f4e9bab10 100644
--- a/mmdet/models/losses/focal_loss.py
+++ b/mmdet/models/losses/focal_loss.py
@@ -234,6 +234,8 @@ def forward(self,
                 calculate_loss_func = py_sigmoid_focal_loss
             elif torch.cuda.is_available() and pred.is_cuda:
                 calculate_loss_func = sigmoid_focal_loss
+            elif hasattr(torch, 'musa') and pred.device.type == 'musa':
+                calculate_loss_func = sigmoid_focal_loss
             else:
                 num_classes = pred.size(1)
                 target = F.one_hot(target, num_classes=num_classes + 1)
diff --git a/mmdet/models/task_modules/assigners/iou2d_calculator.py b/mmdet/models/task_modules/assigners/iou2d_calculator.py
index b6daa94feb4..e4fc586ed8e 100644
--- a/mmdet/models/task_modules/assigners/iou2d_calculator.py
+++ b/mmdet/models/task_modules/assigners/iou2d_calculator.py
@@ -54,7 +54,8 @@ def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):
             bboxes1 = cast_tensor_type(bboxes1, self.scale, self.dtype)
             bboxes2 = cast_tensor_type(bboxes2, self.scale, self.dtype)
             overlaps = bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)
-            if not overlaps.is_cuda and overlaps.dtype == torch.float16:
+            if (not overlaps.is_cuda and overlaps.device.type != 'musa'
+                    and overlaps.dtype == torch.float16):
                 # resume cpu float32
                 overlaps = overlaps.float()
             return overlaps
diff --git a/mmdet/models/task_modules/assigners/sim_ota_assigner.py b/mmdet/models/task_modules/assigners/sim_ota_assigner.py
index d54a8b91d13..b592c8ae4a5 100644
--- 
a/mmdet/models/task_modules/assigners/sim_ota_assigner.py
+++ b/mmdet/models/task_modules/assigners/sim_ota_assigner.py
@@ -115,13 +115,28 @@ def assign(self,
         valid_pred_scores = valid_pred_scores.unsqueeze(1).repeat(1, num_gt,
                                                                   1)
         # disable AMP autocast and calculate BCE with FP32 to avoid overflow
-        with torch.cuda.amp.autocast(enabled=False):
-            cls_cost = (
-                F.binary_cross_entropy(
-                    valid_pred_scores.to(dtype=torch.float32),
-                    gt_onehot_label,
-                    reduction='none',
-                ).sum(-1).to(dtype=valid_pred_scores.dtype))
+        try:
+            import torch_musa
+            IS_MUSA_AVAILABLE = True
+        except Exception:
+            IS_MUSA_AVAILABLE = False
+
+        if IS_MUSA_AVAILABLE:
+            with torch_musa.core.amp.autocast(enabled=False):
+                cls_cost = (
+                    F.binary_cross_entropy(
+                        valid_pred_scores.to(dtype=torch.float32),
+                        gt_onehot_label,
+                        reduction='none',
+                    ).sum(-1).to(dtype=valid_pred_scores.dtype))
+        else:
+            with torch.cuda.amp.autocast(enabled=False):
+                cls_cost = (
+                    F.binary_cross_entropy(
+                        valid_pred_scores.to(dtype=torch.float32),
+                        gt_onehot_label,
+                        reduction='none',
+                    ).sum(-1).to(dtype=valid_pred_scores.dtype))
 
         cost_matrix = (
             cls_cost * self.cls_weight + iou_cost * self.iou_weight +
diff --git a/mmdet/models/task_modules/samplers/random_sampler.py b/mmdet/models/task_modules/samplers/random_sampler.py
index fa03665fc36..11fa789cce0 100644
--- a/mmdet/models/task_modules/samplers/random_sampler.py
+++ b/mmdet/models/task_modules/samplers/random_sampler.py
@@ -53,11 +53,17 @@ def random_choice(self, gallery: Union[Tensor, ndarray, list],
             Tensor or ndarray: sampled indices.
         """
         assert len(gallery) >= num
-
+        try:
+            import torch_musa
+            IS_MUSA_AVAILABLE = True
+        except Exception:
+            IS_MUSA_AVAILABLE = False
         is_tensor = isinstance(gallery, torch.Tensor)
         if not is_tensor:
             if torch.cuda.is_available():
                 device = torch.cuda.current_device()
+            elif IS_MUSA_AVAILABLE:
+                device = torch.musa.current_device()
             else:
                 device = 'cpu'
             gallery = torch.tensor(gallery, dtype=torch.long, device=device)
diff --git a/mmdet/models/task_modules/samplers/score_hlr_sampler.py b/mmdet/models/task_modules/samplers/score_hlr_sampler.py
index 0227585b923..e36458ba72f 100644
--- a/mmdet/models/task_modules/samplers/score_hlr_sampler.py
+++ b/mmdet/models/task_modules/samplers/score_hlr_sampler.py
@@ -89,10 +89,17 @@ def random_choice(gallery: Union[Tensor, ndarray, list],
         """
         assert len(gallery) >= num
 
+        try:
+            import torch_musa
+            IS_MUSA_AVAILABLE = True
+        except Exception:
+            IS_MUSA_AVAILABLE = False
         is_tensor = isinstance(gallery, torch.Tensor)
         if not is_tensor:
             if torch.cuda.is_available():
                 device = torch.cuda.current_device()
+            elif IS_MUSA_AVAILABLE:
+                device = torch.musa.current_device()
             else:
                 device = 'cpu'
             gallery = torch.tensor(gallery, dtype=torch.long, device=device)
diff --git a/mmdet/models/task_modules/tracking/aflink.py b/mmdet/models/task_modules/tracking/aflink.py
index 52461067e37..0f83d26ca11 100644
--- a/mmdet/models/task_modules/tracking/aflink.py
+++ b/mmdet/models/task_modules/tracking/aflink.py
@@ -162,6 +162,8 @@ def __init__(self,
         load_checkpoint(self.model, checkpoint)
         if torch.cuda.is_available():
             self.model.cuda()
+        elif hasattr(torch, 'musa') and torch.musa.is_available():
+            self.model.musa()
         self.model.eval()
 
         self.device = next(self.model.parameters()).device
diff --git a/mmdet/structures/bbox/bbox_overlaps.py b/mmdet/structures/bbox/bbox_overlaps.py
index 8e3435d28b3..bf2cbb30c3c 100644
--- a/mmdet/structures/bbox/bbox_overlaps.py
+++ b/mmdet/structures/bbox/bbox_overlaps.py
@@ -3,7 +3,7 @@
 
 
 def fp16_clamp(x, min=None, max=None):
-    if not 
x.is_cuda and x.dtype == torch.float16:
+    if not x.is_cuda and x.device.type != 'musa' and x.dtype == torch.float16:
         # clamp for cpu float16, tensor fp16 has no clamp implementation
         return x.float().clamp(min, max).half()
 
diff --git a/mmdet/utils/benchmark.py b/mmdet/utils/benchmark.py
index 5419b2d175e..293432094e6 100644
--- a/mmdet/utils/benchmark.py
+++ b/mmdet/utils/benchmark.py
@@ -13,6 +13,7 @@
 from mmengine import MMLogger
 from mmengine.config import Config
 from mmengine.device import get_max_cuda_memory
+from mmengine.device.utils import get_max_musa_memory, is_musa_available
 from mmengine.dist import get_world_size
 from mmengine.runner import Runner, load_checkpoint
 from mmengine.utils.dl_utils import set_multi_processing
@@ -193,14 +194,22 @@ def _init_model(self, checkpoint: str, is_fuse_conv_bn: bool) -> nn.Module:
         if is_fuse_conv_bn:
             model = fuse_conv_bn(model)
 
-        model = model.cuda()
-
-        if self.distributed:
-            model = DistributedDataParallel(
-                model,
-                device_ids=[torch.cuda.current_device()],
-                broadcast_buffers=False,
-                find_unused_parameters=False)
+        if is_musa_available():
+            model = model.musa()
+            if self.distributed:
+                model = DistributedDataParallel(
+                    model,
+                    device_ids=[torch.musa.current_device()],
+                    broadcast_buffers=False,
+                    find_unused_parameters=False)
+        else:
+            model = model.cuda()
+            if self.distributed:
+                model = DistributedDataParallel(
+                    model,
+                    device_ids=[torch.cuda.current_device()],
+                    broadcast_buffers=False,
+                    find_unused_parameters=False)
 
         model.eval()
         return model
@@ -209,37 +218,71 @@ def run_once(self) -> dict:
         """Executes the benchmark once."""
         pure_inf_time = 0
         fps = 0
+        if is_musa_available():
+            for i, data in enumerate(self.data_loader):
 
-        for i, data in enumerate(self.data_loader):
+                if (i + 1) % self.log_interval == 0:
+                    print_log('==================================',
+                              self.logger)
 
-            if (i + 1) % self.log_interval == 0:
-                print_log('==================================', self.logger)
+                torch.musa.synchronize()
+                start_time = time.perf_counter()
 
-            torch.cuda.synchronize()
-            start_time = time.perf_counter()
+                with torch.no_grad():
+                    self.model.test_step(data)
 
-            with torch.no_grad():
-                self.model.test_step(data)
+                torch.musa.synchronize()
+                elapsed = time.perf_counter() - start_time
 
-            torch.cuda.synchronize()
-            elapsed = time.perf_counter() - start_time
+                if i >= self.num_warmup:
+                    pure_inf_time += elapsed
+                    if (i + 1) % self.log_interval == 0:
+                        fps = (i + 1 - self.num_warmup) / pure_inf_time
+                        musa_memory = get_max_musa_memory()
 
-            if i >= self.num_warmup:
-                pure_inf_time += elapsed
-                if (i + 1) % self.log_interval == 0:
+                        print_log(
+                            f'Done image [{i + 1:<3}/{self.max_iter}], '
+                            f'fps: {fps:.1f} img/s, '
+                            f'times per image: {1000 / fps:.1f} ms/img, '
+                            f'musa memory: {musa_memory} MB', self.logger)
+                        print_process_memory(self._process, self.logger)
+
+                if (i + 1) == self.max_iter:
                     fps = (i + 1 - self.num_warmup) / pure_inf_time
-                    cuda_memory = get_max_cuda_memory()
+                    break
 
-                    print_log(
-                        f'Done image [{i + 1:<3}/{self.max_iter}], '
-                        f'fps: {fps:.1f} img/s, '
-                        f'times per image: {1000 / fps:.1f} ms/img, '
-                        f'cuda memory: {cuda_memory} MB', self.logger)
-                    print_process_memory(self._process, self.logger)
+        else:
+            for i, data in enumerate(self.data_loader):
 
-            if (i + 1) == self.max_iter:
-                fps = (i + 1 - self.num_warmup) / pure_inf_time
-                break
+                if (i + 1) % self.log_interval == 0:
+                    print_log('==================================',
+                              self.logger)
+
+                torch.cuda.synchronize()
+                start_time = time.perf_counter()
+
+                with torch.no_grad():
+                    
self.model.test_step(data) + + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= self.num_warmup: + pure_inf_time += elapsed + if (i + 1) % self.log_interval == 0: + fps = (i + 1 - self.num_warmup) / pure_inf_time + cuda_memory = get_max_cuda_memory() + + print_log( + f'Done image [{i + 1:<3}/{self.max_iter}], ' + f'fps: {fps:.1f} img/s, ' + f'times per image: {1000 / fps:.1f} ms/img, ' + f'cuda memory: {cuda_memory} MB', self.logger) + print_process_memory(self._process, self.logger) + + if (i + 1) == self.max_iter: + fps = (i + 1 - self.num_warmup) / pure_inf_time + break return {'fps': fps} diff --git a/mmdet/utils/contextmanagers.py b/mmdet/utils/contextmanagers.py index fa12bfcaff1..3d01b7602be 100644 --- a/mmdet/utils/contextmanagers.py +++ b/mmdet/utils/contextmanagers.py @@ -7,6 +7,7 @@ from typing import List import torch +from mmengine.device.utils import is_musa_available logger = logging.getLogger(__name__) @@ -20,72 +21,141 @@ async def completed(trace_name='', streams: List[torch.cuda.Stream] = None): """Async context manager that waits for work to complete on given CUDA streams.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): yield return + if is_musa_available(): + stream_before_context_switch = torch.musa.current_stream() + if not streams: + streams = [stream_before_context_switch] + else: + streams = [ + s if s else stream_before_context_switch for s in streams + ] + + end_events = [ + torch.musa.Event(enable_timing=DEBUG_COMPLETED_TIME) + for _ in streams + ] - stream_before_context_switch = torch.cuda.current_stream() - if not streams: - streams = [stream_before_context_switch] - else: - streams = [s if s else stream_before_context_switch for s in streams] - - end_events = [ - torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams - ] + if DEBUG_COMPLETED_TIME: + start = torch.musa.Event(enable_timing=True) + stream_before_context_switch.record_event(start) - if DEBUG_COMPLETED_TIME: - start = torch.cuda.Event(enable_timing=True) - stream_before_context_switch.record_event(start) + cpu_start = time.monotonic() + logger.debug('%s %s starting, streams: %s', trace_name, name, streams) + grad_enabled_before = torch.is_grad_enabled() + try: + yield + finally: + current_stream = torch.musa.current_stream() + assert current_stream == stream_before_context_switch - cpu_start = time.monotonic() - logger.debug('%s %s starting, streams: %s', trace_name, name, streams) - grad_enabled_before = torch.is_grad_enabled() - try: - yield - finally: - current_stream = torch.cuda.current_stream() - assert current_stream == stream_before_context_switch + if DEBUG_COMPLETED_TIME: + cpu_end = time.monotonic() + for i, stream in enumerate(streams): + event = end_events[i] + stream.record_event(event) + + grad_enabled_after = torch.is_grad_enabled() + + # observed change of torch.is_grad_enabled() during concurrent + # run of async_test_bboxes code + assert (grad_enabled_before == grad_enabled_after + ), 'Unexpected is_grad_enabled() value change' + + are_done = [e.query() for e in end_events] + logger.debug('%s %s completed: %s streams: %s', trace_name, name, + are_done, streams) + with torch.musa.stream(stream_before_context_switch): + while not all(are_done): + await asyncio.sleep(sleep_interval) + are_done = [e.query() for e in end_events] + logger.debug( + '%s %s completed: %s streams: %s', + trace_name, + name, + are_done, + streams, + ) + + current_stream = torch.musa.current_stream() + 
assert current_stream == stream_before_context_switch + + if DEBUG_COMPLETED_TIME: + cpu_time = (cpu_end - cpu_start) * 1000 + stream_times_ms = '' + for i, stream in enumerate(streams): + elapsed_time = start.elapsed_time(end_events[i]) + stream_times_ms += f' {stream} {elapsed_time:.2f} ms' + logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, + stream_times_ms) + else: + stream_before_context_switch = torch.cuda.current_stream() + if not streams: + streams = [stream_before_context_switch] + else: + streams = [ + s if s else stream_before_context_switch for s in streams + ] + + end_events = [ + torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) + for _ in streams + ] if DEBUG_COMPLETED_TIME: - cpu_end = time.monotonic() - for i, stream in enumerate(streams): - event = end_events[i] - stream.record_event(event) - - grad_enabled_after = torch.is_grad_enabled() - - # observed change of torch.is_grad_enabled() during concurrent run of - # async_test_bboxes code - assert (grad_enabled_before == grad_enabled_after - ), 'Unexpected is_grad_enabled() value change' - - are_done = [e.query() for e in end_events] - logger.debug('%s %s completed: %s streams: %s', trace_name, name, - are_done, streams) - with torch.cuda.stream(stream_before_context_switch): - while not all(are_done): - await asyncio.sleep(sleep_interval) - are_done = [e.query() for e in end_events] - logger.debug( - '%s %s completed: %s streams: %s', - trace_name, - name, - are_done, - streams, - ) - - current_stream = torch.cuda.current_stream() - assert current_stream == stream_before_context_switch + start = torch.cuda.Event(enable_timing=True) + stream_before_context_switch.record_event(start) - if DEBUG_COMPLETED_TIME: - cpu_time = (cpu_end - cpu_start) * 1000 - stream_times_ms = '' + cpu_start = time.monotonic() + logger.debug('%s %s starting, streams: %s', trace_name, name, streams) + grad_enabled_before = torch.is_grad_enabled() + try: + yield + finally: + current_stream = torch.cuda.current_stream() + assert current_stream == stream_before_context_switch + + if DEBUG_COMPLETED_TIME: + cpu_end = time.monotonic() for i, stream in enumerate(streams): - elapsed_time = start.elapsed_time(end_events[i]) - stream_times_ms += f' {stream} {elapsed_time:.2f} ms' - logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, - stream_times_ms) + event = end_events[i] + stream.record_event(event) + + grad_enabled_after = torch.is_grad_enabled() + + # observed change of torch.is_grad_enabled() during concurrent + # run of async_test_bboxes code + assert (grad_enabled_before == grad_enabled_after + ), 'Unexpected is_grad_enabled() value change' + + are_done = [e.query() for e in end_events] + logger.debug('%s %s completed: %s streams: %s', trace_name, name, + are_done, streams) + with torch.cuda.stream(stream_before_context_switch): + while not all(are_done): + await asyncio.sleep(sleep_interval) + are_done = [e.query() for e in end_events] + logger.debug( + '%s %s completed: %s streams: %s', + trace_name, + name, + are_done, + streams, + ) + + current_stream = torch.cuda.current_stream() + assert current_stream == stream_before_context_switch + + if DEBUG_COMPLETED_TIME: + cpu_time = (cpu_end - cpu_start) * 1000 + stream_times_ms = '' + for i, stream in enumerate(streams): + elapsed_time = start.elapsed_time(end_events[i]) + stream_times_ms += f' {stream} {elapsed_time:.2f} ms' + logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, + stream_times_ms) @contextlib.asynccontextmanager @@ -98,25 +168,44 @@ async def 
concurrent(streamqueue: asyncio.Queue, Queue tasks define the pool of streams used for concurrent execution. """ - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): yield return - - initial_stream = torch.cuda.current_stream() - - with torch.cuda.stream(initial_stream): - stream = await streamqueue.get() - assert isinstance(stream, torch.cuda.Stream) - - try: - with torch.cuda.stream(stream): - logger.debug('%s %s is starting, stream: %s', trace_name, name, - stream) - yield - current = torch.cuda.current_stream() - assert current == stream - logger.debug('%s %s has finished, stream: %s', trace_name, - name, stream) - finally: - streamqueue.task_done() - streamqueue.put_nowait(stream) + if is_musa_available(): + initial_stream = torch.musa.current_stream() + + with torch.musa.stream(initial_stream): + stream = await streamqueue.get() + assert isinstance(stream, torch.musa.Stream) + + try: + with torch.musa.stream(stream): + logger.debug('%s %s is starting, stream: %s', trace_name, + name, stream) + yield + current = torch.musa.current_stream() + assert current == stream + logger.debug('%s %s has finished, stream: %s', trace_name, + name, stream) + finally: + streamqueue.task_done() + streamqueue.put_nowait(stream) + else: + initial_stream = torch.cuda.current_stream() + + with torch.cuda.stream(initial_stream): + stream = await streamqueue.get() + assert isinstance(stream, torch.cuda.Stream) + + try: + with torch.cuda.stream(stream): + logger.debug('%s %s is starting, stream: %s', trace_name, + name, stream) + yield + current = torch.cuda.current_stream() + assert current == stream + logger.debug('%s %s has finished, stream: %s', trace_name, + name, stream) + finally: + streamqueue.task_done() + streamqueue.put_nowait(stream) diff --git a/mmdet/utils/memory.py b/mmdet/utils/memory.py index b6f9cbc7f9e..9d3eed56e5b 100644 --- a/mmdet/utils/memory.py +++ b/mmdet/utils/memory.py @@ -5,6 +5,7 @@ from functools import wraps import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MMLogger @@ -143,7 +144,10 @@ def wrapped(*args, **kwargs): return func(*args, **kwargs) # Clear cache and retry - torch.cuda.empty_cache() + if is_musa_available(): + torch.musa.empty_cache() + else: + torch.cuda.empty_cache() with _ignore_torch_cuda_oom(): return func(*args, **kwargs) diff --git a/mmdet/utils/profiling.py b/mmdet/utils/profiling.py index 2f53f456c72..deda08a5738 100644 --- a/mmdet/utils/profiling.py +++ b/mmdet/utils/profiling.py @@ -4,6 +4,7 @@ import time import torch +from mmengine.device.utils import is_musa_available if sys.version_info >= (3, 7): @@ -18,13 +19,20 @@ def profile_time(trace_name, Useful as a temporary context manager to find sweet spots of code suitable for async implementation. 
""" - if (not enabled) or not torch.cuda.is_available(): + if (not enabled + ) or not torch.cuda.is_available() and is_musa_available(): yield return - stream = stream if stream else torch.cuda.current_stream() - end_stream = end_stream if end_stream else stream - start = torch.cuda.Event(enable_timing=True) - end = torch.cuda.Event(enable_timing=True) + if is_musa_available(): + stream = stream if stream else torch.musa.current_stream() + end_stream = end_stream if end_stream else stream + start = torch.musa.Event(enable_timing=True) + end = torch.musa.Event(enable_timing=True) + else: + stream = stream if stream else torch.cuda.current_stream() + end_stream = end_stream if end_stream else stream + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) stream.record_event(start) try: cpu_start = time.monotonic() diff --git a/tests/test_apis/test_inference.py b/tests/test_apis/test_inference.py index e42f86c64e8..5d977928cf9 100644 --- a/tests/test_apis/test_inference.py +++ b/tests/test_apis/test_inference.py @@ -4,6 +4,7 @@ import numpy as np import pytest import torch +from mmengine.device.utils import is_musa_available from mmdet.apis import inference_detector, init_detector from mmdet.structures import DetDataSample @@ -13,11 +14,20 @@ register_all_modules() -@pytest.mark.parametrize('config,devices', - [('configs/retinanet/retinanet_r18_fpn_1x_coco.py', - ('cpu', 'cuda'))]) -def test_init_detector(config, devices): - assert all([device in ['cpu', 'cuda'] for device in devices]) +@pytest.mark.parametrize('config', + ['configs/retinanet/retinanet_r18_fpn_1x_coco.py']) +@pytest.mark.parametrize('device', [ + 'cpu', + pytest.param( + 'cuda', + marks=pytest.mark.skipif( + not torch.cuda.is_available(), reason='requires cuda support')), + pytest.param( + 'musa', + marks=pytest.mark.skipif( + not is_musa_available(), reason='requires musa support')), +]) +def test_init_detector(config, device): project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) project_dir = os.path.join(project_dir, '..') @@ -32,28 +42,24 @@ def test_init_detector(config, devices): init_cfg=dict( type='Pretrained', checkpoint='torchvision://resnet18')))) - for device in devices: - if device == 'cuda' and not torch.cuda.is_available(): - pytest.skip('test requires GPU and torch+cuda') - - model = init_detector( - config_file, device=device, cfg_options=cfg_options) + model = init_detector(config_file, device=device, cfg_options=cfg_options) - # test init_detector with :obj:`Path` - config_path_object = Path(config_file) - model = init_detector(config_path_object, device=device) + # test init_detector with :obj:`Path` + config_path_object = Path(config_file) + model = init_detector(config_path_object, device=device) - # test init_detector with undesirable type - with pytest.raises(TypeError): - config_list = [config_file] - model = init_detector(config_list) # noqa: F841 + # test init_detector with undesirable type + # pytest.set_trace() + with pytest.raises(TypeError): + config_list = [config_file] + model = init_detector(config_list) # noqa: F841 @pytest.mark.parametrize('config,devices', [('configs/retinanet/retinanet_r18_fpn_1x_coco.py', - ('cpu', 'cuda'))]) + ('cpu', 'cuda', 'musa'))]) def test_inference_detector(config, devices): - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) project_dir = 
os.path.join(project_dir, '..') @@ -68,7 +74,8 @@ def test_inference_detector(config, devices): for device in devices: if device == 'cuda' and not torch.cuda.is_available(): pytest.skip('test requires GPU and torch+cuda') - + elif device == 'musa' and not is_musa_available(): + pytest.skip('test requires GPU and torch+musa') model = init_detector(config_file, device=device) result = inference_detector(model, img1) assert isinstance(result, DetDataSample) diff --git a/tests/test_engine/test_hooks/test_mean_teacher_hook.py b/tests/test_engine/test_hooks/test_mean_teacher_hook.py index 41d056e4071..b57a0b9d2af 100644 --- a/tests/test_engine/test_hooks/test_mean_teacher_hook.py +++ b/tests/test_engine/test_hooks/test_mean_teacher_hook.py @@ -5,6 +5,7 @@ import torch import torch.nn as nn +from mmengine.device.utils import is_musa_available from mmengine.evaluator import BaseMetric from mmengine.model import BaseModel from mmengine.optim import OptimWrapper @@ -98,7 +99,8 @@ def tearDown(self): self.temp_dir.cleanup() def test_mean_teacher_hook(self): - device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + device = 'cuda:0' if torch.cuda.is_available() else ( + 'musa:0' if is_musa_available() else 'cpu') model = ToyModel2().to(device) runner = Runner( model=model, diff --git a/tests/test_engine/test_runner/test_loops.py b/tests/test_engine/test_runner/test_loops.py index 6bf9cb4795a..d5f7e778947 100644 --- a/tests/test_engine/test_runner/test_loops.py +++ b/tests/test_engine/test_runner/test_loops.py @@ -5,6 +5,7 @@ import torch import torch.nn as nn +from mmengine.device.utils import is_musa_available from mmengine.evaluator import Evaluator from mmengine.model import BaseModel from mmengine.optim import OptimWrapper @@ -84,7 +85,8 @@ def tearDown(self): self.temp_dir.cleanup() def test_teacher_student_val_loop(self): - device = 'cuda:0' if torch.cuda.is_available() else 'cpu' + device = 'cuda:0' if torch.cuda.is_available() else ( + 'musa:0' if is_musa_available() else 'cpu') model = ToyModel2().to(device) evaluator = Mock() evaluator.evaluate = Mock(return_value=dict(acc=0.5)) diff --git a/tests/test_models/test_detectors/test_cornernet.py b/tests/test_models/test_detectors/test_cornernet.py index 10bc62649f6..9a12b4f24ab 100644 --- a/tests/test_models/test_detectors/test_cornernet.py +++ b/tests/test_models/test_detectors/test_cornernet.py @@ -4,6 +4,7 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmdet.structures import DetDataSample from mmdet.testing import demo_mm_inputs, get_detector_cfg @@ -50,8 +51,8 @@ def test_init(self): self.assertTrue(detector.backbone is not None) self.assertTrue(not hasattr(detector, 'neck')) - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_cornernet_forward_loss_mode(self): from mmdet.registry import MODELS detector = MODELS.build(self.model_cfg) @@ -62,8 +63,8 @@ def test_cornernet_forward_loss_mode(self): losses = detector.forward(**data, mode='loss') assert isinstance(losses, dict) - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_cornernet_forward_predict_mode(self): from mmdet.registry import MODELS detector = MODELS.build(self.model_cfg) @@ -79,8 
+80,8 @@ def test_cornernet_forward_predict_mode(self): assert len(batch_results) == 2 assert isinstance(batch_results[0], DetDataSample) - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_cornernet_forward_tensor_mode(self): from mmdet.registry import MODELS detector = MODELS.build(self.model_cfg) diff --git a/tests/test_models/test_detectors/test_glip.py b/tests/test_models/test_detectors/test_glip.py index dc38d3142d2..a0581b8050f 100644 --- a/tests/test_models/test_detectors/test_glip.py +++ b/tests/test_models/test_detectors/test_glip.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample @@ -37,7 +38,7 @@ def test_glip_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -46,7 +47,10 @@ def test_glip_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() # test custom_entities is True packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], diff --git a/tests/test_models/test_detectors/test_kd_single_stage.py b/tests/test_models/test_detectors/test_kd_single_stage.py index 93d886263a8..a02eb2a45d0 100644 --- a/tests/test_models/test_detectors/test_kd_single_stage.py +++ b/tests/test_models/test_detectors/test_kd_single_stage.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet import * # noqa @@ -27,14 +28,14 @@ def test_init(self, cfg_file): self.assertTrue(detector.neck) self.assertTrue(detector.bbox_head) - @parameterized.expand([('ld/ld_r18-gflv1-r101_fpn_1x_coco.py', ('cpu', - 'cuda'))]) + @parameterized.expand([('ld/ld_r18-gflv1-r101_fpn_1x_coco.py', + ('cpu', 'cuda', 'musa'))]) def test_single_stage_forward_train(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -43,6 +44,10 @@ def test_single_stage_forward_train(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, True) @@ -57,7 +62,7 @@ def test_single_stage_forward_test(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for 
device in devices: detector = MODELS.build(model) @@ -66,7 +71,10 @@ def test_single_stage_forward_test(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_detectors/test_maskformer.py b/tests/test_models/test_detectors/test_maskformer.py index 3eeb04bfd55..e0101d54ac0 100644 --- a/tests/test_models/test_detectors/test_maskformer.py +++ b/tests/test_models/test_detectors/test_maskformer.py @@ -2,6 +2,7 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS @@ -54,13 +55,15 @@ def test_init(self): assert detector.backbone assert detector.panoptic_head - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_loss_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( @@ -74,12 +77,14 @@ def test_forward_loss_mode(self, device): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_predict_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( 2, @@ -95,12 +100,14 @@ def test_forward_predict_mode(self, device): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_tensor_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( @@ -162,7 +169,9 @@ def test_init(self): ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), - ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') + ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'), + ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py') ]) def test_forward_loss_mode(self, device, cfg_path): print(device, cfg_path) @@ -172,6 +181,9 @@ def test_forward_loss_mode(self, device, 
cfg_path):
         if device == 'cuda' and not torch.cuda.is_available():
             return unittest.skip('test requires GPU and torch+cuda')
+        elif device == 'musa' and not is_musa_available():
+            return unittest.skip('test requires GPU and torch+musa')
+
 
         detector = detector.to(device)
 
         packed_inputs = demo_mm_inputs(
@@ -189,7 +201,9 @@ def test_forward_predict_mode(self, device, cfg_path):
         ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'),
         ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'),
         ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'),
-        ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py')
+        ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'),
+        ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'),
+        ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py')
     ])
     def test_forward_predict_mode(self, device, cfg_path):
         with_semantic = 'panoptic' in cfg_path
@@ -197,6 +211,8 @@ def test_forward_predict_mode(self, device, cfg_path):
         detector = MODELS.build(model_cfg)
         if device == 'cuda' and not torch.cuda.is_available():
             return unittest.skip('test requires GPU and torch+cuda')
+        elif device == 'musa' and not is_musa_available():
+            return unittest.skip('test requires GPU and torch+musa')
         detector = detector.to(device)
         packed_inputs = demo_mm_inputs(
             2,
@@ -216,7 +232,9 @@ def test_forward_predict_mode(self, device, cfg_path):
         ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'),
         ('cpu', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'),
         ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'),
-        ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py')
+        ('cuda', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py'),
+        ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py'),
+        ('musa', 'mask2former/mask2former_r50_8xb2-lsj-50e_coco.py')
     ])
     def test_forward_tensor_mode(self, device, cfg_path):
         with_semantic = 'panoptic' in cfg_path
@@ -224,6 +242,8 @@ def test_forward_tensor_mode(self, device, cfg_path):
         detector = MODELS.build(model_cfg)
         if device == 'cuda' and not torch.cuda.is_available():
             return unittest.skip('test requires GPU and torch+cuda')
+        elif device == 'musa' and not is_musa_available():
+            return unittest.skip('test requires GPU and torch+musa')
         detector = detector.to(device)
 
         packed_inputs = demo_mm_inputs(
diff --git a/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py b/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py
index 9234554f7b3..85dc70bd85b 100644
--- a/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py
+++ b/tests/test_models/test_detectors/test_panoptic_two_stage_segmentor.py
@@ -2,6 +2,7 @@
 import unittest
 
 import torch
+from mmengine.device.utils import is_musa_available
 from parameterized import parameterized
 
 from mmdet.registry import MODELS
@@ -34,13 +35,15 @@ def test_init(self):
         assert detector.with_semantic_head
         assert detector.with_panoptic_fusion_head
 
-    @parameterized.expand([('cpu', ), ('cuda', )])
+    @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )])
     def test_forward_loss_mode(self, device):
         model_cfg = self._create_model_cfg()
         detector = MODELS.build(model_cfg)
 
         if device == 'cuda' and not torch.cuda.is_available():
             return unittest.skip('test requires GPU and torch+cuda')
+        elif device == 'musa' and not is_musa_available():
+            return unittest.skip('test requires GPU and torch+musa')
         detector = detector.to(device)
 
         packed_inputs = demo_mm_inputs(
@@ -54,12 +57,14 @@ def 
test_forward_loss_mode(self, device): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_predict_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( 2, @@ -75,12 +80,14 @@ def test_forward_predict_mode(self, device): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('cpu', ), ('cuda', )]) + @parameterized.expand([('cpu', ), ('cuda', ), ('musa', )]) def test_forward_tensor_mode(self, device): model_cfg = self._create_model_cfg() detector = MODELS.build(model_cfg) if device == 'cuda' and not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa' and not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') detector = detector.to(device) packed_inputs = demo_mm_inputs( diff --git a/tests/test_models/test_detectors/test_rpn.py b/tests/test_models/test_detectors/test_rpn.py index 60f7492a96b..45bdcb4800c 100644 --- a/tests/test_models/test_detectors/test_rpn.py +++ b/tests/test_models/test_detectors/test_rpn.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample @@ -34,7 +35,8 @@ def test_init(self, cfg_file): detector = MODELS.build(model) self.assertEqual(detector.bbox_head.num_classes, 1) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', + 'musa'))]) def test_rpn_forward_loss_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -43,7 +45,7 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -52,6 +54,10 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, True) @@ -59,7 +65,8 @@ def test_rpn_forward_loss_mode(self, cfg_file, devices): losses = detector.forward(**data, mode='loss') self.assertIsInstance(losses, dict) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', + 'musa'))]) def test_rpn_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -68,7 +75,7 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert 
all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -77,6 +84,10 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) @@ -87,7 +98,8 @@ def test_rpn_forward_predict_mode(self, cfg_file, devices): self.assertEqual(len(batch_results), 2) self.assertIsInstance(batch_results[0], DetDataSample) - @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda'))]) + @parameterized.expand([('rpn/rpn_r50_fpn_1x_coco.py', ('cpu', 'cuda', + 'musa'))]) def test_rpn_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) # backbone convert to ResNet18 @@ -96,7 +108,7 @@ def test_rpn_forward_tensor_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -105,6 +117,10 @@ def test_rpn_forward_tensor_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_detectors/test_single_stage.py b/tests/test_models/test_detectors/test_single_stage.py index 22dbd1a98cb..26f2488e075 100644 --- a/tests/test_models/test_detectors/test_single_stage.py +++ b/tests/test_models/test_detectors/test_single_stage.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from parameterized import parameterized @@ -36,11 +37,12 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda')), - ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda', 'musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda', + 'musa')), ]) def test_single_stage_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -51,7 +53,7 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -61,6 +63,10 @@ def 
test_single_stage_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, True) @@ -68,18 +74,19 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): self.assertIsInstance(losses, dict) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda')), - ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda', 'musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda', + 'musa')), ]) def test_single_stage_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -88,6 +95,10 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) @@ -99,18 +110,19 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): self.assertIsInstance(batch_results[0], DetDataSample) @parameterized.expand([ - ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda')), - ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', - 'cuda')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda')), - ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda')), + ('retinanet/retinanet_r18_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('centernet/centernet_r18_8xb16-crop512-140e_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cpu', 'cuda', 'musa')), + ('yolo/yolov3_mobilenetv2_8xb24-320-300e_coco.py', ('cpu', 'cuda', + 'musa')), ]) def test_single_stage_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -119,6 +131,10 @@ def test_single_stage_forward_tensor_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = 
detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_detectors/test_single_stage_instance_seg.py b/tests/test_models/test_detectors/test_single_stage_instance_seg.py index 51530341241..5fc40a03050 100644 --- a/tests/test_models/test_detectors/test_single_stage_instance_seg.py +++ b/tests/test_models/test_detectors/test_single_stage_instance_seg.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample @@ -33,9 +34,9 @@ def test_init(self, cfg_file): self.assertTrue(detector.bbox_head) @parameterized.expand([ - ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda')), - ('solov2/solov2-light_r18_fpn_ms-3x_coco.py', ('cpu', 'cuda')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda')), + ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('solov2/solov2-light_r18_fpn_ms-3x_coco.py', ('cpu', 'cuda', 'musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda', 'musa')), ]) def test_single_stage_forward_loss_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) @@ -45,7 +46,7 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -55,6 +56,10 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], with_mask=True) @@ -63,8 +68,9 @@ def test_single_stage_forward_loss_mode(self, cfg_file, devices): self.assertIsInstance(losses, dict) @parameterized.expand([ - ('solo/decoupled-solo-light_r50_fpn_3x_coco.py', ('cpu', 'cuda')), - ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda')), + ('solo/decoupled-solo-light_r50_fpn_3x_coco.py', ('cpu', 'cuda', + 'musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda', 'musa')), ]) def test_single_stage_forward_predict_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) @@ -74,7 +80,7 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -83,6 +89,10 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], with_mask=True) @@ -95,16 +105,16 @@ def test_single_stage_forward_predict_mode(self, cfg_file, devices): self.assertIsInstance(batch_results[0], DetDataSample) @parameterized.expand([ - ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda')), - ('solov2/solov2_r50_fpn_1x_coco.py', ('cpu', 'cuda')), - ('yolact/yolact_r50_1xb8-55e_coco.py', 
('cpu', 'cuda')), + ('solo/solo_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('solov2/solov2_r50_fpn_1x_coco.py', ('cpu', 'cuda', 'musa')), + ('yolact/yolact_r50_1xb8-55e_coco.py', ('cpu', 'cuda', 'musa')), ]) def test_single_stage_forward_tensor_mode(self, cfg_file, devices): model = get_detector_cfg(cfg_file) model.backbone.init_cfg = None from mmdet.registry import MODELS - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: detector = MODELS.build(model) @@ -113,6 +123,10 @@ def test_single_stage_forward_tensor_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') detector = detector.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + detector = detector.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_detectors/test_two_stage.py b/tests/test_models/test_detectors/test_two_stage.py index 5609c0821dc..4f42b93caf8 100644 --- a/tests/test_models/test_detectors/test_two_stage.py +++ b/tests/test_models/test_detectors/test_two_stage.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.structures import DetDataSample @@ -55,9 +56,12 @@ def test_two_stage_forward_loss_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) @@ -81,9 +85,12 @@ def test_two_stage_forward_predict_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = detector.data_preprocessor(packed_inputs, False) @@ -169,9 +176,12 @@ def test_two_stage_forward_loss_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs( 2, [[3, 128, 128], [3, 125, 130]], with_mask=True) @@ -195,9 +205,12 @@ def test_two_stage_forward_predict_mode(self, cfg_file): from mmdet.registry import MODELS detector = MODELS.build(model) - if not torch.cuda.is_available(): - return unittest.skip('test requires GPU and torch+cuda') - detector = detector.cuda() + if not torch.cuda.is_available() and not 
is_musa_available(): + return unittest.skip('test requires GPU and torch+cuda+musa') + if is_musa_available(): + detector = detector.musa() + else: + detector = detector.cuda() packed_inputs = demo_mm_inputs(2, [[3, 256, 256], [3, 255, 260]]) data = detector.data_preprocessor(packed_inputs, False) diff --git a/tests/test_models/test_mot/test_byte_track.py b/tests/test_models/test_mot/test_byte_track.py index a48548c7510..67477eb4fcf 100644 --- a/tests/test_models/test_mot/test_byte_track.py +++ b/tests/test_models/test_mot/test_byte_track.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -33,14 +34,14 @@ def test_bytetrack_init(self, cfg_file): @parameterized.expand([ ('bytetrack/bytetrack_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_bytetrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -56,6 +57,10 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = model.data_preprocessor(packed_inputs, True) @@ -64,7 +69,7 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('bytetrack/bytetrack_yolox_x_8xb4-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -72,7 +77,7 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -86,6 +91,10 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_mot/test_deep_sort.py b/tests/test_models/test_mot/test_deep_sort.py index 72dfeb43510..028942a8235 100644 --- a/tests/test_models/test_mot/test_deep_sort.py +++ b/tests/test_models/test_mot/test_deep_sort.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -31,7 +32,7 @@ def test_init(self, cfg_file): 
@parameterized.expand([ ('deepsort/deepsort_faster-rcnn_r50_fpn_8xb2-4e' - '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -39,7 +40,7 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -49,6 +50,10 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_mot/test_oc_sort.py b/tests/test_models/test_mot/test_oc_sort.py index 5bf29513e00..8026d2f4e16 100644 --- a/tests/test_models/test_mot/test_oc_sort.py +++ b/tests/test_models/test_mot/test_oc_sort.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -33,14 +34,14 @@ def test_bytetrack_init(self, cfg_file): @parameterized.expand([ ('ocsort/ocsort_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_bytetrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -56,6 +57,10 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_mm_inputs(2, [[3, 128, 128], [3, 125, 130]]) data = model.data_preprocessor(packed_inputs, True) @@ -64,7 +69,7 @@ def test_bytetrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('ocsort/ocsort_yolox_x_8xb4-amp-80e_crowdhuman-mot17halftrain_' - 'test-mot17halfval.py', ('cpu', 'cuda')), + 'test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -72,7 +77,7 @@ def test_bytetrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) diff --git a/tests/test_models/test_mot/test_qdtrack.py b/tests/test_models/test_mot/test_qdtrack.py index 714e022fdec..b14a83d6a4c 100644 --- a/tests/test_models/test_mot/test_qdtrack.py +++ 
b/tests/test_models/test_mot/test_qdtrack.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -31,14 +32,14 @@ def test_qdtrack_init(self, cfg_file): @parameterized.expand([ ('qdtrack/qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17' - 'halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + 'halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_qdtrack_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_qdtrack_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -49,6 +50,10 @@ def test_qdtrack_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, @@ -64,7 +69,7 @@ def test_qdtrack_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('qdtrack/qdtrack_faster-rcnn_r50_fpn_8xb2-4e_mot17' - 'halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + 'halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_qdtrack_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -72,7 +77,7 @@ def test_qdtrack_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -82,6 +87,10 @@ def test_qdtrack_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, num_frames=1, image_shapes=(3, 128, 128)) diff --git a/tests/test_models/test_mot/test_sort.py b/tests/test_models/test_mot/test_sort.py index ec15a6bdde2..87a91180a26 100644 --- a/tests/test_models/test_mot/test_sort.py +++ b/tests/test_models/test_mot/test_sort.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -30,7 +31,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('sort/sort_faster-rcnn_r50_fpn_8xb2-4e' - '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + '_mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -38,7 +39,7 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 
'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -48,6 +49,10 @@ def test_deepsort_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_mot/test_strong_sort.py b/tests/test_models/test_mot/test_strong_sort.py index e0d48a1dbf2..ea1202f0fee 100644 --- a/tests/test_models/test_mot/test_strong_sort.py +++ b/tests/test_models/test_mot/test_strong_sort.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -40,7 +41,7 @@ def test_init(self, cfg_file): @parameterized.expand([ ('strongsort/strongsort_yolox_x_8xb4-80e_crowdhuman' - '-mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda')), + '-mot17halftrain_test-mot17halfval.py', ('cpu', 'cuda', 'musa')), ]) def test_strongsort_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -48,7 +49,7 @@ def test_strongsort_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -68,6 +69,10 @@ def test_strongsort_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + if device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_necks/test_ct_resnet_neck.py b/tests/test_models/test_necks/test_ct_resnet_neck.py index acc5258f0d2..35cd9d3a140 100644 --- a/tests/test_models/test_necks/test_ct_resnet_neck.py +++ b/tests/test_models/test_necks/test_ct_resnet_neck.py @@ -2,6 +2,7 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from mmdet.models.necks import CTResNetNeck @@ -51,3 +52,13 @@ def test_forward(self): feat = feat.cuda() out_feat = ct_resnet_neck([feat])[0] self.assertEqual(out_feat.shape, (1, num_filters[-1], 16, 16)) + elif is_musa_available(): + # test dcn + ct_resnet_neck = CTResNetNeck( + in_channels=in_channels, + num_deconv_filters=num_filters, + num_deconv_kernels=num_kernels) + ct_resnet_neck = ct_resnet_neck.musa() + feat = feat.musa() + out_feat = ct_resnet_neck([feat])[0] + self.assertEqual(out_feat.shape, (1, num_filters[-1], 16, 16)) diff --git a/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py b/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py index 95aa02ee6bd..ca272a00783 100644 --- a/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py +++ b/tests/test_models/test_roi_heads/test_bbox_heads/test_double_bbox_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads.bbox_heads import 
DoubleConvFCBBoxHead @@ -10,12 +11,14 @@ class TestDoubleBboxHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') double_bbox_head = DoubleConvFCBBoxHead( num_convs=4, num_fcs=2, diff --git a/tests/test_models/test_roi_heads/test_cascade_roi_head.py b/tests/test_models/test_roi_heads/test_cascade_roi_head.py index 5918b0067f1..6f89996e90c 100644 --- a/tests/test_models/test_roi_heads/test_cascade_roi_head.py +++ b/tests/test_models/test_roi_heads/test_cascade_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import StandardRoIHead # noqa @@ -26,9 +27,9 @@ def test_init(self, cfg_file): ['cascade_rcnn/cascade-mask-rcnn_r50_fpn_1x_coco.py']) def test_cascade_roi_head_loss(self, cfg_file): """Tests standard roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 img_metas = [{ 'img_shape': (s, s, 3), @@ -36,45 +37,96 @@ def test_cascade_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) diff --git a/tests/test_models/test_roi_heads/test_dynamic_roi_head.py b/tests/test_models/test_roi_heads/test_dynamic_roi_head.py index 8b4b44de699..cd6a35ebd56 100644 --- a/tests/test_models/test_roi_heads/test_dynamic_roi_head.py +++ b/tests/test_models/test_roi_heads/test_dynamic_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS @@ -21,12 +22,15 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_dynamic_roi_head_loss(self, device): """Tests trident roi head predict.""" if not torch.cuda.is_available() and device == 'cuda': # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') + elif not is_musa_available() and device == 'musa': + # RoI pooling only support in GPU + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 diff --git a/tests/test_models/test_roi_heads/test_grid_roi_head.py b/tests/test_models/test_roi_heads/test_grid_roi_head.py index fc2988760c8..0a53bf3aa28 100644 --- a/tests/test_models/test_roi_heads/test_grid_roi_head.py +++ b/tests/test_models/test_roi_heads/test_grid_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS @@ -21,12 +22,15 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_roi_head_loss(self, device): """Tests trident roi head predict.""" if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) @@ -71,12 +75,15 @@ def test_grid_roi_head_loss(self, device): 'loss_grid', out, 'grid loss should be passed when there are no true boxes') - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_roi_head_predict(self, device): """Tests trident roi head predict.""" if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) @@ -99,12 +106,15 @@ def test_grid_roi_head_predict(self, device): image_shapes=image_shapes, num_proposals=100, device=device) roi_head.predict(feats, proposals_list, 
batch_data_samples) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_roi_head_forward(self, device): """Tests trident roi head forward.""" if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) diff --git a/tests/test_models/test_roi_heads/test_htc_roi_head.py b/tests/test_models/test_roi_heads/test_htc_roi_head.py index 37bb92f5787..15db0b8c6b1 100644 --- a/tests/test_models/test_roi_heads/test_htc_roi_head.py +++ b/tests/test_models/test_roi_heads/test_htc_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import HybridTaskCascadeRoIHead # noqa @@ -25,7 +26,7 @@ def test_init(self, cfg_file): @parameterized.expand(['htc/htc_r50_fpn_1x_coco.py']) def test_htc_roi_head_loss(self, cfg_file): """Tests htc roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -35,54 +36,106 @@ def test_htc_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - with_semantic=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + with_semantic=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - with_semantic=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + with_semantic=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + else: + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + with_semantic=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + with_semantic=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) @parameterized.expand(['htc/htc_r50_fpn_1x_coco.py']) def test_htc_roi_head_predict(self, cfg_file): - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -92,23 +145,49 @@ def test_htc_roi_head_predict(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + results = roi_head.predict( + feats, proposal_list, batch_data_samples, rescale=True) + self.assertEqual(results[0].masks.shape[-2:], (s, s)) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - results = roi_head.predict( - feats, proposal_list, batch_data_samples, rescale=True) - self.assertEqual(results[0].masks.shape[-2:], (s, s)) + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + results = roi_head.predict( + feats, proposal_list, batch_data_samples, rescale=True) + self.assertEqual(results[0].masks.shape[-2:], (s, s)) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py index ffadc19ff2b..886e40c0b2b 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_coarse_mask_head.py @@ -1,6 +1,7 @@ import unittest import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from 
mmdet.models.roi_heads.mask_heads import CoarseMaskHead @@ -15,12 +16,14 @@ def test_init(self): with self.assertRaises(AssertionError): CoarseMaskHead(downsample_factor=0.5) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') x = torch.rand((1, 32, 7, 7)).to(device) mask_head = CoarseMaskHead( downsample_factor=2, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py index e0b4ee9362b..91511c37206 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_fcn_mask_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmengine.structures import InstanceData from parameterized import parameterized @@ -12,11 +13,14 @@ class TestFCNMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_get_seg_masks(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') num_classes = 6 mask_head = FCNMaskHead( num_convs=1, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py index 4a182b842d9..b3c931b93b9 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_feature_relay_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor @@ -11,11 +12,14 @@ class TestFeatureRelayHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') mask_head = FeatureRelayHead(in_channels=10, out_conv_channels=10) x = torch.rand((1, 10)) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py index 7f912d797eb..fca49e43e03 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_fused_semantic_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor @@ -11,12 +12,14 @@ class TestFusedSemanticHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if 
not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') semantic_head = FusedSemanticHead( num_ins=5, fusion_level=1, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py index 32e85093501..1a407149a3c 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_global_context_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor @@ -11,12 +12,14 @@ class TestGlobalContextHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') head = GlobalContextHead( num_convs=1, in_channels=4, conv_out_channels=4, num_classes=10) feats = [ diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py index 7a583cd4e61..a2837954fc0 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_grid_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmengine.structures import InstanceData from parameterized import parameterized @@ -15,11 +16,14 @@ class TestGridHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_grid_head_loss(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') grid_head = GridHead() grid_head.to(device=device) @@ -54,11 +58,14 @@ def test_grid_head_loss(self, device): grid_head.loss(grid_pred, sample_idx, sampling_results, train_cfg) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_mask_iou_head_predict_by_feat(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') grid_head = GridHead() grid_head.to(device=device) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py index 6927e618da4..652f3f9e80c 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_htc_mask_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor @@ -11,11 +12,14 @@ class TestHTCMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and 
torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') num_classes = 6 mask_head = HTCMaskHead( with_conv_res=True, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py index 548147861d5..be3f7b81c34 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_maskiou_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import ConfigDict +from mmengine.device.utils import is_musa_available from mmengine.structures import InstanceData from parameterized import parameterized @@ -16,12 +17,14 @@ class TestMaskIoUHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_mask_iou_head_loss_and_target(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') mask_iou_head = MaskIoUHead(num_classes=4) mask_iou_head.to(device=device) @@ -67,12 +70,14 @@ def test_mask_iou_head_loss_and_target(self, device): mask_targets, sampling_results, batch_gt_instances, train_cfg) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_mask_iou_head_predict_by_feat(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') mask_iou_head = MaskIoUHead(num_classes=4) mask_iou_head.to(device=device) diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py index 4df9dc59e9b..252e66ea875 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_mask_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor @@ -11,11 +12,14 @@ class TestSCNetMaskHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward(self, device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') num_classes = 6 mask_head = SCNetMaskHead( conv_to_res=True, diff --git a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py index 84f787bb7f4..492954fb042 100644 --- a/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py +++ b/tests/test_models/test_roi_heads/test_mask_heads/test_scnet_semantic_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from torch import Tensor @@ -11,12 +12,14 @@ class TestSCNetSemanticHead(TestCase): - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_forward_loss(self, 
device): if device == 'cuda': if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') semantic_head = SCNetSemanticHead( num_ins=5, fusion_level=1, diff --git a/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py b/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py index 458eb302b00..6fb02cca77d 100644 --- a/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py +++ b/tests/test_models/test_roi_heads/test_mask_scoring_roI_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg @@ -24,101 +25,193 @@ def test_init(self): def test_mask_scoring_roi_head_loss(self): """Tests trident roi head predict.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head = MODELS.build(self.roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - loss_cls = out['loss_cls'] - loss_bbox = out['loss_bbox'] - loss_mask = out['loss_mask'] - self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') - self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') - self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') - - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - empty_cls_loss = out['loss_cls'] - empty_bbox_loss = out['loss_bbox'] - empty_mask_loss = out['loss_mask'] - self.assertGreater(empty_cls_loss.sum(), 0, - 'cls loss should be non-zero') - self.assertEqual( - empty_bbox_loss.sum(), 0, - 'there should be no box loss when there are no true boxes') - self.assertEqual( - empty_mask_loss.sum(), 0, - 'there should be no mask loss when there are no true boxes') + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox 
= out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, + 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, + 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, + 'mask loss should be non-zero') + + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, + 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, + 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, + 'mask loss should be non-zero') + + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') def test_mask_scoring_roi_head_predict(self): """Tests trident roi head predict.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head = MODELS.build(self.roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, 
num_proposals=100, device='cuda') - roi_head.predict(feats, proposals_list, batch_data_samples) + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + roi_head.predict(feats, proposals_list, batch_data_samples) + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + roi_head.predict(feats, proposals_list, batch_data_samples) def test_mask_scoring_roi_head_forward(self): """Tests trident roi head forward.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head = MODELS.build(self.roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - roi_head.forward(feats, proposals_list) + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + roi_head.forward(feats, proposals_list) + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + roi_head.forward(feats, proposals_list) diff --git a/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py b/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py index df7734c5a95..2aeddf4f77e 100644 --- a/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py +++ b/tests/test_models/test_roi_heads/test_multi_instance_roi_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import Config +from mmengine.device.utils import is_musa_available from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals @@ -81,49 +82,91 @@ def test_init(self): def test_standard_roi_head_loss(self): """Tests multi instance roi head loss when truth is empty and 
non- empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 roi_head_cfg = _fake_roi_head() roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) - - # When truth is non-empty then emd loss should be nonzero for - # random inputs - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[1], - num_classes=4, - with_mask=False, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - loss = out['loss_rcnn_emd'] - self.assertGreater(loss.sum(), 0, 'loss should be non-zero') - - # When there is no truth, the emd loss should be zero. - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - out = roi_head.loss(feats, proposals_list, batch_data_samples) - empty_loss = out['loss_rcnn_emd'] - self.assertEqual( - empty_loss.sum(), 0, - 'there should be no emd loss when there are no true boxes') + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + # When truth is non-empty then emd loss should be nonzero for + # random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=False, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss = out['loss_rcnn_emd'] + self.assertGreater(loss.sum(), 0, 'loss should be non-zero') + + # When there is no truth, the emd loss should be zero. 
+ batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_loss = out['loss_rcnn_emd'] + self.assertEqual( + empty_loss.sum(), 0, + 'there should be no emd loss when there are no true boxes') + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then emd loss should be nonzero for + # random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=False, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss = out['loss_rcnn_emd'] + self.assertGreater(loss.sum(), 0, 'loss should be non-zero') + + # When there is no truth, the emd loss should be zero. + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_loss = out['loss_rcnn_emd'] + self.assertEqual( + empty_loss.sum(), 0, + 'there should be no emd loss when there are no true boxes') diff --git a/tests/test_models/test_roi_heads/test_pisa_roi_head.py b/tests/test_models/test_roi_heads/test_pisa_roi_head.py index 5820c3977c8..fc1d48fe478 100644 --- a/tests/test_models/test_roi_heads/test_pisa_roi_head.py +++ b/tests/test_models/test_roi_heads/test_pisa_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS @@ -21,12 +22,15 @@ def test_init(self): roi_head = MODELS.build(self.roi_head_cfg) self.assertTrue(roi_head.with_bbox) - @parameterized.expand(['cpu', 'cuda']) + @parameterized.expand(['cpu', 'cuda', 'musa']) def test_pisa_roi_head(self, device): """Tests trident roi head predict.""" if not torch.cuda.is_available() and device == 'cuda': # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') + elif not is_musa_available() and device == 'musa': + # RoI pooling only support in GPU + return unittest.skip('test requires GPU and torch+musa') roi_head = MODELS.build(self.roi_head_cfg) roi_head = roi_head.to(device=device) s = 256 diff --git a/tests/test_models/test_roi_heads/test_point_rend_roi_head.py b/tests/test_models/test_roi_heads/test_point_rend_roi_head.py index dce8bf498ea..410edcf58db 100644 --- a/tests/test_models/test_roi_heads/test_point_rend_roi_head.py +++ b/tests/test_models/test_roi_heads/test_point_rend_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import PointRendRoIHead # noqa @@ -26,7 +27,7 @@ def test_init(self, cfg_file): ['point_rend/point-rend_r50-caffe_fpn_ms-1x_coco.py']) 
def test_point_rend_roi_head_loss(self, cfg_file): """Tests htc roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -36,42 +37,86 @@ def test_point_rend_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [img_meta['img_shape'] for img_meta in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [img_meta['img_shape'] for img_meta in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # Positive rois must not be empty - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - with self.assertRaises(AssertionError): + # Positive rois must not be empty + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + with self.assertRaises(AssertionError): + out = roi_head.loss(feats, proposal_list, batch_data_samples) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [img_meta['img_shape'] for img_meta in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] out = 
roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # Positive rois must not be empty + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + with self.assertRaises(AssertionError): + out = roi_head.loss(feats, proposal_list, batch_data_samples) @parameterized.expand( ['point_rend/point-rend_r50-caffe_fpn_ms-1x_coco.py']) diff --git a/tests/test_models/test_roi_heads/test_scnet_roi_head.py b/tests/test_models/test_roi_heads/test_scnet_roi_head.py index 9f14530ba7b..b133caaf673 100644 --- a/tests/test_models/test_roi_heads/test_scnet_roi_head.py +++ b/tests/test_models/test_roi_heads/test_scnet_roi_head.py @@ -3,6 +3,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import SCNetRoIHead # noqa @@ -27,7 +28,7 @@ def test_init(self, cfg_file): @parameterized.expand(['scnet/scnet_r50_fpn_1x_coco.py']) def test_scnet_roi_head_loss(self, cfg_file): """Tests htc roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -37,54 +38,107 @@ def test_scnet_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - with_semantic=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + with_semantic=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - with_semantic=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + with_semantic=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + + else: + roi_head = roi_head.cuda() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + with_semantic=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + with_semantic=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) @parameterized.expand(['scnet/scnet_r50_fpn_1x_coco.py']) def test_scnet_roi_head_predict(self, cfg_file): - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU return unittest.skip('test requires GPU and torch+cuda') s = 256 @@ -94,23 +148,49 @@ def test_scnet_roi_head_predict(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 256, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + results = roi_head.predict( + feats, proposal_list, batch_data_samples, rescale=True) + self.assertEqual(results[0].masks.shape[-2:], (s, s)) + else: + roi_head = roi_head.cuda() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 256, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - results = roi_head.predict( - feats, proposal_list, batch_data_samples, rescale=True) - self.assertEqual(results[0].masks.shape[-2:], (s, s)) + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + results = roi_head.predict( + feats, proposal_list, batch_data_samples, rescale=True) + self.assertEqual(results[0].masks.shape[-2:], (s, s)) diff --git a/tests/test_models/test_roi_heads/test_sparse_roi_head.py b/tests/test_models/test_roi_heads/test_sparse_roi_head.py index 1182786c0e0..132e44ddcbb 100644 --- a/tests/test_models/test_roi_heads/test_sparse_roi_head.py +++ b/tests/test_models/test_roi_heads/test_sparse_roi_head.py @@ -4,6 +4,7 @@ import torch import torch.nn as nn +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.models.roi_heads import StandardRoIHead # noqa @@ 
-26,9 +27,9 @@ def test_init(self, cfg_file): @parameterized.expand(['queryinst/queryinst_r50_fpn_1x_coco.py']) def test_cascade_roi_head_loss(self, cfg_file): """Tests standard roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 img_metas = [{ 'img_shape': (s, s, 3), @@ -36,57 +37,124 @@ def test_cascade_roi_head_loss(self, cfg_file): }] roi_head_cfg = get_roi_head_cfg(cfg_file) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - img_shape_list = [(3, s, s) for _ in img_metas] - proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - # add import elements into proposal - init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone() - for proposal in proposal_list: - proposal.features = init_proposal_features - proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, - s]]).repeat(100, 1) - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, + 256).musa().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- proposal_list = demo_mm_proposals(img_shape_list, 100, device='cuda') - # add import elements into proposal - init_proposal_features = nn.Embedding(100, 256).cuda().weight.clone() - for proposal in proposal_list: - proposal.features = init_proposal_features - proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, - s]]).repeat(100, 1) - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=[(3, s, s)], - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - out = roi_head.loss(feats, proposal_list, batch_data_samples) - for name, value in out.items(): - if 'loss_cls' in name: - self.assertGreaterEqual( - value.sum(), 0, msg='loss should be non-zero') - elif 'loss_bbox' in name or 'loss_mask' in name: - self.assertEqual(value.sum(), 0) + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='musa') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, + 256).musa().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) + + else: + roi_head = roi_head.cuda() + feats = [] + for i in range( + len(roi_head_cfg.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + img_shape_list = [(3, s, s) for _ in img_metas] + proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, + 256).cuda().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ proposal_list = demo_mm_proposals( + img_shape_list, 100, device='cuda') + # add import elements into proposal + init_proposal_features = nn.Embedding(100, + 256).cuda().weight.clone() + for proposal in proposal_list: + proposal.features = init_proposal_features + proposal.imgs_whwh = feats[0].new_tensor([[s, s, s, + s]]).repeat(100, 1) + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=[(3, s, s)], + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + out = roi_head.loss(feats, proposal_list, batch_data_samples) + for name, value in out.items(): + if 'loss_cls' in name: + self.assertGreaterEqual( + value.sum(), 0, msg='loss should be non-zero') + elif 'loss_bbox' in name or 'loss_mask' in name: + self.assertEqual(value.sum(), 0) diff --git a/tests/test_models/test_roi_heads/test_standard_roi_head.py b/tests/test_models/test_roi_heads/test_standard_roi_head.py index 5ae95e28440..6bd7db1f719 100644 --- a/tests/test_models/test_roi_heads/test_standard_roi_head.py +++ b/tests/test_models/test_roi_heads/test_standard_roi_head.py @@ -4,6 +4,7 @@ import torch from mmengine.config import Config +from mmengine.device.utils import is_musa_available from parameterized import parameterized from mmdet.registry import MODELS @@ -141,66 +142,132 @@ def test_init(self): @parameterized.expand([(False, ), (True, )]) def test_standard_roi_head_loss(self, with_shared_head): """Tests standard roi head loss when truth is empty and non-empty.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') s = 256 roi_head_cfg = _fake_roi_head(with_shared_head=with_shared_head) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - if not with_shared_head: - feats.append( - torch.rand(1, 1, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - else: - feats.append( - torch.rand(1, 1024, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - feats = tuple(feats) - - # When truth is non-empty then both cls, box, and mask loss - # should be nonzero for random inputs - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[1], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - - out = roi_head.loss(feats, proposals_list, batch_data_samples) - loss_cls = out['loss_cls'] - loss_bbox = out['loss_bbox'] - loss_mask = out['loss_mask'] - self.assertGreater(loss_cls.sum(), 0, 'cls loss should be non-zero') - self.assertGreater(loss_bbox.sum(), 0, 'box loss should be non-zero') - self.assertGreater(loss_mask.sum(), 0, 'mask loss should be non-zero') - - # When there is no truth, the cls loss should be nonzero but - # there should be no box and mask loss. 
- batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - out = roi_head.loss(feats, proposals_list, batch_data_samples) - empty_cls_loss = out['loss_cls'] - empty_bbox_loss = out['loss_bbox'] - empty_mask_loss = out['loss_mask'] - self.assertGreater(empty_cls_loss.sum(), 0, - 'cls loss should be non-zero') - self.assertEqual( - empty_bbox_loss.sum(), 0, - 'there should be no box loss when there are no true boxes') - self.assertEqual( - empty_mask_loss.sum(), 0, - 'there should be no mask loss when there are no true boxes') + if is_musa_available(): + roi_head = roi_head.musa() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + if not with_shared_head: + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + else: + feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, + 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, + 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, + 'mask loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') + + else: + roi_head = roi_head.cuda() + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + if not with_shared_head: + feats.append( + torch.rand(1, 1, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + else: + feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + feats = tuple(feats) + + # When truth is non-empty then both cls, box, and mask loss + # should be nonzero for random inputs + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[1], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + + out = roi_head.loss(feats, proposals_list, batch_data_samples) + loss_cls = out['loss_cls'] + loss_bbox = out['loss_bbox'] + loss_mask = out['loss_mask'] + self.assertGreater(loss_cls.sum(), 0, + 'cls loss should be non-zero') + self.assertGreater(loss_bbox.sum(), 0, + 'box loss should be non-zero') + self.assertGreater(loss_mask.sum(), 0, + 'mask loss should be non-zero') + + # When there is no truth, the cls loss should be nonzero but + # there should be no box and mask loss. 
+ batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + out = roi_head.loss(feats, proposals_list, batch_data_samples) + empty_cls_loss = out['loss_cls'] + empty_bbox_loss = out['loss_bbox'] + empty_mask_loss = out['loss_mask'] + self.assertGreater(empty_cls_loss.sum(), 0, + 'cls loss should be non-zero') + self.assertEqual( + empty_bbox_loss.sum(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_mask_loss.sum(), 0, + 'there should be no mask loss when there are no true boxes') diff --git a/tests/test_models/test_roi_heads/test_trident_roi_head.py b/tests/test_models/test_roi_heads/test_trident_roi_head.py index a173b01066c..2759ff98476 100644 --- a/tests/test_models/test_roi_heads/test_trident_roi_head.py +++ b/tests/test_models/test_roi_heads/test_trident_roi_head.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmdet.registry import MODELS from mmdet.testing import demo_mm_inputs, demo_mm_proposals, get_roi_head_cfg @@ -25,34 +26,62 @@ def test_init(self): def test_trident_roi_head_predict(self): """Tests trident roi head predict.""" - if not torch.cuda.is_available(): + if not torch.cuda.is_available() and not is_musa_available(): # RoI pooling only support in GPU - return unittest.skip('test requires GPU and torch+cuda') + return unittest.skip('test requires GPU and torch+cuda+musa') roi_head_cfg = copy.deepcopy(self.roi_head_cfg) roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - s = 256 - feats = [] - for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): - feats.append( - torch.rand(1, 1024, s // (2**(i + 2)), - s // (2**(i + 2))).to(device='cuda')) - - image_shapes = [(3, s, s)] - batch_data_samples = demo_mm_inputs( - batch_size=1, - image_shapes=image_shapes, - num_items=[0], - num_classes=4, - with_mask=True, - device='cuda')['data_samples'] - proposals_list = demo_mm_proposals( - image_shapes=image_shapes, num_proposals=100, device='cuda') - # When `test_branch_idx == 1` - roi_head.predict(feats, proposals_list, batch_data_samples) - # When `test_branch_idx == -1` - roi_head_cfg.test_branch_idx = -1 - roi_head = MODELS.build(roi_head_cfg) - roi_head = roi_head.cuda() - roi_head.predict(feats, proposals_list, batch_data_samples) + if is_musa_available(): + roi_head = roi_head.musa() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1024, s // (2**(i + 2)), + s // (2**(i + 2))).to(device='musa')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='musa')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='musa') + # When `test_branch_idx == 1` + roi_head.predict(feats, proposals_list, batch_data_samples) + # When `test_branch_idx == -1` + roi_head_cfg.test_branch_idx = -1 + roi_head = MODELS.build(roi_head_cfg) + roi_head = roi_head.musa() + roi_head.predict(feats, proposals_list, batch_data_samples) + + else: + roi_head = roi_head.cuda() + s = 256 + feats = [] + for i in range(len(roi_head.bbox_roi_extractor.featmap_strides)): + feats.append( + torch.rand(1, 1024, s // 
(2**(i + 2)), + s // (2**(i + 2))).to(device='cuda')) + + image_shapes = [(3, s, s)] + batch_data_samples = demo_mm_inputs( + batch_size=1, + image_shapes=image_shapes, + num_items=[0], + num_classes=4, + with_mask=True, + device='cuda')['data_samples'] + proposals_list = demo_mm_proposals( + image_shapes=image_shapes, num_proposals=100, device='cuda') + # When `test_branch_idx == 1` + roi_head.predict(feats, proposals_list, batch_data_samples) + # When `test_branch_idx == -1` + roi_head_cfg.test_branch_idx = -1 + roi_head = MODELS.build(roi_head_cfg) + roi_head = roi_head.cuda() + roi_head.predict(feats, proposals_list, batch_data_samples) diff --git a/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py b/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py index db0b60717bc..6a42e525354 100644 --- a/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py +++ b/tests/test_models/test_task_modules/test_prior_generators/test_anchor_generator.py @@ -7,6 +7,7 @@ """ import pytest import torch +from mmengine.device.utils import is_musa_available def test_standard_points_generator(): @@ -102,6 +103,49 @@ def test_standard_points_generator(): assert (priors_half_offset[0][0] - priors[0][0]).sum() == 4 * 0.5 * 2 assert (priors_half_offset[1][0] - priors[1][0]).sum() == 10 * 0.5 * 2 + elif is_musa_available(): + anchor_generator_cfg = dict( + type='MlvlPointGenerator', strides=[4, 8], offset=0) + anchor_generator = build_prior_generator(anchor_generator_cfg) + assert anchor_generator is not None + # Square strides + mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0) + mlvl_points_half_stride_generator = MlvlPointGenerator( + strides=[4, 10], offset=0.5) + assert mlvl_points.num_levels == 2 + + # assert self.num_levels == len(featmap_sizes) + with pytest.raises(AssertionError): + mlvl_points.grid_priors(featmap_sizes=[(2, 2)], device='musa') + priors = mlvl_points.grid_priors( + featmap_sizes=[(2, 2), (4, 8)], device='musa') + priors_with_stride = mlvl_points.grid_priors( + featmap_sizes=[(2, 2), (4, 8)], with_stride=True, device='musa') + assert len(priors) == 2 + + # assert last dimension is (coord_x, coord_y, stride_w, stride_h). 
+ assert priors_with_stride[0].size(1) == 4 + assert priors_with_stride[0][0][2] == 4 + assert priors_with_stride[0][0][3] == 4 + assert priors_with_stride[1][0][2] == 10 + assert priors_with_stride[1][0][3] == 10 + + stride_4_feat_2_2 = priors[0] + assert (stride_4_feat_2_2[1] - stride_4_feat_2_2[0]).sum() == 4 + assert stride_4_feat_2_2.size(0) == 4 + assert stride_4_feat_2_2.size(1) == 2 + + stride_10_feat_4_8 = priors[1] + assert (stride_10_feat_4_8[1] - stride_10_feat_4_8[0]).sum() == 10 + assert stride_10_feat_4_8.size(0) == 4 * 8 + assert stride_10_feat_4_8.size(1) == 2 + + # assert the offset of 0.5 * stride + priors_half_offset = mlvl_points_half_stride_generator.grid_priors( + featmap_sizes=[(2, 2), (4, 8)], device='musa') + + assert (priors_half_offset[0][0] - priors[0][0]).sum() == 4 * 0.5 * 2 + assert (priors_half_offset[1][0] - priors[1][0]).sum() == 10 * 0.5 * 2 def test_sparse_prior(): @@ -118,7 +162,7 @@ def test_sparse_prior(): level_idx=0, device='cpu') - assert not sparse_prior.is_cuda + assert not sparse_prior.is_cuda and sparse_prior.device.type != 'musa' assert (sparse_prior == grid_anchors[0][prior_indexs]).all() sparse_prior = mlvl_points.sparse_priors( prior_idxs=prior_indexs, @@ -270,6 +314,91 @@ def test_sparse_prior(): featmap_size=featmap_sizes[i], device='cuda') assert (sparse_yolo_anchors == yolo_anchors[i][prior_indexs]).all() + elif is_musa_available(): + mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0) + prior_indexs = torch.Tensor([0, 3, 4, 5, 6, 7, 1, 2, 4, 5, 6, + 9]).long().musa() + + featmap_sizes = [(6, 8), (6, 4)] + grid_anchors = mlvl_points.grid_priors( + featmap_sizes=featmap_sizes, with_stride=False, device='musa') + sparse_prior = mlvl_points.sparse_priors( + prior_idxs=prior_indexs, + featmap_size=featmap_sizes[0], + level_idx=0, + device='musa') + assert (sparse_prior == grid_anchors[0][prior_indexs]).all() + sparse_prior = mlvl_points.sparse_priors( + prior_idxs=prior_indexs, + featmap_size=featmap_sizes[1], + level_idx=1, + device='musa') + assert (sparse_prior == grid_anchors[1][prior_indexs]).all() + assert sparse_prior.is_musa + mlvl_anchors = AnchorGenerator( + strides=[16, 32], + ratios=[1., 2.5], + scales=[1., 5.], + base_sizes=[4, 8]) + prior_indexs = torch.Tensor([4, 5, 6, 7, 0, 2, 50, 4, 5, 6, + 9]).long().to('musa') + + featmap_sizes = [(13, 5), (16, 4)] + grid_anchors = mlvl_anchors.grid_priors( + featmap_sizes=featmap_sizes, device='musa') + sparse_prior = mlvl_anchors.sparse_priors( + prior_idxs=prior_indexs, + featmap_size=featmap_sizes[0], + level_idx=0, + device='musa') + assert (sparse_prior == grid_anchors[0][prior_indexs]).all() + sparse_prior = mlvl_anchors.sparse_priors( + prior_idxs=prior_indexs, + featmap_size=featmap_sizes[1], + level_idx=1, + device='musa') + assert (sparse_prior == grid_anchors[1][prior_indexs]).all() + + # for ssd + from mmdet.models.task_modules.prior_generators import \ + SSDAnchorGenerator + featmap_sizes = [(38, 38), (19, 19), (10, 10)] + anchor_generator = SSDAnchorGenerator( + scale_major=False, + input_size=300, + basesize_ratio_range=(0.15, 0.9), + strides=[8, 16, 32], + ratios=[[2], [2, 3], [2, 3]]) + ssd_anchors = anchor_generator.grid_anchors( + featmap_sizes, device='musa') + for i in range(len(featmap_sizes)): + sparse_ssd_anchors = anchor_generator.sparse_priors( + prior_idxs=prior_indexs, + level_idx=i, + featmap_size=featmap_sizes[i], + device='musa') + assert (sparse_ssd_anchors == ssd_anchors[i][prior_indexs]).all() + + # for yolo + from 
mmdet.models.task_modules.prior_generators import \ + YOLOAnchorGenerator + featmap_sizes = [(38, 38), (19, 19), (10, 10)] + anchor_generator = YOLOAnchorGenerator( + strides=[32, 16, 8], + base_sizes=[ + [(116, 90), (156, 198), (373, 326)], + [(30, 61), (62, 45), (59, 119)], + [(10, 13), (16, 30), (33, 23)], + ]) + yolo_anchors = anchor_generator.grid_anchors( + featmap_sizes, device='musa') + for i in range(len(featmap_sizes)): + sparse_yolo_anchors = anchor_generator.sparse_priors( + prior_idxs=prior_indexs, + level_idx=i, + featmap_size=featmap_sizes[i], + device='musa') + assert (sparse_yolo_anchors == yolo_anchors[i][prior_indexs]).all() def test_standard_anchor_generator(): @@ -313,6 +442,8 @@ def test_ssd_anchor_generator(): from mmdet.models.task_modules import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -475,6 +606,8 @@ def test_anchor_generator_with_tuples(): from mmdet.models.task_modules import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -510,6 +643,8 @@ def test_yolo_anchor_generator(): from mmdet.models.task_modules import build_anchor_generator if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -553,6 +688,8 @@ def test_retina_anchor(): from mmdet.registry import MODELS if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' @@ -652,6 +789,8 @@ def test_guided_anchor(): from mmdet.registry import MODELS if torch.cuda.is_available(): device = 'cuda' + elif is_musa_available(): + device = 'musa' else: device = 'cpu' # head configs modified from diff --git a/tests/test_models/test_vis/test_mask2former.py b/tests/test_models/test_vis/test_mask2former.py index c8d3474e9ca..adc2160ccb4 100644 --- a/tests/test_models/test_vis/test_mask2former.py +++ b/tests/test_models/test_vis/test_mask2former.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -30,14 +31,14 @@ def test_mask2former_init(self, cfg_file): @parameterized.expand([ ('mask2former_vis/mask2former_r50_8xb2-8e_youtubevis2021.py', - ('cpu', 'cuda')), + ('cpu', 'cuda', 'musa')), ]) def test_mask2former_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_mask2former_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -48,6 +49,10 @@ def test_mask2former_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, @@ -63,7 +68,7 @@ def test_mask2former_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ('mask2former_vis/mask2former_r50_8xb2-8e_youtubevis2021.py', - ('cpu', 'cuda')), + ('cpu', 'cuda', 'musa')), ]) def 
test_mask2former_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -71,7 +76,7 @@ def test_mask2former_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -81,6 +86,10 @@ def test_mask2former_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, diff --git a/tests/test_models/test_vis/test_masktrack_rcnn.py b/tests/test_models/test_vis/test_masktrack_rcnn.py index fb94391f4d1..67fd95da48a 100644 --- a/tests/test_models/test_vis/test_masktrack_rcnn.py +++ b/tests/test_models/test_vis/test_masktrack_rcnn.py @@ -4,6 +4,7 @@ from unittest import TestCase import torch +from mmengine.device.utils import is_musa_available from mmengine.logging import MessageHub from mmengine.registry import init_default_scope from parameterized import parameterized @@ -32,14 +33,14 @@ def test_mask_track_rcnn_init(self, cfg_file): @parameterized.expand([ ( 'masktrack_rcnn/masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019.py', # noqa: E501 - ('cpu', 'cuda')), + ('cpu', 'cuda', 'musa')), ]) def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( f'test_mask_track_rcnn_forward_loss_mode-{time.time()}') message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -50,7 +51,10 @@ def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, num_frames=2, @@ -66,7 +70,7 @@ def test_mask_track_rcnn_forward_loss_mode(self, cfg_file, devices): @parameterized.expand([ ( 'masktrack_rcnn/masktrack-rcnn_mask-rcnn_r50_fpn_8xb1-12e_youtubevis2019.py', # noqa: E501 - ('cpu', 'cuda')), + ('cpu', 'cuda', 'musa')), ]) def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices): message_hub = MessageHub.get_instance( @@ -74,7 +78,7 @@ def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices): message_hub.update_info('iter', 0) message_hub.update_info('epoch', 0) - assert all([device in ['cpu', 'cuda'] for device in devices]) + assert all([device in ['cpu', 'cuda', 'musa'] for device in devices]) for device in devices: _model = get_detector_cfg(cfg_file) @@ -84,7 +88,10 @@ def test_mask_track_rcnn_forward_predict_mode(self, cfg_file, devices): if not torch.cuda.is_available(): return unittest.skip('test requires GPU and torch+cuda') model = model.cuda() - + elif device == 'musa': + if not is_musa_available(): + return unittest.skip('test requires GPU and torch+musa') + model = model.musa() packed_inputs = demo_track_inputs( batch_size=1, num_frames=1, diff --git 
a/tests/test_structures/test_bbox/test_base_boxes.py b/tests/test_structures/test_bbox/test_base_boxes.py index 651eeefe120..79127e00c99 100644 --- a/tests/test_structures/test_bbox/test_base_boxes.py +++ b/tests/test_structures/test_bbox/test_base_boxes.py @@ -2,6 +2,7 @@ import numpy as np import torch +from mmengine.device.utils import is_musa_available from mmengine.testing import assert_allclose from .utils import ToyBaseBoxes @@ -19,7 +20,9 @@ def test_init(self): if torch.cuda.is_available(): boxes = ToyBaseBoxes(box_tensor, device='cuda') self.assertTrue(boxes.tensor.is_cuda) - + elif is_musa_available(): + boxes = ToyBaseBoxes(box_tensor, device='musa') + self.assertTrue(boxes.tensor.is_musa) with self.assertRaises(AssertionError): box_tensor = torch.rand((4, )) boxes = ToyBaseBoxes(box_tensor) @@ -147,15 +150,25 @@ def test_tensor_like_functions(self): if torch.cuda.is_available(): new_boxes = boxes.to(device='cuda') self.assertTrue(new_boxes.tensor.is_cuda) + elif is_musa_available(): + new_boxes = boxes.to(device='musa') + self.assertTrue(new_boxes.tensor.is_musa) # cpu if torch.cuda.is_available(): new_boxes = boxes.to(device='cuda') new_boxes = new_boxes.cpu() self.assertFalse(new_boxes.tensor.is_cuda) + elif is_musa_available(): + new_boxes = boxes.to(device='musa') + new_boxes = new_boxes.cpu() + self.assertFalse(new_boxes.tensor.is_musa) # cuda if torch.cuda.is_available(): new_boxes = boxes.cuda() self.assertTrue(new_boxes.tensor.is_cuda) + elif is_musa_available(): + new_boxes = boxes.to('musa') + self.assertTrue(new_boxes.tensor.is_musa) # clone boxes.clone() # detach @@ -274,3 +287,6 @@ def test_misc(self): if torch.cuda.is_available(): new_boxes = boxes.fake_boxes((3, 4, 4), device='cuda') self.assertTrue(new_boxes.tensor.is_cuda) + if is_musa_available(): + new_boxes = boxes.fake_boxes((3, 4, 4), device='musa') + self.assertTrue(new_boxes.tensor.is_musa) diff --git a/tests/test_utils/test_benchmark.py b/tests/test_utils/test_benchmark.py index 939a7eca4e5..08611223652 100644 --- a/tests/test_utils/test_benchmark.py +++ b/tests/test_utils/test_benchmark.py @@ -83,8 +83,9 @@ def setUp(self) -> None: self.max_iter = 10 self.log_interval = 5 - @unittest.skipIf(not torch.cuda.is_available(), - 'test requires GPU and torch+cuda') + @unittest.skipIf(not torch.cuda.is_available() + and not is_musa_available(), + 'test requires GPU and torch+cuda+musa') def test_init_and_run(self): checkpoint_path = os.path.join(tempfile.gettempdir(), 'checkpoint.pth') torch.save(ToyDetector().state_dict(), checkpoint_path)
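The test changes above all repeat one pattern: probe CUDA first, then MUSA, then fall back to CPU, and duplicate the test body per backend. As a minimal sketch (not part of this patch), that probe could be factored into a shared helper. It assumes, as the patch does, that mmengine exposes is_musa_available and that torch_musa, when installed, registers the 'musa' device type with torch; the helper name preferred_gpu_device is hypothetical.

# Hypothetical helper, not part of the patch: collapses the repeated
# cuda/musa branching used throughout these tests.
import torch
from mmengine.device.utils import is_musa_available


def preferred_gpu_device() -> str:
    """Return 'cuda' if available, else 'musa', else 'cpu'."""
    if torch.cuda.is_available():
        return 'cuda'
    if is_musa_available():
        return 'musa'
    return 'cpu'


# Usage sketch: a test body would then vary only in the device string, e.g.
#     device = preferred_gpu_device()
#     roi_head = roi_head.to(device)
#     proposal_list = demo_mm_proposals(img_shape_list, 100, device=device)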