diff --git a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
index 237a6c046a00..e078885486c5 100644
--- a/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
+++ b/python/ray/serve/tests/unit/test_pow_2_replica_scheduler.py
@@ -1253,7 +1253,7 @@ async def test_multiple_queries_with_different_model_ids(self, pow_2_scheduler):
                 ),
             ]
 
-            done, _ = await asyncio.wait(tasks, timeout=0.1)
+            done, _ = await asyncio.wait(tasks, timeout=0.01)
             assert len(done) == len(tasks)
 
             assert all(
@@ -1600,7 +1600,7 @@ async def test_queue_len_cache_replica_at_capacity_is_probed(pow_2_scheduler):
     s.replica_queue_len_cache.update(r1.replica_id, DEFAULT_MAX_ONGOING_REQUESTS)
 
     task = loop.create_task(s.choose_replica_for_request(fake_pending_request()))
-    done, _ = await asyncio.wait([task], timeout=0.1)
+    done, _ = await asyncio.wait([task], timeout=0.01)
     assert len(done) == 0
     # 1 probe from scheduling requests
     # + 1 probe from when the replica set was updated with replica r1
@@ -1608,7 +1608,7 @@ async def test_queue_len_cache_replica_at_capacity_is_probed(pow_2_scheduler):
 
     # Now let the replica respond and accept the request, it should be scheduled.
     r1.set_queue_len_response(DEFAULT_MAX_ONGOING_REQUESTS - 1)
-    done, _ = await asyncio.wait([task], timeout=0.1)
+    done, _ = await asyncio.wait([task], timeout=0.01)
     assert len(done) == 1
     assert (await task) == r1
 
@@ -1636,7 +1636,7 @@ async def test_queue_len_cache_background_probing(pow_2_scheduler):
     s.replica_queue_len_cache.update(r1.replica_id, 0)
 
     task = loop.create_task(s.choose_replica_for_request(fake_pending_request()))
-    done, _ = await asyncio.wait([task], timeout=0.1)
+    done, _ = await asyncio.wait([task], timeout=0.01)
     assert len(done) == 1
     assert (await task) == r1
     # 0 probes from scheduling requests