Run CI on Modal, upgrade Bitsandbytes #641

Open · wants to merge 54 commits into base: master

Changes from all commits · 54 commits
fc52696
Run CI on Modal, upgrade Bitsandbytes
mryab Feb 10, 2025
58f3d44
Add docs configuration
mryab Feb 10, 2025
6d36cd1
Fix formatting
mryab Feb 10, 2025
ab714bd
Configure concurrency for Modal tests
mryab Feb 10, 2025
c840ab9
Sort imports
mryab Feb 10, 2025
f717bf6
Set up the timeout
mryab Feb 10, 2025
0dca5a2
Set up concurrency for other actions as well
mryab Feb 10, 2025
11feccf
Remove concurrency limits
mryab Feb 10, 2025
cbf4450
Add concurrency, update bitsandbytes in dependencies
mryab Feb 10, 2025
4f303bd
Add cache, bump CI versions
mryab Feb 10, 2025
6a5ec5e
Skip test_allreduce_protocol for the time being
mryab Feb 10, 2025
ba3e386
Reduce the number of CPUs
mryab Feb 10, 2025
1fb8dec
Decrease the limits in test_dht_connection_successful
mryab Feb 10, 2025
67e040f
Restore the limits in test_dht_connection_successful
mryab Feb 10, 2025
c0af379
Clear the blacklist before attempting store
mryab Feb 10, 2025
6116570
Increase the wait in test_load_state_from_peers
mryab Feb 10, 2025
801bb4f
Parametrize tests by Python version, upload Codecov coverage
mryab Feb 11, 2025
fd69b64
Check out and build a specific version of bitsandbytes
mryab Feb 11, 2025
22739f5
Increase the timeouts to account for image builds
mryab Feb 11, 2025
635879f
Introduce timeouts
mryab Feb 22, 2025
8fbd9dd
Increase the number of CPUs for tests
mryab Feb 22, 2025
d70b4b9
Make tests more robust
mryab Feb 23, 2025
4254468
Make tests more robust
mryab Feb 23, 2025
1753bae
Reformat the code
mryab Feb 23, 2025
4753fef
Mark test_client_disconnect as flaky
mryab Feb 23, 2025
9705318
Build and test p2pd separately
mryab Feb 23, 2025
ae5ed98
Install Go only for a specific image
mryab Feb 23, 2025
11eb277
Don't use uv when building p2pd
mryab Feb 23, 2025
9d37fe9
Mark test_dhtnode_blacklist as flaky
mryab Feb 23, 2025
7abc9f0
Increase timeouts
mryab Feb 23, 2025
5b69835
Make test_averaging_trigger more robust
mryab Feb 23, 2025
9e37679
Download codecov with wget
mryab Feb 23, 2025
aa20215
Skip all training tests for the time being
mryab Feb 23, 2025
a03288e
Skip test_allgather for the time being
mryab Feb 23, 2025
a614a02
Mark test_performance_ema_threadsafe and test_remote_expert_worker_ru…
mryab Feb 23, 2025
2cfc94a
Reduce timeouts, mark test_background_server_identity_path as flaky
mryab Feb 23, 2025
df048db
Mention sponsorship by Prime Intellect
mryab Feb 23, 2025
e388e07
Fix missing import
mryab Feb 23, 2025
98e6a38
Mark flaky tests
mryab Feb 23, 2025
b317b29
Modify the codecov command
mryab Feb 23, 2025
66c9187
Pass extra environment variables to codecov
mryab Feb 23, 2025
93460aa
Remove --dist from codecov run
mryab Feb 23, 2025
75529a1
Pass GITHUB_EVENT_PULL_REQUEST_HEAD_SHA when running the test
mryab Feb 23, 2025
83b53bb
Mark test_fault_tolerance as flaky
mryab Feb 23, 2025
5984bad
Mark test_cli_run_server_identity_path as flaky
mryab Feb 23, 2025
2f67c52
Disable parallel execution for codecov management
mryab Feb 23, 2025
e8efb66
Increase codecov run timeout to 15 minutes
mryab Feb 23, 2025
f8ad2a8
Pass GITHUB_EVENT_PULL_REQUEST_HEAD_SHA to the workflow
mryab Feb 23, 2025
225439e
Pass additional secrets
mryab Feb 23, 2025
3695813
Mark one more test as flaky
mryab Feb 23, 2025
6bac780
Mark another test as flaky
mryab Feb 23, 2025
3228dfd
Pass codecov values explicitly
mryab Feb 23, 2025
0a9347d
Pass --no-use-pep517 to uv pip install
mryab Feb 23, 2025
87f0ece
Change uv pip to pip
mryab Feb 23, 2025
12 changes: 8 additions & 4 deletions .github/workflows/check-style.yml
@@ -5,20 +5,24 @@ on:
branches: [ master ]
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
black:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: psf/black@stable
with:
options: "--check --diff"
version: "22.3.0"
isort:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.11
- uses: isort/isort-action@master
@@ -28,7 +32,7 @@ jobs:
codespell:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: codespell-project/actions-codespell@v1
with:
only_warn: 1
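The concurrency block added to each workflow above keys runs by PR number when one exists, falling back to the pushed ref, so a newer run cancels the stale one for the same PR or branch. A hypothetical resolution of the group key (the workflow name is assumed, since this diff does not show it):

```yaml
# Hypothetical group keys for the expression above, assuming the workflow is named "check-style":
#   pull_request event on PR #641  ->  check-style-641
#   push to master                 ->  check-style-refs/heads/master
# With cancel-in-progress: true, a newly queued run cancels an older run in the same group.
```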
6 changes: 5 additions & 1 deletion .github/workflows/push-docker-image.yml
@@ -8,13 +8,17 @@ on:
pull_request:
branches: [ master ]

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest

steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Docker meta
id: meta
12 changes: 8 additions & 4 deletions .github/workflows/run-benchmarks.yml
@@ -5,19 +5,23 @@ on:
branches: [ master ]
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_benchmarks:

runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: 3.11
- name: Cache dependencies
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.11-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev.txt') }}
@@ -28,7 +32,7 @@ jobs:
pip install -r requirements-dev.txt
- name: Build bitsandbytes
run: |
pip install bitsandbytes==0.41.1
pip install bitsandbytes==0.45.2
- name: Build hivemind
run: |
pip install .
112 changes: 112 additions & 0 deletions .github/workflows/run-tests-on-modal.yml
@@ -0,0 +1,112 @@
name: Modal tests

on:
push:
branches: [master]
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
fail-fast: false
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
PYTHON_VERSION: ${{ matrix.python-version }}
timeout-minutes: 15
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.12-modal

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install modal==0.73.32

- name: Run tests
run: |
modal run modal_ci.py::run_tests

measure_coverage:
runs-on: ubuntu-latest
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
GITHUB_EVENT_NAME: ${{ github.event_name }}
GITHUB_EVENT_NUMBER: ${{ github.event.number }}
GITHUB_EVENT_PULL_REQUEST_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
PYTHON_VERSION: "3.11"
timeout-minutes: 15
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.12-modal

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install modal==0.73.32

- name: Measure and upload coverage
run: |
modal run modal_ci.py::run_codecov

build_and_test_p2pd:
runs-on: ubuntu-latest
env:
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
PYTHON_VERSION: "3.11"
timeout-minutes: 10
steps:
- name: Checkout Repository
uses: actions/checkout@v4

- name: Install Python
uses: actions/setup-python@v5
with:
python-version: "3.12"

- name: Cache dependencies
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-3.12-modal

- name: Install build dependencies
run: |
python -m pip install --upgrade pip
pip install modal==0.73.32

- name: Run p2pd tests
run: |
modal run modal_ci.py::build_and_test_p2pd
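The three jobs above all invoke entrypoints in modal_ci.py, a file added elsewhere in this PR and not shown on this page. A minimal sketch of what such an entrypoint could look like with the Modal 0.73 App/Image API; the image contents, resource limits, and pytest invocation below are illustrative guesses, not the PR's actual code:

```python
# modal_ci.py (hypothetical sketch, not the file from this PR)
import subprocess

import modal

app = modal.App("hivemind-ci")

# Container image with test dependencies; the real file presumably also installs hivemind itself.
image = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("git")
    .pip_install("bitsandbytes==0.45.2")
    .pip_install_from_requirements("requirements-dev.txt")
)

@app.function(image=image, timeout=900, cpu=8)
def run_tests() -> None:
    # Runs inside the Modal container; a nonzero pytest exit code fails `modal run`,
    # which in turn fails the GitHub Actions job that invoked it.
    subprocess.run(["pytest", "tests", "-v"], check=True)
```

`modal run modal_ci.py::run_tests` then executes the function remotely, authenticating with the MODAL_TOKEN_ID and MODAL_TOKEN_SECRET values passed through the workflow env.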
20 changes: 11 additions & 9 deletions .github/workflows/run-tests.yml
@@ -1,9 +1,11 @@
name: Tests

on:
push:
branches: [ master ]
pull_request:
# Tests in GHA only run manually; see run-tests-on-modal.yml for the same tests in CI
on: workflow_dispatch

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
run_tests:
@@ -15,13 +17,13 @@ jobs:
fail-fast: false
timeout-minutes: 15
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Cache dependencies
uses: actions/cache@v3
uses: actions/cache@v4
with:
path: ~/.cache/pip
key: Key-v1-${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-dev.txt') }}
Expand All @@ -32,7 +34,7 @@ jobs:
pip install -r requirements-dev.txt
- name: Build bitsandbytes
run: |
pip install bitsandbytes==0.41.1
pip install bitsandbytes==0.45.2
- name: Build hivemind
run: |
pip install .
@@ -94,7 +96,7 @@ jobs:
pip install -r requirements-dev.txt
- name: Build bitsandbytes
run: |
pip install bitsandbytes==0.41.1
pip install bitsandbytes==0.45.2
- name: Build hivemind
run: |
pip install -e . --no-use-pep517
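With the trigger narrowed to workflow_dispatch, this legacy GHA suite only runs when started by hand, for instance from the Actions tab or with the GitHub CLI (assuming it is installed and authenticated):

```shell
gh workflow run run-tests.yml --ref master
```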
1 change: 1 addition & 0 deletions .readthedocs.yml
@@ -2,6 +2,7 @@ version: 2

sphinx:
fail_on_warning: true
configuration: docs/conf.py

python:
install:
4 changes: 4 additions & 0 deletions README.md
@@ -118,6 +118,10 @@ the [contributing guidelines](https://github.com/learning-at-home/hivemind/blob/
more about other ways to contribute, read
our [guide](https://learning-at-home.readthedocs.io/en/latest/user/contributing.html).

## Collaborators and Sponsorship

* [Prime Intellect](https://www.primeintellect.ai/): sponsoring compute resources on [Modal](https://modal.com/) for CI

## Citation

If you found hivemind or its underlying algorithms useful for your research, please cite the following source:
6 changes: 3 additions & 3 deletions hivemind/compression/base.py
@@ -107,14 +107,14 @@ def extract(self, serialized_tensor: runtime_pb2.Tensor) -> torch.Tensor:
if serialized_tensor.dtype == "bfloat16":
numel = shape.numel()
if numel > 0 and len(serialized_tensor.buffer) // numel == 4:
array = np.frombuffer(serialized_tensor.buffer, dtype=np.float32)
array = np.frombuffer(bytearray(serialized_tensor.buffer), dtype=np.float32)
tensor = torch.as_tensor(array, dtype=torch.bfloat16)
else:
array = np.frombuffer(serialized_tensor.buffer, dtype=np.int16)
array = np.frombuffer(bytearray(serialized_tensor.buffer), dtype=np.int16)
# reinterpret_cast from an arbitrary 2-byte type supported by numpy
tensor = torch.as_tensor(array).view(torch.bfloat16)
else:
array = np.frombuffer(serialized_tensor.buffer, dtype=np.dtype(serialized_tensor.dtype))
array = np.frombuffer(bytearray(serialized_tensor.buffer), dtype=np.dtype(serialized_tensor.dtype))
tensor = torch.as_tensor(array)
return tensor.reshape(shape)
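For context on the bytearray wrapping above: protobuf bytes fields are immutable, np.frombuffer over them returns a read-only array, and recent PyTorch warns when torch.as_tensor receives non-writable memory. Copying into a bytearray first sidesteps that. A small sketch (values are illustrative):

```python
import numpy as np
import torch

buf = np.arange(4, dtype=np.float32).tobytes()  # immutable bytes, like serialized_tensor.buffer

readonly = np.frombuffer(buf, dtype=np.float32)
assert not readonly.flags.writeable
# torch.as_tensor(readonly) would warn "The given NumPy array is not writable" on recent PyTorch

writable = np.frombuffer(bytearray(buf), dtype=np.float32)  # bytearray copy -> writable array
tensor = torch.as_tensor(writable)  # no warning; the backing memory can be written safely
```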

12 changes: 9 additions & 3 deletions hivemind/compression/quantization.py
@@ -140,8 +140,14 @@ def quantize(
except ImportError:
raise ImportError(BNB_MISSING_MESSAGE)

quantized, (absmax, codebook, *extra_params) = quantize_blockwise(tensor, blocksize=4096, nested=False)
assert tuple(extra_params) == self.EXTRA_PARAMS # blocksize, nested, dtype, offset, state2
assert tensor.dtype == torch.float32
quantized, quant_state = quantize_blockwise(tensor, blocksize=4096, nested=False)
absmax, codebook = quant_state.absmax, quant_state.code
assert quant_state.blocksize == 4096
assert quant_state.nested is False
assert quant_state.dtype == self.EXTRA_PARAMS[2]
assert quant_state.offset == self.EXTRA_PARAMS[3]
assert quant_state.state2 == self.EXTRA_PARAMS[4]
return quantized.numpy(), (absmax.numpy(), codebook.numpy())

def compress(self, tensor: torch.Tensor, info: CompressionInfo, allow_inplace: bool = False) -> runtime_pb2.Tensor:
@@ -187,5 +193,5 @@ def extract(self, serialized_tensor: runtime_pb2.Tensor) -> torch.Tensor:
absmax = torch.as_tensor(absmax)
codebook = torch.as_tensor(codebook)
quantized = torch.as_tensor(quantized).reshape(tuple(serialized_tensor.size))
result = dequantize_blockwise(quantized, (absmax, codebook, *self.EXTRA_PARAMS))
result = dequantize_blockwise(quantized, absmax=absmax, code=codebook, blocksize=4096, nested=False)
return result.to(getattr(torch, serialized_tensor.dtype)).requires_grad_(serialized_tensor.requires_grad)
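The rewrite above tracks the bitsandbytes API change: in the 0.45 series pinned by this PR, quantize_blockwise returns a QuantState object rather than a tuple, and dequantize_blockwise accepts absmax and code as keyword arguments. A hedged round-trip sketch against bitsandbytes 0.45.2:

```python
import torch
from bitsandbytes.functional import dequantize_blockwise, quantize_blockwise

x = torch.randn(1024, dtype=torch.float32)
quantized, quant_state = quantize_blockwise(x, blocksize=4096, nested=False)

# The former tuple entries are now attributes on QuantState:
absmax, codebook = quant_state.absmax, quant_state.code

restored = dequantize_blockwise(quantized, absmax=absmax, code=codebook, blocksize=4096, nested=False)
print((x - restored).abs().max())  # small but nonzero: 8-bit blockwise quantization is lossy
```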
16 changes: 14 additions & 2 deletions hivemind/moe/client/moe.py
@@ -90,9 +90,11 @@
else:
input_for_gating = input

logger.debug("Computing expert scores")

# 1. compute scores and find most appropriate experts with beam search
grid_scores = self.proj(input_for_gating).split_with_sizes(self.beam_search.grid_size, dim=-1)

logger.debug("Finding best experts")

chosen_experts: List[List[RemoteExpert]] = self.beam_search.batch_find_best_experts(
[scores.detach().cpu().numpy() for scores in grid_scores], self.k_best
)
@@ -108,6 +110,7 @@
except P2PDaemonError as e:
logger.warning(f"Failed to get RemoteMixtureOfExperts.output_shape: {e}")

logger.debug(f"Calling experts {chosen_experts}")

expert_mask, *expert_outputs = _RemoteCallMany.apply(
DUMMY,
chosen_experts,
Expand All @@ -123,6 +126,7 @@
)
# ^-- multiple tensors of shape [batch_size, max_experts, ...output_shape]

logger.debug("Computing expert weights")

expert_logits = self.compute_expert_scores(grid_scores, chosen_experts)
masked_logits = torch.full((1,), float("-inf"), device=expert_logits.device, dtype=expert_logits.dtype)
expert_logits = torch.where(expert_mask, expert_logits, masked_logits)
@@ -375,19 +379,26 @@
timeout_total = float("inf") if timeout_total is None else timeout_total
timeout_after_k_min = float("inf") if timeout_after_k_min is None else timeout_after_k_min
num_successful_tasks = [0 for _ in range(num_samples)]
pending_samples = num_samples # samples for which we have less than k_min results

samples_with_tasks = {sample_idx for sample_idx, _ in task_to_indices.values()}
pending_samples = len(samples_with_tasks) # samples for which we have less than k_min results
assert pending_samples <= num_samples

finished_indices, finished_outputs = [], []
t_finish = time.perf_counter() + timeout_total
pending_tasks = set(task_to_indices.keys())
finished_tasks = Queue()

logger.debug(f"Pending tasks: {list(pending_tasks)}")
try:
# the algorithm below is essentially futures.as_completed, but for grpc.Future
for task in pending_tasks:
task.add_done_callback(finished_tasks.put)

for _ in range(len(task_to_indices)):
timeout = max(0.0, t_finish - time.perf_counter()) if t_finish != float("inf") else None
logger.debug(f"Finished tasks: {list(finished_tasks.queue)}")
logger.debug(f"Pending tasks: {list(pending_tasks)}")
task = finished_tasks.get(timeout=timeout)
pending_tasks.discard(task)

@@ -399,6 +410,7 @@
# count how many successes we have for each input sample
sample_index = task_to_indices[task][0]
num_successful_tasks[sample_index] += 1
logger.debug(f"Num successful tasks: {num_successful_tasks}")

if num_successful_tasks[sample_index] == k_min:
pending_samples -= 1
if (
@@ -416,7 +428,7 @@

def _process_dispatched_task(task: Future, detect_anomalies: bool) -> Optional[Tuple[torch.Tensor]]:
if task.exception() or task.cancelled():
logger.warning(f"Task {task} failed: {type(task.exception())}")
logger.warning(f"Task {task} failed: {task.exception()}")

return None

outputs = task.result()
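The pending_samples change earlier in this hunk fixes an overcount: the old code assumed every sample had at least one dispatched task, so samples with no tasks could keep the early-exit condition from ever firing. An illustrative repro with toy values, not taken from the PR:

```python
# task_to_indices maps each dispatched task to (sample_idx, expert_idx)
task_to_indices = {"task_a": (0, 0), "task_b": (0, 1), "task_c": (2, 0)}
num_samples = 4  # samples 1 and 3 got no tasks, e.g. no reachable experts

# old: pending_samples = num_samples -> 4; it could never drop to 0, since only 2 samples have tasks
samples_with_tasks = {sample_idx for sample_idx, _ in task_to_indices.values()}
pending_samples = len(samples_with_tasks)  # 2: only samples that can actually reach k_min
assert pending_samples <= num_samples
```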
1 change: 1 addition & 0 deletions hivemind/moe/server/connection_handler.py
@@ -134,6 +134,7 @@ async def _process_inputs(
async def rpc_forward(self, request: runtime_pb2.ExpertRequest, context: P2PContext) -> runtime_pb2.ExpertResponse:
inputs = [deserialize_torch_tensor(tensor) for tensor in request.tensors]
expert = self.module_backends[request.uid]
logger.debug(f"Processing inputs for expert {request.uid}")
return runtime_pb2.ExpertResponse(
tensors=await self._process_inputs(inputs, expert.forward_pool, expert.outputs_schema)
)