4-bit quantization meta device bias loading bug (#2805)
* 4-bit quantization meta device bias loading bug: fixes #2742

* move condition

---------

Co-authored-by: mh <[email protected]>
SunMarc and mh authored May 31, 2024
1 parent 86b6dea commit 065e74d
Showing 1 changed file with 5 additions and 1 deletion.
src/accelerate/utils/modeling.py
```diff
@@ -442,7 +442,11 @@ def set_module_tensor_to_device(
             elif module.bias is None:
                 # if no bias exists, we can quantize right away
                 module = module.cuda(device_index)
-    elif module.__class__.__name__ == "Linear4bit" and getattr(module.weight, "quant_state", None) is None:
+    elif (
+        module.__class__.__name__ == "Linear4bit"
+        and getattr(module.weight, "quant_state", None) is None
+        and str(module.weight.device) != "meta"
+    ):
         # quantize only if necessary
         device_index = torch.device(device).index if torch.device(device).type == "cuda" else None
         if not getattr(module.weight, "quant_state", None) and device_index is not None:
```
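
For context, a minimal sketch (not part of the commit; the standalone `weight` tensor here is a stand-in for a bitsandbytes `Linear4bit` weight) of why the added `str(module.weight.device) != "meta"` guard matters: a tensor on PyTorch's `meta` device carries shape and dtype but no storage, so the `.cuda()` call that triggers 4-bit quantization must not run until the real weight has been loaded. Per the commit title, the bug surfaced when a bias was set while the weight still lived on `meta`.

```python
import torch

# A "meta" tensor has shape/dtype metadata but no storage; it cannot be
# moved to CUDA, which is the step that triggers bitsandbytes quantization.
weight = torch.nn.Parameter(torch.empty(16, 16, device="meta"))

# The patched condition: quantize only when the weight is unquantized
# AND actually holds data (i.e., it has left the meta device).
should_quantize = (
    getattr(weight, "quant_state", None) is None  # not yet quantized
    and str(weight.device) != "meta"              # real data is present
)
print(should_quantize)  # False -> quantization is deferred, as intended
```

The analogous `Linear8bitLt` branch in the same function already carried this device check alongside its `SCB` test, so the fix brings the two quantization paths into line.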
