Commit f343336

Fix AOPerModuleConfig bug in skipping quantizing modules (#2135)
* Fix AOPerModuleConfig bug in skipping quantizing modules

  Summary: The previous logic mishandled modules that are explicitly skipped (mapped to None): it fell back to the default config instead of leaving them unquantized. This PR fixes that.

  Test Plan: pytest test/quantization/test_quant_api.py -k test_ao_per_module_config_skip

* Add IntxWeightOnlyConfig to torchao.quantization
1 parent 0810f57 commit f343336
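
For context, a minimal sketch of the usage pattern this fix addresses: mapping a module's fully qualified name to `None` in `AOPerModuleConfig` is supposed to skip quantization for that module, even when a `_default` entry is present. The `Toy` model below is a hypothetical stand-in for the `ToyLinearModel` used in the new test; the config and `quantize_` calls mirror the test.

```python
import torch
from torch import nn
from torchao.quantization import AOPerModuleConfig, Int4WeightOnlyConfig, quantize_

class Toy(nn.Module):  # hypothetical stand-in for the test's ToyLinearModel
    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(64, 32, bias=False)
        self.linear2 = nn.Linear(32, 8, bias=False)

    def forward(self, x):
        return self.linear2(self.linear1(x))

model = Toy().cuda().to(torch.bfloat16)  # the int4 weight-only path targets CUDA + bf16
config = AOPerModuleConfig({
    "_default": Int4WeightOnlyConfig(group_size=32),  # applied to modules by default
    "linear2": None,  # explicit None: leave this module unquantized
})
quantize_(model, config)
# After the fix: model.linear1.weight is an AffineQuantizedTensor,
# while model.linear2.weight remains a plain, unquantized weight.
```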

File tree (3 files changed: +23 −7):

test/quantization/test_quant_api.py
torchao/quantization/__init__.py
torchao/quantization/quant_api.py

test/quantization/test_quant_api.py (+12)

```diff
@@ -1005,6 +1005,18 @@ def test_ao_per_module_config_embedding_linear(self):
         assert isinstance(model.emb.weight._layout, QDQLayout)
         assert isinstance(model.linear.weight, LinearActivationQuantizedTensor)
 
+    @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
+    def test_ao_per_module_config_skip(self):
+        config1 = Int4WeightOnlyConfig(group_size=32)
+        config = AOPerModuleConfig({"_default": config1, "linear2": None})
+        model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
+        example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
+        quantize_(model, config)
+        model(*example_inputs)
+        assert isinstance(model.linear1.weight, AffineQuantizedTensor)
+        assert isinstance(model.linear1.weight._layout, TensorCoreTiledLayout)
+        assert not isinstance(model.linear2.weight, AffineQuantizedTensor)
+
 
 class TestMultiTensorFlow(TestCase):
     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "Test only enabled for 2.4+")
```

torchao/quantization/__init__.py (+2)

```diff
@@ -53,6 +53,7 @@
     Int8DynamicActivationInt4WeightConfig,
     Int8DynamicActivationInt8WeightConfig,
     Int8WeightOnlyConfig,
+    IntxWeightOnlyConfig,
     PlainLayout,
     TensorCoreTiledLayout,
     UIntXWeightOnlyConfig,
@@ -139,6 +140,7 @@
     "Float8StaticActivationFloat8WeightConfig",
     "Float8DynamicActivationFloat8SemiSparseWeightConfig",
     "UIntXWeightOnlyConfig",
+    "IntxWeightOnlyConfig",
     "FPXWeightOnlyConfig",
     "GemliteUIntXWeightOnlyConfig",
     "AOPerModuleConfig",
```

torchao/quantization/quant_api.py (+9 −7)

```diff
@@ -594,6 +594,7 @@ def quantize_(
 
     """
     filter_fn = _is_linear if filter_fn is None else filter_fn
+
     if isinstance(config, AOPerModuleConfig):
         _replace_with_custom_fn_if_matches_filter_with_name(
             model,
@@ -1975,18 +1976,19 @@ class AOPerModuleConfig(AOBaseConfig):
 def _ao_per_module_config_handler(
     module: torch.nn.Module, module_fqn: str, config: AOPerModuleConfig
 ):
-    c = config.module_fqn_to_config.get(module_fqn, None)
-    # Maybe: we can add module type specific config in the future, in needed
-    # fallback to use default if no module specific config is provided
-    default_c = config.module_fqn_to_config.get("_default", None)
-    if default_c is not None and c is None:
-        c = default_c
+    c = None
+    if module_fqn in config.module_fqn_to_config:
+        # Maybe: we can add module type specific config in the future, in needed
+        c = config.module_fqn_to_config[module_fqn]
+    else:
+        # fallback to use default if no module specific config is provided
+        c = config.module_fqn_to_config.get("_default", None)
 
     if c is not None:
         handler = _QUANTIZE_CONFIG_HANDLER[type(c)]
         return handler(module, c)
 
-    return handler(module, c)
+    return module
 
 
 if TORCH_VERSION_AT_LEAST_2_5:
```
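
The root cause is a classic `dict.get` pitfall: `get(module_fqn, None)` returns `None` both when the key is absent and when the user explicitly mapped the key to `None`, so the old handler fell back to `_default` even for modules marked as skipped. A standalone sketch of the two behaviors, with plain strings standing in for config objects:

```python
module_fqn_to_config = {"_default": "int4-config", "linear2": None}

# Old logic: get() cannot tell "missing" from "explicitly None", so the
# _default fallback fires and linear2 gets quantized despite the skip request.
c = module_fqn_to_config.get("linear2", None)
default_c = module_fqn_to_config.get("_default", None)
if default_c is not None and c is None:
    c = default_c
assert c == "int4-config"  # bug: the skip request is ignored

# New logic: a membership test distinguishes the two cases.
if "linear2" in module_fqn_to_config:
    c = module_fqn_to_config["linear2"]  # explicitly None -> skip
else:
    c = module_fqn_to_config.get("_default", None)
assert c is None  # fixed: linear2 is left unquantized
```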
