Skip to content

Commit

Permalink
[Bugfix] Ignore GPTQ quantization of Qwen2-VL visual module (vllm-project#10169)
Browse files Browse the repository at this point in the history

Signed-off-by: mgoin <[email protected]>
  • Loading branch information
mgoin authored Nov 9, 2024
1 parent 0655942 commit fd1cf61
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions vllm/model_executor/models/qwen2_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
RowParallelLinear)
from vllm.model_executor.layers.logits_processor import LogitsProcessor
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.quantization import (GPTQConfig,
GPTQMarlinConfig,
QuantizationConfig)
from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
Expand Down Expand Up @@ -982,7 +984,7 @@ def __init__(self,
self.visual = Qwen2VisionTransformer(
config.vision_config,
norm_eps=getattr(config, "rms_norm_eps", 1e-6),
quant_config=quant_config,
quant_config=self._maybe_ignore_quant_config(quant_config),
prefix="visual",
)

Expand All @@ -1008,6 +1010,14 @@ def __init__(self,
make_empty_intermediate_tensors_factory(
["hidden_states", "residual"], config.hidden_size))

def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig):
    """Return the quantization config to hand to the visual module.

    GPTQ configs do not carry a list of ignored modules, yet AutoGPTQ
    seems to leave the vision encoder sections unquantized for some
    models. For those config types the quantization config is dropped.
    See: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4

    Args:
        quant_config: The quantization config supplied by the caller.

    Returns:
        ``None`` when ``quant_config`` is a ``GPTQConfig`` or
        ``GPTQMarlinConfig`` instance; otherwise ``quant_config``
        unchanged.
    """
    gptq_config_types = (GPTQConfig, GPTQMarlinConfig)
    is_gptq = isinstance(quant_config, gptq_config_types)
    return None if is_gptq else quant_config

def _validate_and_reshape_mm_tensor(self,
mm_input: Union[torch.Tensor,
List[torch.Tensor]],
Expand Down

0 comments on commit fd1cf61

Please sign in to comment.