[PT FE] Disable support for cuda GPTQ type, since it was never supported (#27005)

### Details:
- *`QUANT_TYPE == "cuda"` is not supported, but it was claimed as supported and produced an incorrect model.*

### Tickets:
 - *CVS-154376*
mvafin authored Oct 11, 2024
1 parent ebfefab commit f7081a7
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions src/bindings/python/src/openvino/frontend/pytorch/gptq.py
@@ -92,7 +92,7 @@ def patched_forward_sym(self, *args, **kwargs):


# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema
-supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old']
+supported_quant_types = ['triton', 'exllama', 'exllamav2', 'cuda-old']


def patch_model(model):
@@ -116,11 +116,12 @@ def patch_model(model):
        m.float()  # enables tracing on CPU, applied for all modules
        if hasattr(m, 'QUANT_TYPE'):
            if m.QUANT_TYPE not in supported_quant_types:
-               raise ValueError(
-                   f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for AutoGPTQ model, only the following types are supported: {supported_quant_types}')
+               raise ValueError(f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for '
+                                'AutoGPTQ model, only the following types are supported: '
+                                f'{supported_quant_types}')
            if m.bits != 4:
-               raise ValueError(
-                   f'Unsupported bits == {m.bits} is discovered in module {name} in AutoGPTQ model, only bits == 4 is supported.')
+               raise ValueError(f'Unsupported bits == {m.bits} is discovered in module {name} '
+                                'in AutoGPTQ model, only bits == 4 is supported.')

            int4_in_int32 = 8
            groups = m.qzeros.shape[0]
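
For context, here is a minimal, self-contained sketch (not part of this commit) of how the stricter check behaves: after the change, an AutoGPTQ module reporting `QUANT_TYPE == 'cuda'` is rejected with a `ValueError` instead of being traced into an incorrect model. `FakeQuantLinear` and `check_quant_types` below are hypothetical stand-ins used only for illustration; they are not part of the OpenVINO code base.

```python
import torch

# Same list as in gptq.py after this change: 'cuda' is no longer accepted.
supported_quant_types = ['triton', 'exllama', 'exllamav2', 'cuda-old']


class FakeQuantLinear(torch.nn.Module):
    """Stand-in for an AutoGPTQ QuantLinear module using the 'cuda' kernel."""
    QUANT_TYPE = 'cuda'
    bits = 4


def check_quant_types(model):
    # Mirrors the rejection logic of patch_model(): any module whose
    # QUANT_TYPE is not in the supported list raises ValueError
    # instead of silently producing an incorrect model.
    for name, m in model.named_modules():
        if hasattr(m, 'QUANT_TYPE') and m.QUANT_TYPE not in supported_quant_types:
            raise ValueError(f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} is discovered for '
                             'AutoGPTQ model, only the following types are supported: '
                             f'{supported_quant_types}')


model = torch.nn.Sequential(FakeQuantLinear())
try:
    check_quant_types(model)
except ValueError as err:
    print(err)  # 'cuda' is reported as unsupported
```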
