From 28bba6dfa66a3e3548f99f1da1fd4db25c9e55cb Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Fri, 27 Sep 2024 15:12:34 -0400
Subject: [PATCH] [Bugfix] fix for deepseek w4a16 (#8906)

Co-authored-by: mgoin
Signed-off-by: Alvant
---
 .../model_executor/layers/quantization/kernels/marlin.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/kernels/marlin.py b/vllm/model_executor/layers/quantization/kernels/marlin.py
index 5b4bba76ee0ca..6969583d6d473 100644
--- a/vllm/model_executor/layers/quantization/kernels/marlin.py
+++ b/vllm/model_executor/layers/quantization/kernels/marlin.py
@@ -38,10 +38,11 @@ def can_implement(cls,
                             "Marlin, supported group sizes are: "\
                             f"{MARLIN_SUPPORTED_GROUP_SIZES}"
 
-        return check_marlin_supports_shape(c.partition_weight_shape[0],
-                                           c.partition_weight_shape[1],
-                                           c.full_weight_shape[1],
-                                           c.group_size)
+        return check_marlin_supports_shape(
+            c.partition_weight_shape[1],  # out_features
+            c.partition_weight_shape[0],  # in_features
+            c.full_weight_shape[0],  # in_features
+            c.group_size)
 
     # note assumes that
     #  `weight_packed` is: {input_dim = 0, output_dim = 1, packed_dim = 0}