fix return type of quant methods
jikunshang committed Apr 11, 2024
1 parent 455bbbd commit 31ffd25
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions vllm/_custom_ops.py
@@ -115,21 +115,23 @@ def fused_add_rms_norm(input: torch.Tensor, residual: torch.Tensor,
 # awq
 def awq_dequantize(qweight: torch.Tensor, scales: torch.Tensor,
                    zeros: torch.Tensor, split_k_iters: int, thx: int,
-                   thy: int) -> None:
-    vllm_ops.awq_dequantize(qweight, scales, zeros, split_k_iters, thx, thy)
+                   thy: int) -> torch.Tensor:
+    return vllm_ops.awq_dequantize(qweight, scales, zeros, split_k_iters, thx,
+                                   thy)
 
 
 def awq_gemm(input: torch.Tensor, qweight: torch.Tensor, qzeros: torch.Tensor,
-             scales: torch.Tensor, split_k_iters: int) -> None:
-    vllm_ops.awq_gemm(input, qweight, qzeros, scales, split_k_iters)
+             scales: torch.Tensor, split_k_iters: int) -> torch.Tensor:
+    return vllm_ops.awq_gemm(input, qweight, qzeros, scales, split_k_iters)
 
 
 # gptq
 def gptq_gemm(a: torch.Tensor, b_q_weight: torch.Tensor,
               b_gptq_qzeros: torch.Tensor, b_gptq_scales: torch.Tensor,
-              b_g_idx: torch.Tensor, use_exllama: bool, bit: int) -> None:
-    vllm_ops.gptq_gemm(a, b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx,
-                       use_exllama, bit)
+              b_g_idx: torch.Tensor, use_exllama: bool,
+              bit: int) -> torch.Tensor:
+    return vllm_ops.gptq_gemm(a, b_q_weight, b_gptq_qzeros, b_gptq_scales,
+                              b_g_idx, use_exllama, bit)
 
 
 def gptq_shuffle(q_weight: torch.Tensor, q_perm: torch.Tensor,
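The substance of the change: these thin Python wrappers forward to compiled kernels that produce an output tensor, but they were annotated -> None and silently dropped the kernel's result. Below is a minimal, self-contained sketch of that bug class in plain PyTorch; it does not call vLLM's actual kernels, and the names _fake_kernel, gemm_before, and gemm_after are hypothetical, chosen only to mirror the before/after shape of the diff.

import torch


def _fake_kernel(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # Stand-in for a compiled op such as vllm_ops.awq_gemm.
    return a @ b


def gemm_before(a: torch.Tensor, b: torch.Tensor) -> None:
    # Pre-fix shape of the wrapper: the kernel runs, but its output is
    # discarded and the annotation claims nothing is returned.
    _fake_kernel(a, b)


def gemm_after(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
    # Post-fix shape: the result is propagated to the caller and the
    # annotation matches the actual return type.
    return _fake_kernel(a, b)


a, b = torch.randn(4, 8), torch.randn(8, 16)
assert gemm_before(a, b) is None          # caller silently gets None
assert gemm_after(a, b).shape == (4, 16)  # caller gets the output tensor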
