From 28a6d5423db63ba9c4df13608f6151a484bdb7c9 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Wed, 16 Jul 2025 22:54:45 -0400
Subject: [PATCH] [Bugfix] Fix Machete zero point issue for GPTQ models on SM90 (#21066)

Signed-off-by: mgoin
---
 .../layers/quantization/kernels/mixed_precision/machete.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py b/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
index ed81b02bc4a10..da951ddab2e4e 100644
--- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
+++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
@@ -126,6 +126,11 @@ class MacheteLinearKernel(MPLinearKernel):
         if c.has_g_idx:
             x_2d = self.act_perm(x_2d)
 
+        if c.zero_points:
+            assert w_zp is not None
+        else:
+            w_zp = None
+
         output = ops.machete_mm(a=x_2d,
                                 b_q=w_q,
                                 b_type=c.weight_type,