From abb448b457877b3044a353bd84d8c09cb5aa5474 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Fri, 19 Sep 2025 18:38:37 -0400 Subject: [PATCH] Update vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Signed-off-by: yewentao256 --- .../layers/quantization/kernels/scaled_mm/cutlass.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py b/vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py index 321084d9754c0..4922b7513c931 100644 --- a/vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py +++ b/vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py @@ -89,7 +89,7 @@ class CutlassScaledMMLinearKernel(ScaledMMLinearKernel): # It does not depend on scales or azp, so it is the same for # static and dynamic quantization. # For more details, see csrc/quantization/w8a8/cutlass/Epilogues.md - # https://github.com/vllm-project/vllm/blob/8d59dbb00044a588cab96bcdc028006ed922eb06/csrc/quantization/w8a8/cutlass/Epilogues.md + # https://github.com/vllm-project/vllm/blob/main/csrc/quantization/w8a8/cutlass/Epilogues.md if not self.config.input_symmetric: weight = getattr(layer, self.w_q_name) azp_adj = weight.sum(dim=0, keepdim=True, dtype=torch.int32)