From 981e1d834326ba13a7cb1844f7ad5de86674f23b Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
Date: Tue, 23 Dec 2025 17:28:08 +0000
Subject: [PATCH] Disable the broken Triton gemm_a8w8_blockscale path

Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
---
 .../layers/quantization/utils/fp8_utils.py        | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index ea68745585160..82e6de621fc87 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -362,23 +362,10 @@ class W8A8BlockFp8LinearOp:
 
         n, k = weight.shape
 
-        use_triton = (
-            not current_platform.is_fp8_fnuz()
-            and rocm_aiter_ops.is_triton_gemm_w8a8_tuned(n, k)
-        )
-
-        if use_triton:
-            gemm_a8w8_blockscale_op = rocm_aiter_ops.triton_gemm_a8w8_blockscale
-        else:
-            gemm_a8w8_blockscale_op = rocm_aiter_ops.gemm_a8w8_blockscale
+        gemm_a8w8_blockscale_op = rocm_aiter_ops.gemm_a8w8_blockscale
 
         if input_scale is not None:
             q_input = input_2d
-        elif use_triton:
-            q_input, input_scale = torch.ops.vllm.triton_per_token_group_quant_fp8(
-                input_2d,
-                self.act_quant_group_shape.col,
-            )
         else:
             q_input, input_scale = rocm_aiter_ops.group_fp8_quant(
                 input_2d, self.act_quant_group_shape.col