[Benchmarks] Fix imports in FP8 tuning script (#26407)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
2026-03-16 14:07:13 +08:00 · 2025-10-08 17:31:59 +01:00 · 2025-10-08 17:31:59 +01:00 · 6273fe8d3d
commit 6273fe8d3d
parent 9fb3ae4e6f
1 changed files with 2 additions and 2 deletions
--- a/benchmarks/kernels/benchmark_w8a8_block_fp8.py
+++ b/benchmarks/kernels/benchmark_w8a8_block_fp8.py
@@ -14,7 +14,7 @@ import torch
 from tqdm import tqdm

 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    _w8a8_block_fp8_matmul,
+    _w8a8_triton_block_scaled_mm,
 )
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
@@ -83,7 +83,7 @@ def w8a8_block_matmul(
        )

    if A.dtype == torch.float8_e4m3fn:
-        kernel = _w8a8_block_fp8_matmul
+        kernel = _w8a8_triton_block_scaled_mm
    else:
        raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")