From 6273fe8d3d72d26b7a60e0d60d5c378273156e9e Mon Sep 17 00:00:00 2001
From: Lukas Geiger
Date: Wed, 8 Oct 2025 17:31:59 +0100
Subject: [PATCH] [Benchmarks] Fix imports in FP8 tuning script (#26407)

Signed-off-by: Lukas Geiger
---
 benchmarks/kernels/benchmark_w8a8_block_fp8.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/kernels/benchmark_w8a8_block_fp8.py b/benchmarks/kernels/benchmark_w8a8_block_fp8.py
index c6c8e0b0b936b..602fad1810748 100644
--- a/benchmarks/kernels/benchmark_w8a8_block_fp8.py
+++ b/benchmarks/kernels/benchmark_w8a8_block_fp8.py
@@ -14,7 +14,7 @@
 import torch
 from tqdm import tqdm
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    _w8a8_block_fp8_matmul,
+    _w8a8_triton_block_scaled_mm,
 )
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
@@ -83,7 +83,7 @@ def w8a8_block_matmul(
     )

     if A.dtype == torch.float8_e4m3fn:
-        kernel = _w8a8_block_fp8_matmul
+        kernel = _w8a8_triton_block_scaled_mm
     else:
         raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")
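
For reference, a minimal sketch of the kernel-selection logic as it stands after this patch. It assumes only what the diff shows: the Triton block-scaled matmul is now exported from `fp8_utils` as `_w8a8_triton_block_scaled_mm`, and the tuning path only accepts FP8 (e4m3) activations. The helper name `_select_kernel` is hypothetical and used here purely for illustration; the real script does this inside `w8a8_block_matmul`.

```python
# Sketch of the post-patch import and dtype-based kernel dispatch.
# _select_kernel is a hypothetical helper; the benchmark script performs
# this check inline inside w8a8_block_matmul.
import torch
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
    _w8a8_triton_block_scaled_mm,
)


def _select_kernel(A: torch.Tensor):
    # The tuning script only supports FP8 (e4m3) activations; other dtypes
    # are rejected, mirroring the RuntimeError in the patched hunk.
    if A.dtype == torch.float8_e4m3fn:
        return _w8a8_triton_block_scaled_mm
    raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")
```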