[Benchmarks] Fix imports in FP8 tuning script (#26407)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
Lukas Geiger 2025-10-08 17:31:59 +01:00 committed by GitHub
parent 9fb3ae4e6f
commit 6273fe8d3d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -14,7 +14,7 @@ import torch
 from tqdm import tqdm
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    _w8a8_block_fp8_matmul,
+    _w8a8_triton_block_scaled_mm,
 )
 from vllm.platforms import current_platform
 from vllm.triton_utils import triton
@@ -83,7 +83,7 @@ def w8a8_block_matmul(
     )
     if A.dtype == torch.float8_e4m3fn:
-        kernel = _w8a8_block_fp8_matmul
+        kernel = _w8a8_triton_block_scaled_mm
     else:
         raise RuntimeError("Currently, only support tune w8a8 block fp8 kernel.")