From e6c22d2b2fde94b32a6af408cd3c2818560245ab Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Mon, 22 Sep 2025 21:42:45 -0400
Subject: [PATCH] [Perf] Apply torch.compile for `per_block_cast_to_fp8` (#24611)

Signed-off-by: yewentao256
---
 vllm/utils/deep_gemm.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py
index 38d92f01192b1..4083193d7650e 100644
--- a/vllm/utils/deep_gemm.py
+++ b/vllm/utils/deep_gemm.py
@@ -135,7 +135,7 @@ DEFAULT_BLOCK_SIZE = [128, 128]
 
 
 # Taken from https://github.com/deepseek-ai/DeepGEMM/blob/dd6ed14acbc7445dcef224248a77ab4d22b5f240/deep_gemm/utils/math.py#L38
-# TODO(wentao): optimize this function, using triton or cuda kernel
+@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend)
 def per_block_cast_to_fp8(
     x: torch.Tensor,
     block_size: list[int] = DEFAULT_BLOCK_SIZE,
@@ -187,4 +187,4 @@ __all__ = [
     "is_deep_gemm_e8m0_used",
     "is_deep_gemm_supported",
     "should_use_deepgemm_for_fp8_linear",
-]
+]
\ No newline at end of file
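
Context for reviewers: the patch only changes the decoration of `per_block_cast_to_fp8`, not its body, so below is a minimal, self-contained sketch of the pattern being applied: a pure-PyTorch per-block FP8 quantization helper wrapped in `torch.compile` so the reshape/amax/scale/cast chain can be fused instead of running as separate eager ops. The function body here is an illustration modeled on the DeepGEMM reference linked in the diff, not a copy of vllm's implementation; the name `per_block_cast_to_fp8_sketch` and the use of the default inductor backend (standing in for `current_platform.simple_compile_backend`) are assumptions for the example.

import torch

DEFAULT_BLOCK_SIZE = [128, 128]


@torch.compile(dynamic=True)  # dynamic=True avoids recompiling for every new weight shape
def per_block_cast_to_fp8_sketch(
    x: torch.Tensor,
    block_size: list[int] = DEFAULT_BLOCK_SIZE,
) -> tuple[torch.Tensor, torch.Tensor]:
    """Quantize a 2D tensor to FP8 with one scale per (block_m x block_n) block.

    Illustrative only; the real vllm helper may differ in padding and return shape.
    """
    assert x.dim() == 2
    block_m, block_n = block_size
    m, n = x.shape
    # Pad both dimensions up to a multiple of the block size.
    padded_m = (m + block_m - 1) // block_m * block_m
    padded_n = (n + block_n - 1) // block_n * block_n
    x_padded = torch.zeros(padded_m, padded_n, dtype=x.dtype, device=x.device)
    x_padded[:m, :n] = x
    # View as (blocks_m, block_m, blocks_n, block_n) and take the per-block absolute max.
    x_view = x_padded.view(padded_m // block_m, block_m, padded_n // block_n, block_n)
    amax = x_view.abs().float().amax(dim=(1, 3), keepdim=True).clamp(min=1e-4)
    fp8_max = torch.finfo(torch.float8_e4m3fn).max  # 448.0 for e4m3fn
    # Scale each block into FP8 range and cast; keep one float32 scale per block.
    x_fp8 = (x_view * (fp8_max / amax)).to(torch.float8_e4m3fn)
    scales = (amax / fp8_max).squeeze(1).squeeze(-1)  # shape: (blocks_m, blocks_n)
    return x_fp8.view(padded_m, padded_n)[:m, :n].contiguous(), scales


# Example usage of the sketch:
w = torch.randn(512, 1024, dtype=torch.bfloat16)
q, s = per_block_cast_to_fp8_sketch(w)
print(q.dtype, s.shape)  # torch.float8_e4m3fn, torch.Size([4, 8])

Compiling with dynamic=True matters here because the helper is called with many different weight shapes; without it, torch.compile would specialize and recompile per shape, which could easily outweigh the fusion benefit.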