From 0ec3779df74ff68ab920856234e2a1aafc21f1a1 Mon Sep 17 00:00:00 2001
From: "Li, Jiang"
Date: Thu, 3 Jul 2025 11:11:36 +0800
Subject: [PATCH] [Bugfix][CI/CD][CPU] Fix CPU CI tests (#20383)

Signed-off-by: jiang1.li
---
 .../layers/quantization/utils/fp8_utils.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index c38a445c571b8..cbf8231defc6c 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -201,12 +201,13 @@ def apply_w8a8_block_fp8_linear_fake(
     return torch.empty(output_shape, dtype=input.dtype, device=input.device)
 
 
-direct_register_custom_op(
-    op_name="apply_w8a8_block_fp8_linear",
-    op_func=apply_w8a8_block_fp8_linear,
-    mutates_args=[],
-    fake_impl=apply_w8a8_block_fp8_linear_fake,
-)
+if not current_platform.is_cpu():
+    direct_register_custom_op(
+        op_name="apply_w8a8_block_fp8_linear",
+        op_func=apply_w8a8_block_fp8_linear,
+        mutates_args=[],
+        fake_impl=apply_w8a8_block_fp8_linear_fake,
+    )
 
 
 def input_to_float8(