From 2039c6305bdcf2f920c80d8fcfce07cd6396f62f Mon Sep 17 00:00:00 2001
From: Thien Tran
Date: Wed, 2 Apr 2025 11:33:55 +0800
Subject: [PATCH] [Bugfix] Fix imports for MoE on CPU (#15841)

Signed-off-by: Thien Tran
---
 vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
index c9bb676710a78..ac158a7eee534 100644
--- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
@@ -4,8 +4,6 @@ from typing import List, Optional
 import torch
 
 import vllm.envs as envs
-from vllm.model_executor.layers.quantization.utils.fp8_utils import (
-    per_token_group_quant_fp8)
 from vllm.platforms import current_platform
 
 
@@ -38,6 +36,9 @@ def rocm_aiter_fused_experts(
     import aiter as rocm_aiter
     import aiter.fused_moe_bf16_asm as rocm_aiter_asm_fmoe
 
+    from vllm.model_executor.layers.quantization.utils.fp8_utils import (
+        per_token_group_quant_fp8)
+
     if envs.VLLM_ROCM_USE_AITER_FP8_BLOCK_SCALED_MOE and use_fp8_w8a8:
         assert w1_scale is not None
         assert w2_scale is not None
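
Not part of the patch above: a minimal, illustrative sketch of the deferred-import pattern the fix applies, so that merely importing rocm_aiter_fused_moe.py on a CPU build never evaluates the fp8_utils import. The function name and bare return below are placeholders for illustration, not the actual vLLM implementation.

    def rocm_aiter_only_path():
        # Deferred import: evaluated only when this ROCm-specific path is
        # actually called, never at module import time on a CPU backend.
        from vllm.model_executor.layers.quantization.utils.fp8_utils import (
            per_token_group_quant_fp8)
        return per_token_group_quant_fp8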