mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 15:25:01 +08:00
[Bugfix] Fix imports for MoE on CPU (#15841)
Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg>
This commit is contained in:
parent
6efb195a6e
commit
2039c6305b
@@ -4,8 +4,6 @@ from typing import List, Optional
|
||||
import torch
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||
per_token_group_quant_fp8)
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
|
||||
@@ -38,6 +36,9 @@ def rocm_aiter_fused_experts(
|
||||
import aiter as rocm_aiter
|
||||
import aiter.fused_moe_bf16_asm as rocm_aiter_asm_fmoe
|
||||
|
||||
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||
per_token_group_quant_fp8)
|
||||
|
||||
if envs.VLLM_ROCM_USE_AITER_FP8_BLOCK_SCALED_MOE and use_fp8_w8a8:
|
||||
assert w1_scale is not None
|
||||
assert w2_scale is not None
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user