mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-19 20:37:03 +08:00
[Bugfix] Fix imports for MoE on CPU (#15841)
Signed-off-by: Thien Tran <gau.nernst@yahoo.com.sg>
This commit is contained in:
parent
6efb195a6e
commit
2039c6305b
@@ -4,8 +4,6 @@ from typing import List, Optional
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
import vllm.envs as envs
|
import vllm.envs as envs
|
||||||
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
|
||||||
per_token_group_quant_fp8)
|
|
||||||
from vllm.platforms import current_platform
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
|
|
||||||
@@ -38,6 +36,9 @@ def rocm_aiter_fused_experts(
|
|||||||
import aiter as rocm_aiter
|
import aiter as rocm_aiter
|
||||||
import aiter.fused_moe_bf16_asm as rocm_aiter_asm_fmoe
|
import aiter.fused_moe_bf16_asm as rocm_aiter_asm_fmoe
|
||||||
|
|
||||||
|
from vllm.model_executor.layers.quantization.utils.fp8_utils import (
|
||||||
|
per_token_group_quant_fp8)
|
||||||
|
|
||||||
if envs.VLLM_ROCM_USE_AITER_FP8_BLOCK_SCALED_MOE and use_fp8_w8a8:
|
if envs.VLLM_ROCM_USE_AITER_FP8_BLOCK_SCALED_MOE and use_fp8_w8a8:
|
||||||
assert w1_scale is not None
|
assert w1_scale is not None
|
||||||
assert w2_scale is not None
|
assert w2_scale is not None
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user