mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-11 02:35:42 +08:00
[Bugfix] Lazy import fused_experts in BitsAndBytesMoEMethod to avoid breaking non-CUDA-alike devices (#20822)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
parent
44d02f54db
commit
b1235c3e10
@@ -5,7 +5,6 @@ from typing import Any, Callable, Optional, Union
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
from vllm.model_executor.layers.fused_moe import fused_experts
|
|
||||||
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
|
from vllm.model_executor.layers.fused_moe.layer import (FusedMoE,
|
||||||
FusedMoEMethodBase)
|
FusedMoEMethodBase)
|
||||||
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
|
from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
|
||||||
@@ -467,6 +466,7 @@ class BitsAndBytesMoEMethod(FusedMoEMethodBase):
|
|||||||
logical_to_physical_map: Optional[torch.Tensor] = None,
|
logical_to_physical_map: Optional[torch.Tensor] = None,
|
||||||
logical_replica_count: Optional[torch.Tensor] = None,
|
logical_replica_count: Optional[torch.Tensor] = None,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
|
from vllm.model_executor.layers.fused_moe import fused_experts
|
||||||
|
|
||||||
if enable_eplb:
|
if enable_eplb:
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user