Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-03-31 03:37:02 +08:00)
[ROCm] [Fused Moe EP] Use binary expert mask for aiter fused moe kernel (#29773)
Signed-off-by: ZhiweiYan-96 <zhiwei.yan@amd.com>
This commit is contained in:
parent: d726a7b0ed
commit: c6df05ebb4
@@ -520,6 +520,10 @@ class FusedMoE(CustomOp):
|
||||
self._init_aiter_shared_experts_topK_buffer(
|
||||
vllm_config=vllm_config, dp_size=dp_size_
|
||||
)
|
||||
if self.use_ep and self.rocm_aiter_fmoe_enabled:
|
||||
assert self.expert_mask is None or torch.all(
|
||||
(expert_mask == 0) | (expert_mask == 1)
|
||||
), "Aiter Fused MoE kernel only supports expert_map with 0 and 1s."
|
||||
|
||||
assert intermediate_size % self.tp_size == 0
|
||||
self.hidden_size = hidden_size
|
||||
|
||||
@@ -633,6 +633,7 @@ class QuarkOCP_MX_MoEMethod(QuarkMoEMethod):
|
||||
topk_ids=topk_ids,
|
||||
activation=activation,
|
||||
quant_config=self.moe_quant_config,
|
||||
expert_map=expert_map,
|
||||
)
|
||||
else:
|
||||
from vllm.model_executor.layers.fused_moe import fused_experts
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user