From 6e97eccf5dd5036e26d63141d2bc1a9ea17a2cc8 Mon Sep 17 00:00:00 2001
From: Frost Mitchell
Date: Wed, 5 Nov 2025 08:39:57 -0500
Subject: [PATCH] [XPU] Enable custom routing functions in IPEX for Llama4 (#28004)

Signed-off-by: frost-intel
---
 vllm/model_executor/layers/fused_moe/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 118d5fa6b45c4..0a8c2f311f5c6 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -915,7 +915,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             or logical_replica_count is not None
         ):
             raise NotImplementedError("Expert load balancing is not supported for XPU.")
-        assert custom_routing_function is None
         return layer.ipex_fusion(
             x,
             use_grouped_topk,
@@ -924,6 +923,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             renormalize,
             topk_group,
             num_expert_group,
+            custom_routing_function=custom_routing_function,
         )
 
     def forward_tpu(