[XPU] Enable custom routing functions in IPEX for Llama4 (#28004)

Signed-off-by: frost-intel <frost.mitchell@intel.com>
2026-06-19 14:07:29 +08:00 · 2025-11-05 08:39:57 -05:00 · 2025-11-05 08:39:57 -05:00 · 6e97eccf5d
commit 6e97eccf5d
parent 6ab183813c
1 changed file with 1 addition and 1 deletion
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -915,7 +915,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
            or logical_replica_count is not None
        ):
            raise NotImplementedError("Expert load balancing is not supported for XPU.")
-        assert custom_routing_function is None
        return layer.ipex_fusion(
            x,
            use_grouped_topk,
@@ -924,6 +923,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
            renormalize,
            topk_group,
            num_expert_group,
+            custom_routing_function=custom_routing_function,
        )

    def forward_tpu(