From 6e97eccf5dd5036e26d63141d2bc1a9ea17a2cc8 Mon Sep 17 00:00:00 2001
From: Frost Mitchell <frost.mitchell@intel.com>
Date: Wed, 5 Nov 2025 08:39:57 -0500
Subject: [PATCH] [XPU] Enable custom routing functions in IPEX for Llama4
 (#28004)

Signed-off-by: frost-intel <frost.mitchell@intel.com>
---
 vllm/model_executor/layers/fused_moe/layer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 118d5fa6b45c4..0a8c2f311f5c6 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -915,7 +915,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             or logical_replica_count is not None
         ):
             raise NotImplementedError("Expert load balancing is not supported for XPU.")
-        assert custom_routing_function is None
         return layer.ipex_fusion(
             x,
             use_grouped_topk,
@@ -924,6 +923,7 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
             renormalize,
             topk_group,
             num_expert_group,
+            custom_routing_function=custom_routing_function,
         )
 
     def forward_tpu(