From 92540529c051fe6e8f111d7688ffac84ac561a0d Mon Sep 17 00:00:00 2001 From: TJian Date: Fri, 16 May 2025 00:53:18 +0800 Subject: [PATCH] [Bugfix] [ROCm]: Remove the apply_router_weight_on_input assertion when using AITER fused MoE in UnquantizedFusedMoEMethod to re-enable Llama4 BF16 (#18205) Signed-off-by: tjtanaa --- vllm/model_executor/layers/fused_moe/layer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 0b3c02d1ba28f..f1cb77f64eae7 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -503,7 +503,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): indices_type=torch.uint32 if self.moe.use_pplx_kernels else None) if self.rocm_aiter_moe_enabled: - assert not apply_router_weight_on_input assert expert_map is None return self.rocm_aiter_fused_experts( hidden_states=x,