# Patch to the `moe_forward` custom-op registration in
# vllm/model_executor/layers/fused_moe/layer.py:
#   * `mutates_args` changes from an empty list to ["hidden_states"].
#   * the op is registered with `torch.Tag.needs_fixed_stride_order`.
# NOTE(review): presumably `moe_forward` may write to `hidden_states` in
# place and depends on its input strides -- confirm against the op body.
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
index 6fe95d32a10e7..672244385e52c 100644
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1743,7 +1743,8 @@ def moe_forward_fake(hidden_states: torch.Tensor, router_logits: torch.Tensor,
 direct_register_custom_op(
     op_name="moe_forward",
     op_func=moe_forward,
-    mutates_args=[],
+    mutates_args=["hidden_states"],
     fake_impl=moe_forward_fake,
     dispatch_key=current_platform.dispatch_key,
+    tags=(torch.Tag.needs_fixed_stride_order, ),
 )