From f52b991db6f2afc3550d6974512eb945915e2748 Mon Sep 17 00:00:00 2001
From: Corey Lowman
Date: Tue, 23 Sep 2025 23:16:13 -0400
Subject: [PATCH] [Perf] Fix jit compiles at runtime of fla gated delta rule (#25432)

Co-authored-by: Michael Goin
Signed-off-by: yewentao256
---
 vllm/model_executor/layers/fla/ops/fused_recurrent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/layers/fla/ops/fused_recurrent.py b/vllm/model_executor/layers/fla/ops/fused_recurrent.py
index b278e37415748..98437340fd242 100644
--- a/vllm/model_executor/layers/fla/ops/fused_recurrent.py
+++ b/vllm/model_executor/layers/fla/ops/fused_recurrent.py
@@ -40,8 +40,8 @@ def fused_recurrent_gated_delta_rule_fwd_kernel(
     ssm_state_indices,
     num_accepted_tokens,
     scale,
-    N: tl.constexpr,  # num of sequences
-    T: tl.constexpr,  # num of tokens
+    N: tl.int64,  # num of sequences
+    T: tl.int64,  # num of tokens
     B: tl.constexpr,
     H: tl.constexpr,
     HV: tl.constexpr,