From 68af77e51c5ca78ec0fd2496eca80b2257176b6e Mon Sep 17 00:00:00 2001
From: JartX
Date: Fri, 15 Aug 2025 19:42:49 +0200
Subject: [PATCH] [FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches
 (#22896)

Signed-off-by: JartX
---
 vllm/v1/worker/gpu_model_runner.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 9460d91c5832..3ea39dc519d8 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1337,9 +1337,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         out_indices = []
 
         # Reorder the bitmask to match the order of the requests in the batch.
-        sorted_bitmask = np.zeros_like(grammar_bitmask,
-                                       shape=(logits.shape[0],
-                                              grammar_bitmask.shape[1]))
+        sorted_bitmask = np.full(shape=(logits.shape[0],
+                                        grammar_bitmask.shape[1]),
+                                 fill_value=-1,
+                                 dtype=grammar_bitmask.dtype)
         cumulative_index = 0
         seq = sorted(scheduler_output.structured_output_request_ids.items(),
                      key=lambda x: x[1])