[FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches (#22896)

Signed-off-by: JartX <sagformas@epdcenter.es>
2025-12-10 23:35:52 +08:00 · 2025-08-15 19:42:49 +02:00 · 2025-08-15 19:42:49 +02:00 · 68af77e51c
commit 68af77e51c
parent 6b04039a72
1 changed file with 4 additions and 3 deletions
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1337,9 +1337,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
        out_indices = []
        # Reorder the bitmask to match the order of the requests in the batch.
-        sorted_bitmask = np.zeros_like(grammar_bitmask,
-                                       shape=(logits.shape[0],
-                                              grammar_bitmask.shape[1]))
+        sorted_bitmask = np.full(shape=(logits.shape[0],
+                                        grammar_bitmask.shape[1]),
+                                 fill_value=-1,
+                                 dtype=grammar_bitmask.dtype)
        cumulative_index = 0
        seq = sorted(scheduler_output.structured_output_request_ids.items(),
                     key=lambda x: x[1])