[FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches (#22896)

Signed-off-by: JartX <sagformas@epdcenter.es>
JartX 2025-08-15 19:42:49 +02:00 committed by GitHub
parent 6b04039a72
commit 68af77e51c


@@ -1337,9 +1337,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
         out_indices = []
         # Reorder the bitmask to match the order of the requests in the batch.
-        sorted_bitmask = np.zeros_like(grammar_bitmask,
-                                       shape=(logits.shape[0],
-                                              grammar_bitmask.shape[1]))
+        sorted_bitmask = np.full(shape=(logits.shape[0],
+                                        grammar_bitmask.shape[1]),
+                                 fill_value=-1,
+                                 dtype=grammar_bitmask.dtype)
         cumulative_index = 0
         seq = sorted(scheduler_output.structured_output_request_ids.items(),
                      key=lambda x: x[1])
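
Why the fill value matters: in a mixed batch, rows of the reordered bitmask that belong to requests without structured output keep whatever the array was initialized with. With np.zeros_like those rows are all-zero, which (under the usual packing where a set bit means "token allowed") masks every token for those requests; filling with -1 sets all 32 bits of each int32 word, so the mask becomes a no-op for them. Below is a minimal sketch of that difference, assuming that bit packing; apply_bitmask, VOCAB, and WORDS are illustrative helpers for this example, not vLLM code.

import numpy as np

VOCAB = 70                    # toy vocabulary size (assumption for the sketch)
WORDS = (VOCAB + 31) // 32    # int32 words per bitmask row

def apply_bitmask(logits: np.ndarray, bitmask_row: np.ndarray) -> np.ndarray:
    """Token t is allowed iff bit (t % 32) of word (t // 32) is set; others get -inf."""
    out = logits.copy()
    for t in range(logits.shape[0]):
        word = int(bitmask_row[t // 32]) & 0xFFFFFFFF  # treat the int32 word as unsigned
        if not (word >> (t % 32)) & 1:
            out[t] = -np.inf
    return out

logits = np.random.randn(VOCAB).astype(np.float32)

# Old behaviour: an all-zero row (as produced by np.zeros_like) disallows every token.
zero_row = np.zeros(WORDS, dtype=np.int32)
print(np.isinf(apply_bitmask(logits, zero_row)).all())        # True: everything masked

# New behaviour: a row filled with -1 has all bits set, so the logits pass through unchanged.
full_row = np.full(WORDS, -1, dtype=np.int32)
print(np.allclose(apply_bitmask(logits, full_row), logits))   # True: nothing masked

In other words, with the -1 fill the rows that are never overwritten by a structured-output request leave their logits untouched, which is the behaviour the commit title describes for mixed batches.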