mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 23:35:52 +08:00
[FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches (#22896)
Signed-off-by: JartX <sagformas@epdcenter.es>
This commit is contained in:
parent
6b04039a72
commit
68af77e51c
@@ -1337,9 +1337,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
out_indices = []
|
out_indices = []
|
||||||
|
|
||||||
# Reorder the bitmask to match the order of the requests in the batch.
|
# Reorder the bitmask to match the order of the requests in the batch.
|
||||||
sorted_bitmask = np.zeros_like(grammar_bitmask,
|
sorted_bitmask = np.full(shape=(logits.shape[0],
|
||||||
shape=(logits.shape[0],
|
grammar_bitmask.shape[1]),
|
||||||
grammar_bitmask.shape[1]))
|
fill_value=-1,
|
||||||
|
dtype=grammar_bitmask.dtype)
|
||||||
cumulative_index = 0
|
cumulative_index = 0
|
||||||
seq = sorted(scheduler_output.structured_output_request_ids.items(),
|
seq = sorted(scheduler_output.structured_output_request_ids.items(),
|
||||||
key=lambda x: x[1])
|
key=lambda x: x[1])
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user