mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:24:56 +08:00
[FIXBUG] Correctly Apply Grammar Bitmask in Mixed Batches (#22896)
Signed-off-by: JartX <sagformas@epdcenter.es>
This commit is contained in:
parent
6b04039a72
commit
68af77e51c
@@ -1337,9 +1337,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
out_indices = []
|
||||
|
||||
# Reorder the bitmask to match the order of the requests in the batch.
|
||||
- sorted_bitmask = np.zeros_like(grammar_bitmask,
-                                shape=(logits.shape[0],
-                                       grammar_bitmask.shape[1]))
+ sorted_bitmask = np.full(shape=(logits.shape[0],
+                                 grammar_bitmask.shape[1]),
+                          fill_value=-1,
+                          dtype=grammar_bitmask.dtype)
|
||||
cumulative_index = 0
|
||||
seq = sorted(scheduler_output.structured_output_request_ids.items(),
|
||||
key=lambda x: x[1])
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user