mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 00:15:01 +08:00
[Structured Outputs] [Bug] Fix misalignment in apply_grammar_bitmask causing unintended masking and NaN logits (#22963)
Signed-off-by: rishitdholakia13 <rishit+github@cohere.com>
This commit is contained in:
parent
76144adf76
commit
1fc375dc05
@@ -1355,10 +1355,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
|||||||
cumulative_index += 1 + num_spec_tokens
|
cumulative_index += 1 + num_spec_tokens
|
||||||
grammar_bitmask = sorted_bitmask
|
grammar_bitmask = sorted_bitmask
|
||||||
|
|
||||||
# If the grammar bitmask and the logits have the same shape
|
# If the length of out indices and the logits have the same shape
|
||||||
# we don't need to pass indices to the kernel,
|
# we don't need to pass indices to the kernel,
|
||||||
# since the bitmask is already aligned with the logits.
|
# since the bitmask is already aligned with the logits.
|
||||||
skip_out_indices = grammar_bitmask.shape[0] == logits.shape[0]
|
skip_out_indices = len(out_indices) == logits.shape[0]
|
||||||
|
|
||||||
# Serialization of np.ndarray is much more efficient than a tensor,
|
# Serialization of np.ndarray is much more efficient than a tensor,
|
||||||
# so we receive it in that format.
|
# so we receive it in that format.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user