mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 12:25:45 +08:00
[Bugfix]: Fix the incompatibility issue with Structured Outputs when Thinking is disabled (#18879)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
This commit is contained in:
parent
1e123529d7
commit
ba5111f237
@@ -149,31 +149,37 @@ class StructuredOutputManager:
|
|||||||
# NOTE: This outer loop can likely be parallelized to improve
|
# NOTE: This outer loop can likely be parallelized to improve
|
||||||
# performance of bitmask generation for large batches.
|
# performance of bitmask generation for large batches.
|
||||||
for req_id, _ in ordered_seq:
|
for req_id, _ in ordered_seq:
|
||||||
request = requests[req_id].structured_output_request
|
request = requests[req_id]
|
||||||
if TYPE_CHECKING:
|
structured_output_request = request.structured_output_request
|
||||||
assert request is not None
|
|
||||||
assert request.grammar is not None
|
|
||||||
|
|
||||||
apply_bitmask = (
|
if TYPE_CHECKING:
|
||||||
request.reasoning_ended if self.reasoner is not None else True
|
assert structured_output_request is not None
|
||||||
) # noqa: E501
|
assert structured_output_request.grammar is not None
|
||||||
|
apply_bitmask: bool = True
|
||||||
|
if self.reasoner is not None:
|
||||||
|
if structured_output_request.reasoning_ended is None:
|
||||||
|
structured_output_request.reasoning_ended = \
|
||||||
|
self.reasoner.is_reasoning_end(request.prompt_token_ids)
|
||||||
|
apply_bitmask = structured_output_request.reasoning_ended
|
||||||
|
|
||||||
state_advancements = 0
|
state_advancements = 0
|
||||||
req_tokens = scheduled_spec_decode_tokens.get(req_id, []) + [None]
|
req_tokens = scheduled_spec_decode_tokens.get(req_id, []) + [None]
|
||||||
for i, token in enumerate(req_tokens):
|
for i, token in enumerate(req_tokens):
|
||||||
if apply_bitmask and not request.grammar.is_terminated():
|
if apply_bitmask and not \
|
||||||
request.grammar.fill_bitmask(bitmask_tensor,
|
structured_output_request.grammar.is_terminated():
|
||||||
cumulative_index)
|
structured_output_request.grammar.fill_bitmask(
|
||||||
|
bitmask_tensor, cumulative_index)
|
||||||
if token is not None:
|
if token is not None:
|
||||||
# In order to generate the correct bitmask for each
|
# In order to generate the correct bitmask for each
|
||||||
# position in the speculative sequence, we advance
|
# position in the speculative sequence, we advance
|
||||||
# the FSM state for each speculative token and rollback
|
# the FSM state for each speculative token and rollback
|
||||||
# to restore the previous state when we are finished.
|
# to restore the previous state when we are finished.
|
||||||
assert request.grammar.accept_tokens(req_id, [token])
|
assert structured_output_request.grammar.accept_tokens(
|
||||||
|
req_id, [token])
|
||||||
state_advancements += 1
|
state_advancements += 1
|
||||||
cumulative_index += 1
|
cumulative_index += 1
|
||||||
if state_advancements > 0:
|
if state_advancements > 0:
|
||||||
request.grammar.rollback(state_advancements)
|
structured_output_request.grammar.rollback(state_advancements)
|
||||||
|
|
||||||
if cumulative_index < bitmask_tensor.shape[0]:
|
if cumulative_index < bitmask_tensor.shape[0]:
|
||||||
bitmask_tensor = bitmask_tensor[:cumulative_index]
|
bitmask_tensor = bitmask_tensor[:cumulative_index]
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ class StructuredOutputRequest:
|
|||||||
sampling_params: SamplingParams
|
sampling_params: SamplingParams
|
||||||
_grammar: Optional[Union[Future[StructuredOutputGrammar],
|
_grammar: Optional[Union[Future[StructuredOutputGrammar],
|
||||||
StructuredOutputGrammar]] = None
|
StructuredOutputGrammar]] = None
|
||||||
reasoning_ended: bool = False
|
reasoning_ended: Optional[bool] = None
|
||||||
|
|
||||||
def _check_grammar_completion(self) -> bool:
|
def _check_grammar_completion(self) -> bool:
|
||||||
# NOTE: We have to lazy import to gate circular imports
|
# NOTE: We have to lazy import to gate circular imports
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user