mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:25:00 +08:00
Signed-off-by: Lu Fang <fanglu@fb.com>
This commit is contained in:
parent
138f0d1e75
commit
b716ab93a7
@ -578,8 +578,10 @@ class Scheduler(SchedulerInterface):
|
||||
scheduled_spec_decode_tokens,
|
||||
req_to_new_blocks,
|
||||
)
|
||||
scheduled_requests = (scheduled_new_reqs + scheduled_running_reqs +
|
||||
scheduled_resumed_reqs)
|
||||
structured_output_request_ids, grammar_bitmask = (
|
||||
self.get_grammar_bitmask(self.running,
|
||||
self.get_grammar_bitmask(scheduled_requests,
|
||||
scheduled_spec_decode_tokens))
|
||||
scheduler_output = SchedulerOutput(
|
||||
scheduled_new_reqs=new_reqs_data,
|
||||
|
||||
@ -90,13 +90,14 @@ def apply_grammar_bitmask(
|
||||
seq = sorted(scheduler_output.structured_output_request_ids.items(),
|
||||
key=lambda x: x[1])
|
||||
for req_id, _ in seq:
|
||||
logit_index = struct_out_req_batch_indices[req_id]
|
||||
num_spec_tokens = len(
|
||||
scheduler_output.scheduled_spec_decode_tokens.get(req_id, []))
|
||||
for i in range(1 + num_spec_tokens):
|
||||
sorted_bitmask[logit_index + i] = \
|
||||
grammar_bitmask[cumulative_index + i]
|
||||
out_indices.append(logit_index + i)
|
||||
if req_id in struct_out_req_batch_indices:
|
||||
logit_index = struct_out_req_batch_indices[req_id]
|
||||
for i in range(1 + num_spec_tokens):
|
||||
sorted_bitmask[logit_index + i] = \
|
||||
grammar_bitmask[cumulative_index + i]
|
||||
out_indices.append(logit_index + i)
|
||||
cumulative_index += 1 + num_spec_tokens
|
||||
grammar_bitmask = sorted_bitmask
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user