mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 07:54:57 +08:00
[Fix] Fix a condition for ignored sequences (#867)
This commit is contained in:
parent
4b6f069b6f
commit
d2b2eed67c
@ -64,6 +64,9 @@ class Scheduler:
|
|||||||
self.scheduler_config = scheduler_config
|
self.scheduler_config = scheduler_config
|
||||||
self.cache_config = cache_config
|
self.cache_config = cache_config
|
||||||
|
|
||||||
|
self.prompt_limit = min(self.scheduler_config.max_model_len,
|
||||||
|
self.scheduler_config.max_num_batched_tokens)
|
||||||
|
|
||||||
# Instantiate the scheduling policy.
|
# Instantiate the scheduling policy.
|
||||||
self.policy = PolicyFactory.get_policy(policy_name="fcfs")
|
self.policy = PolicyFactory.get_policy(policy_name="fcfs")
|
||||||
# Create the block space manager.
|
# Create the block space manager.
|
||||||
@ -123,18 +126,15 @@ class Scheduler:
|
|||||||
seq_group = self.waiting[0]
|
seq_group = self.waiting[0]
|
||||||
|
|
||||||
num_prompt_tokens = seq_group.get_seqs()[0].get_len()
|
num_prompt_tokens = seq_group.get_seqs()[0].get_len()
|
||||||
prompt_limit = min(
|
if num_prompt_tokens > self.prompt_limit:
|
||||||
self.scheduler_config.max_model_len,
|
|
||||||
self.scheduler_config.max_num_batched_tokens)
|
|
||||||
if num_prompt_tokens > prompt_limit:
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Input prompt ({num_prompt_tokens} tokens) is too long"
|
f"Input prompt ({num_prompt_tokens} tokens) is too long"
|
||||||
f" and exceeds limit of {prompt_limit}")
|
f" and exceeds limit of {self.prompt_limit}")
|
||||||
for seq in seq_group.get_seqs():
|
for seq in seq_group.get_seqs():
|
||||||
seq.status = SequenceStatus.FINISHED_IGNORED
|
seq.status = SequenceStatus.FINISHED_IGNORED
|
||||||
ignored_seq_groups.append(seq_group)
|
ignored_seq_groups.append(seq_group)
|
||||||
self.waiting.pop(0)
|
self.waiting.pop(0)
|
||||||
break
|
continue
|
||||||
|
|
||||||
# If the sequence group cannot be allocated, stop.
|
# If the sequence group cannot be allocated, stop.
|
||||||
if not self.block_manager.can_allocate(seq_group):
|
if not self.block_manager.can_allocate(seq_group):
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user