mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-31 15:17:10 +08:00
Fix Whisper crash caused by invalid ``max_num_batched_tokens`` config (#17853)
Signed-off-by: inkcherry <mingzhi.liu@intel.com>
This commit is contained in:
parent
6e4a93e3f7
commit
5b2dcbf0b8
@ -2050,6 +2050,13 @@ class SchedulerConfig:
|
|||||||
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS,
|
_MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# When using default settings,
|
||||||
|
# Ensure max_num_batched_tokens does not exceed model limit.
|
||||||
|
# Some models (e.g., Whisper) have embeddings tied to max length.
|
||||||
|
self.max_num_batched_tokens = min(
|
||||||
|
self.max_num_seqs * self.max_model_len,
|
||||||
|
self.max_num_batched_tokens)
|
||||||
|
|
||||||
self.max_num_encoder_input_tokens = self.max_num_batched_tokens
|
self.max_num_encoder_input_tokens = self.max_num_batched_tokens
|
||||||
self.encoder_cache_size = self.max_num_batched_tokens
|
self.encoder_cache_size = self.max_num_batched_tokens
|
||||||
|
|
||||||
@ -2090,6 +2097,13 @@ class SchedulerConfig:
|
|||||||
"be greater than or equal to max_num_seqs "
|
"be greater than or equal to max_num_seqs "
|
||||||
f"({self.max_num_seqs}).")
|
f"({self.max_num_seqs}).")
|
||||||
|
|
||||||
|
if self.max_num_batched_tokens > self.max_num_seqs * self.max_model_len:
|
||||||
|
logger.warning(
|
||||||
|
"max_num_batched_tokens (%d) exceeds max_num_seqs"
|
||||||
|
"* max_model_len (%d). This may lead to unexpected behavior.",
|
||||||
|
self.max_num_batched_tokens,
|
||||||
|
self.max_num_seqs * self.max_model_len)
|
||||||
|
|
||||||
if self.num_lookahead_slots < 0:
|
if self.num_lookahead_slots < 0:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"num_lookahead_slots "
|
"num_lookahead_slots "
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user