From 5b2dcbf0b8dd9ee9199d7496c84e84c010122a00 Mon Sep 17 00:00:00 2001 From: inkcherry Date: Fri, 9 May 2025 17:16:26 +0800 Subject: [PATCH] Fix Whisper crash caused by invalid `max_num_batched_tokens` config (#17853) Signed-off-by: inkcherry --- vllm/config.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vllm/config.py b/vllm/config.py index fca2865f85d54..ac1dc960ccbe2 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -2050,6 +2050,13 @@ class SchedulerConfig: _MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS, ) + # When using default settings, + # ensure max_num_batched_tokens does not exceed the model limit. + # Some models (e.g., Whisper) have embeddings tied to max length. + self.max_num_batched_tokens = min( + self.max_num_seqs * self.max_model_len, + self.max_num_batched_tokens) + self.max_num_encoder_input_tokens = self.max_num_batched_tokens self.encoder_cache_size = self.max_num_batched_tokens @@ -2090,6 +2097,13 @@ class SchedulerConfig: "be greater than or equal to max_num_seqs " f"({self.max_num_seqs}).") + if self.max_num_batched_tokens > self.max_num_seqs * self.max_model_len: + logger.warning( + "max_num_batched_tokens (%d) exceeds max_num_seqs " + "* max_model_len (%d). This may lead to unexpected behavior.", + self.max_num_batched_tokens, + self.max_num_seqs * self.max_model_len) + if self.num_lookahead_slots < 0: raise ValueError( "num_lookahead_slots "