mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-24 05:11:26 +08:00
[Core] Get num_encoder_tokens from scheduler config (#24989)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
parent
ea3de5ef0d
commit
58d4c705a8
@@ -465,9 +465,8 @@ class Scheduler(SchedulerInterface):
|
||||
in self.vllm_config.model_config.model.lower()), (
|
||||
"Whisper is the only supported "
|
||||
"encoder-decoder model.")
|
||||
- num_encoder_tokens = MULTIMODAL_REGISTRY.\
|
||||
- get_encdec_max_encoder_len(
|
||||
- self.vllm_config.model_config)
|
||||
+ num_encoder_tokens =\
|
||||
+ self.scheduler_config.max_num_encoder_input_tokens
|
||||
else:
|
||||
num_encoder_tokens = 0
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@ from typing_extensions import Self
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.logger import init_logger
|
||||
- from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.utils import cdiv, get_dtype_size
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@@ -230,8 +229,8 @@ class CrossAttentionSpec(AttentionSpec):
|
||||
def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int:
|
||||
# For cross-attention, we need to cache encoder states
|
||||
# Get encoder length (e.g., 1500 for Whisper).
|
||||
- max_encoder_len = MULTIMODAL_REGISTRY.\
|
||||
- get_encdec_max_encoder_len(vllm_config.model_config)
|
||||
+ max_encoder_len = vllm_config.scheduler_config.\
|
||||
+ max_num_encoder_input_tokens
|
||||
return cdiv(max_encoder_len, self.block_size) * self.page_size_bytes
|
||||
|
||||
|
||||
|
||||
@@ -234,8 +234,8 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
|
||||
if self.model_config.is_encoder_decoder:
|
||||
# Maximum length of the encoder input, only for encoder-decoder
|
||||
# models.
|
||||
- self.max_encoder_len = self.mm_registry.\
|
||||
- get_encdec_max_encoder_len(model_config)
|
||||
+ self.max_encoder_len = scheduler_config.\
|
||||
+ max_num_encoder_input_tokens
|
||||
else:
|
||||
self.max_encoder_len = 0
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user