[Ultravox] Use wrapped_model_config to instantiate inner model (#24679)

Signed-off-by: Peter Salas <peter@fixie.ai>
This commit is contained in:
Peter Salas 2025-09-11 11:52:24 -07:00 committed by GitHub
parent bb2b5126da
commit 127ded0a9e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 6 deletions

View File

@ -418,7 +418,7 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
config = vllm_config.model_config.hf_config
config: UltravoxConfig = vllm_config.model_config.hf_config
multimodal_config = vllm_config.model_config.multimodal_config
self.config = config
self.multi_modal_config = multimodal_config
@ -438,7 +438,7 @@ class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
self.multi_modal_projector = UltravoxProjector(config)
self.language_model = init_vllm_registered_model(
vllm_config=vllm_config,
hf_config=config.text_config,
hf_config=config.wrapped_model_config,
prefix=maybe_prefix(prefix, "language_model"),
)
if config.text_model_id is not None:

View File

@ -46,7 +46,7 @@ class UltravoxConfig(transformers.PretrainedConfig):
projector or at the end. Versions v0.4.1 and below
use `False`, but v0.5 and above use `True`.
"""
wrapped_model_config: transformers.PretrainedConfig
model_type = "ultravox"
audio_token = "<|audio|>"
is_composition = False
@ -113,9 +113,8 @@ class UltravoxConfig(transformers.PretrainedConfig):
return super().__setattr__(key, value)
@property
def text_config(self) -> Optional[transformers.PretrainedConfig]:
def text_config(self) -> transformers.PretrainedConfig:
# When Ultravox wraps a multi-modal model (e.g. Gemma), we instantiate
# the full wrapped model, but `text_config` exposes only the text config
# of that inner model.
return (self.wrapped_model_config.get_text_config()
if self.wrapped_model_config else None)
return self.wrapped_model_config.get_text_config()