From 46c759c165a5a985ce62f019bf684e4a6109e41c Mon Sep 17 00:00:00 2001
From: Jee Jee Li
Date: Wed, 19 Mar 2025 00:40:29 +0800
Subject: [PATCH] [Bugfix] Fix LoRA extra vocab size (#15047)

Signed-off-by: Jee Jee Li
---
 examples/offline_inference/audio_language.py              | 1 -
 examples/offline_inference/vision_language.py             | 1 -
 examples/offline_inference/vision_language_multi_image.py | 1 -
 tests/models/decoder_only/vision_language/test_phi4mm.py  | 1 -
 vllm/config.py                                            | 2 +-
 5 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/examples/offline_inference/audio_language.py b/examples/offline_inference/audio_language.py
index 02dbdcb64232f..840892ea07010 100644
--- a/examples/offline_inference/audio_language.py
+++ b/examples/offline_inference/audio_language.py
@@ -93,7 +93,6 @@ def run_phi4mm(question: str, audio_count: int) -> ModelRequestData:
         max_num_seqs=2,
         enable_lora=True,
         max_lora_rank=320,
-        lora_extra_vocab_size=0,
         limit_mm_per_prompt={"audio": audio_count},
     )
 
diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index 58fd5e53bf8dc..3849bd37a8290 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -682,7 +682,6 @@ def run_phi4mm(questions: list[str], modality: str) -> ModelRequestData:
         max_num_seqs=2,
         enable_lora=True,
         max_lora_rank=320,
-        lora_extra_vocab_size=0,
     )
 
     return ModelRequestData(
diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
index c110f96669e8c..3a17e5bab0931 100644
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -342,7 +342,6 @@ def load_phi4mm(question: str, image_urls: list[str]) -> ModelRequestData:
         limit_mm_per_prompt={"image": len(image_urls)},
         enable_lora=True,
         max_lora_rank=320,
-        lora_extra_vocab_size=0,
     )
 
     placeholders = "".join(f"<|image_{i}|>"
diff --git a/tests/models/decoder_only/vision_language/test_phi4mm.py b/tests/models/decoder_only/vision_language/test_phi4mm.py
index fb69beaf77598..c3e88b60978a0 100644
--- a/tests/models/decoder_only/vision_language/test_phi4mm.py
+++ b/tests/models/decoder_only/vision_language/test_phi4mm.py
@@ -100,7 +100,6 @@ def run_test(
         distributed_executor_backend=distributed_executor_backend,
         enable_lora=True,
         max_lora_rank=320,
-        lora_extra_vocab_size=0,
         gpu_memory_utilization=0.8,  # set to 0.8 to avoid OOM in CI
         enforce_eager=True,
     ) as vllm_model:
diff --git a/vllm/config.py b/vllm/config.py
index e83a8eeee829e..c510677d64eae 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -2324,7 +2324,7 @@ class LoRAConfig:
         # Setting the maximum rank to 512 should be able to satisfy the vast
         # majority of applications.
         possible_max_ranks = (8, 16, 32, 64, 128, 256, 320, 512)
-        possible_lora_extra_vocab_size = (0, 256, 512)
+        possible_lora_extra_vocab_size = (256, 512)
         if self.max_lora_rank not in possible_max_ranks:
             raise ValueError(
                 f"max_lora_rank ({self.max_lora_rank}) must be one of "
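
A minimal sketch of the behaviour implied by the vllm/config.py hunk above (illustrative only: validate_lora_extra_vocab_size is a hypothetical stand-alone helper, not vLLM API). After this patch, 0 is removed from the accepted values, which is why the explicit lora_extra_vocab_size=0 arguments are dropped from the Phi-4-multimodal examples and test, letting them fall back to the config default instead.

# Hypothetical sketch, not vLLM code: mirrors the membership check shown in the hunk.
possible_lora_extra_vocab_size = (256, 512)

def validate_lora_extra_vocab_size(value: int) -> None:
    # With 0 removed from the allowed tuple, callers must either omit the
    # argument (relying on the default) or pass one of the remaining values.
    if value not in possible_lora_extra_vocab_size:
        raise ValueError(
            f"lora_extra_vocab_size ({value}) must be one of "
            f"{possible_lora_extra_vocab_size}.")

validate_lora_extra_vocab_size(256)   # accepted
# validate_lora_extra_vocab_size(0)   # would now raise ValueError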