mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-28 20:20:21 +08:00
qwen2.5 & 3 vl fixes and tests
Signed-off-by: prashanth058 <prashanth.dannamaneni@uipath.com>
This commit is contained in:
parent
a3647878c2
commit
81573635da
@ -240,6 +240,16 @@ def qwen2vl_vision_tower_lora_files():
|
||||
return snapshot_download(repo_id="prashanth058/qwen2vl-flickr-lora-tower")
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def qwen25vl_vision_lora_files():
    """Session-scoped fixture for the Qwen2.5-VL vision LoRA adapter.

    Returns whatever ``snapshot_download`` yields for the adapter repository
    (presumably a local snapshot directory -- confirm against the helper's
    documentation).
    """
    adapter_repo = "prashanth058/qwen2.5-3b-vl-flickr-lora-vision"
    return snapshot_download(repo_id=adapter_repo)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def qwen3vl_vision_lora_files():
    """Session-scoped fixture for the Qwen3-VL vision/connector LoRA adapter.

    Returns whatever ``snapshot_download`` yields for the adapter repository
    (presumably a local snapshot directory -- confirm against the helper's
    documentation).
    """
    adapter_repo = "prashanth058/qwen3-4b-vl-lora-vision-connector"
    return snapshot_download(repo_id=adapter_repo)
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def tinyllama_lora_files():
    """Session-scoped fixture for the TinyLlama colorist LoRA adapter.

    Returns whatever ``snapshot_download`` yields for the adapter repository
    (presumably a local snapshot directory -- confirm against the helper's
    documentation).
    """
    adapter_repo = "jashing/tinyllama-colorist-lora"
    return snapshot_download(repo_id=adapter_repo)
|
||||
|
||||
@ -14,8 +14,9 @@ class TestConfig:
|
||||
lora_path: str
|
||||
max_num_seqs: int = 2
|
||||
max_loras: int = 2
|
||||
max_lora_rank: int = 16
|
||||
max_model_len: int = 4096
|
||||
max_lora_rank: int = 32
|
||||
max_model_len: int = 8192
|
||||
gpu_memory_utilization: float = 0.85
|
||||
mm_processor_kwargs: dict[str, int] | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
@ -49,6 +50,7 @@ class Qwen2VLTester:
|
||||
max_loras=self.config.max_loras,
|
||||
max_lora_rank=self.config.max_lora_rank,
|
||||
trust_remote_code=True,
|
||||
gpu_memory_utilization=self.config.gpu_memory_utilization,
|
||||
mm_processor_kwargs=self.config.mm_processor_kwargs,
|
||||
max_model_len=self.config.max_model_len,
|
||||
)
|
||||
@ -142,6 +144,16 @@ EXPECTED_OUTPUTS_VISION_NO_CONNECTOR = [
|
||||
"A closeup shot of the Tokyo Skytree with pink flowers in the foreground.",
|
||||
]
|
||||
|
||||
EXPECTED_OUTPUTS_VISION_QWEN2_5_VL = [
|
||||
"A black car is driving past a stop sign and a large red and gold arch.",
|
||||
"A view of the Tokyo Skytree through the branches of a cherry blossom tree.",
|
||||
]
|
||||
|
||||
EXPECTED_OUTPUTS_VISION_QWEN3_VL = [
|
||||
"A black SUV drives past a stop sign in front of a Chinese gate.",
|
||||
"A white tower is seen through pink flowers.",
|
||||
]
|
||||
|
||||
# NOTE - beam search .text contains the whole text
|
||||
EXPECTED_BEAM_SEARCH_OUTPUTS = [
|
||||
[
|
||||
@ -152,6 +164,7 @@ EXPECTED_BEAM_SEARCH_OUTPUTS = [
|
||||
|
||||
QWEN2VL_MODEL_PATH = "Qwen/Qwen2-VL-2B-Instruct"
|
||||
QWEN25VL_MODEL_PATH = "Qwen/Qwen2.5-VL-3B-Instruct"
|
||||
QWEN3VL_MODEL_PATH = "Qwen/Qwen3-VL-4B-Instruct"
|
||||
|
||||
|
||||
def test_qwen2vl_lora(qwen2vl_lora_files):
|
||||
@ -192,10 +205,6 @@ def test_qwen25vl_lora(qwen25vl_lora_files):
|
||||
tester.run_test(TEST_IMAGES, expected_outputs=EXPECTED_OUTPUTS, lora_id=lora_id)
|
||||
|
||||
|
||||
@pytest.mark.xfail(
|
||||
current_platform.is_rocm(),
|
||||
reason="Qwen2-VL dependency xformers incompatible with ROCm",
|
||||
)
|
||||
def test_qwen2vl_language_lora(qwen2vl_language_lora_files):
|
||||
"""
|
||||
Test language-only LoRA adapter.
|
||||
@ -210,10 +219,6 @@ def test_qwen2vl_language_lora(qwen2vl_language_lora_files):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.xfail(
|
||||
current_platform.is_rocm(),
|
||||
reason="Qwen2-VL dependency xformers incompatible with ROCm",
|
||||
)
|
||||
def test_qwen2vl_vision_lora(qwen2vl_vision_tower_connector_lora_files):
|
||||
"""
|
||||
Test vision tower + connector LoRA adapter.
|
||||
@ -229,10 +234,6 @@ def test_qwen2vl_vision_lora(qwen2vl_vision_tower_connector_lora_files):
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.xfail(
|
||||
current_platform.is_rocm(),
|
||||
reason="Qwen2-VL dependency xformers incompatible with ROCm",
|
||||
)
|
||||
def test_qwen2vl_vision_no_connector_lora(
|
||||
qwen2vl_vision_tower_lora_files,
|
||||
):
|
||||
@ -251,3 +252,31 @@ def test_qwen2vl_vision_no_connector_lora(
|
||||
expected_outputs=EXPECTED_OUTPUTS_VISION_NO_CONNECTOR,
|
||||
lora_id=lora_id,
|
||||
)
|
||||
|
||||
|
||||
def test_qwen25vl_vision_lora(qwen25vl_vision_lora_files):
    """
    Test the vision LoRA adapter on Qwen2.5-VL.

    Runs the shared test images through the tester once per LoRA id (1 and 2)
    and compares against EXPECTED_OUTPUTS_VISION_QWEN2_5_VL each time.
    """
    tester = Qwen2VLTester(
        TestConfig(
            model_path=QWEN25VL_MODEL_PATH,
            lora_path=qwen25vl_vision_lora_files,
        )
    )
    # The same adapter is exercised under two ids with identical expectations.
    for adapter_id in (1, 2):
        tester.run_test(
            TEST_IMAGES,
            expected_outputs=EXPECTED_OUTPUTS_VISION_QWEN2_5_VL,
            lora_id=adapter_id,
        )
|
||||
|
||||
|
||||
def test_qwen3vl_vision_lora(qwen3vl_vision_lora_files):
    """
    Test the vision LoRA adapter on Qwen3-VL.

    Runs the shared test images through the tester once per LoRA id (1 and 2)
    and compares against EXPECTED_OUTPUTS_VISION_QWEN3_VL each time.
    """
    tester = Qwen2VLTester(
        TestConfig(
            model_path=QWEN3VL_MODEL_PATH,
            lora_path=qwen3vl_vision_lora_files,
        )
    )
    # The same adapter is exercised under two ids with identical expectations.
    for adapter_id in (1, 2):
        tester.run_test(
            TEST_IMAGES,
            expected_outputs=EXPECTED_OUTPUTS_VISION_QWEN3_VL,
            lora_id=adapter_id,
        )
|
||||
@ -340,7 +340,12 @@ class QKVParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
|
||||
packed_modules_list: list,
|
||||
model_config: PretrainedConfig | None,
|
||||
) -> bool:
|
||||
return type(source_layer) is QKVParallelLinear and len(packed_modules_list) == 1
|
||||
# Vision tower QKV has packed_modules_list=[] (already packed in checkpoint)
|
||||
# Language models have packed_modules_list=[module_name]
|
||||
# (single LoRA for qkv_proj)
|
||||
return type(source_layer) is QKVParallelLinear and (
|
||||
len(packed_modules_list) <= 1
|
||||
)
|
||||
|
||||
|
||||
class MergedQKVParallelLinearWithLoRA(MergedColumnParallelLinearWithLoRA):
|
||||
|
||||
@ -562,10 +562,12 @@ class LoRAModelManager:
|
||||
target_wrapper = self.punica_wrapper
|
||||
|
||||
if self.supports_mm_lora:
|
||||
if mapping.type == LoRAMappingType.TOWER:
|
||||
if mapping.type == LoRAMappingType.TOWER and self.mm_mapping.tower_model:
|
||||
target_name = self.mm_mapping.tower_model[0]
|
||||
target_wrapper = self.mm_punica_wrapper_mapping[target_name]
|
||||
elif mapping.type == LoRAMappingType.CONNECTOR:
|
||||
elif (
|
||||
mapping.type == LoRAMappingType.CONNECTOR and self.mm_mapping.connector
|
||||
):
|
||||
target_name = self.mm_mapping.connector[0]
|
||||
target_wrapper = self.mm_punica_wrapper_mapping[target_name]
|
||||
else:
|
||||
|
||||
@ -1675,6 +1675,6 @@ class Qwen3VLForConditionalGeneration(
|
||||
"""
|
||||
return MultiModelKeys.from_string_field(
|
||||
language_model="language_model",
|
||||
connector="visual.merger",
|
||||
connector=["visual.merger", "visual.deepstack_merger_list"],
|
||||
tower_model="visual.",
|
||||
)
|
||||
|
||||
@ -2075,7 +2075,9 @@ class GPUModelRunner(
|
||||
req_idx = self.input_batch.req_id_to_index[req_id]
|
||||
lora_id = int(self.input_batch.request_lora_mapping[req_idx])
|
||||
|
||||
num_tokens = self.info.get_num_mm_encoder_tokens(pos_info.length)
|
||||
num_tokens = self.info.get_num_mm_encoder_tokens( # type: ignore[attr-defined]
|
||||
pos_info.length
|
||||
)
|
||||
prompt_lora_mapping.append(lora_id)
|
||||
token_lora_mapping.extend([lora_id] * num_tokens)
|
||||
|
||||
@ -2095,16 +2097,18 @@ class GPUModelRunner(
|
||||
if hasattr(self.info, "get_num_mm_connector_tokens"):
|
||||
num_post_op_tokens = []
|
||||
for _, pos_info in mm_hashes_pos:
|
||||
mm_token_count = self.info.get_num_mm_encoder_tokens(
|
||||
mm_token_count = self.info.get_num_mm_encoder_tokens( # type: ignore[attr-defined]
|
||||
pos_info.length
|
||||
)
|
||||
post_op_count = self.info.get_num_mm_connector_tokens(
|
||||
post_op_count = self.info.get_num_mm_connector_tokens( # type: ignore[attr-defined]
|
||||
mm_token_count
|
||||
)
|
||||
num_post_op_tokens.append(post_op_count)
|
||||
|
||||
last_mapping = self.lora_manager._adapter_manager._last_mapping
|
||||
assert last_mapping is not None
|
||||
lora_ids = np.array(
|
||||
self.lora_manager._adapter_manager._last_mapping.prompt_mapping,
|
||||
last_mapping.prompt_mapping,
|
||||
dtype=np.int32,
|
||||
)
|
||||
post_op_counts_np = np.array(num_post_op_tokens, dtype=np.int32)
|
||||
@ -2112,8 +2116,8 @@ class GPUModelRunner(
|
||||
|
||||
connector_mapping = LoRAMapping(
|
||||
index_mapping=tuple(new_token_indices.tolist()),
|
||||
prompt_mapping=self.lora_manager._adapter_manager._last_mapping.prompt_mapping,
|
||||
is_prefill=self.lora_manager._adapter_manager._last_mapping.is_prefill,
|
||||
prompt_mapping=last_mapping.prompt_mapping,
|
||||
is_prefill=last_mapping.is_prefill,
|
||||
type=LoRAMappingType.CONNECTOR,
|
||||
)
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ class LoRAModelRunnerMixin:
|
||||
model: nn.Module,
|
||||
vllm_config: VllmConfig,
|
||||
device: torch.device,
|
||||
model_config: ModelConfig = None,
|
||||
model_config: ModelConfig | None = None,
|
||||
) -> nn.Module:
|
||||
if not supports_lora(model):
|
||||
raise ValueError(f"{model.__class__.__name__} does not support LoRA yet.")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user