[CI/Build] Update VLM common tests (#22841)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

commit c9232d41f4
parent 9bd9294f0e
@@ -561,7 +561,7 @@ VLM_TEST_SETTINGS = {
         get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
         hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
         patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
-        # FIXME: https://huggingface.co/openbmb/MiniCPM-V-2_6/discussions/55
+        # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49
         marks=[pytest.mark.skip("HF import fails")],
     ),
     "minicpmv_26": VLMTestInfo(
@@ -574,8 +574,6 @@ VLM_TEST_SETTINGS = {
         get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>']), # noqa: E501
         hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
         patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
-        # FIXME: https://huggingface.co/openbmb/MiniCPM-V-2_6/discussions/55
-        marks=[pytest.mark.skip("HF import fails")],
     ),
     "minimax_vl_01": VLMTestInfo(
         models=["MiniMaxAI/MiniMax-VL-01"],
@@ -611,18 +609,6 @@ VLM_TEST_SETTINGS = {
         patch_hf_runner=model_utils.ovis_patch_hf_runner,
         marks=[large_gpu_mark(min_gb=32)],
     ),
-    "ovis1_6": VLMTestInfo(
-        models=["AIDC-AI/Ovis1.6-Llama3.2-3B"],
-        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
-        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful and honest multimodal assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", # noqa: E501
-        img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
-        max_model_len=4096,
-        max_num_seqs=2,
-        dtype="half",
-        # use sdpa mode for hf runner since ovis2 didn't work with flash_attn
-        hf_model_kwargs={"llm_attn_implementation": "sdpa"},
-        patch_hf_runner=model_utils.ovis_patch_hf_runner,
-    ),
     "ovis2": VLMTestInfo(
         models=["AIDC-AI/Ovis2-1B"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
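Note: the get_stop_token_ids lambdas kept in the hunks above resolve stop strings to vocabulary IDs when the test runner constructs sampling params. A minimal sketch of that resolution, assuming a Hugging Face tokenizer; the checkpoint name and the trust_remote_code flag below are illustrative, not taken from this commit:

from transformers import AutoTokenizer

# Illustrative checkpoint; any model whose vocabulary defines these
# special tokens resolves them the same way.
tok = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6",
                                    trust_remote_code=True)

# convert_tokens_to_ids maps each token string to its vocabulary ID
# (unknown strings map to the tokenizer's unk_token_id).
stop_token_ids = tok.convert_tokens_to_ids(['<|im_end|>', '<|endoftext|>'])
print(stop_token_ids)  # two integers that terminate generation when sampled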
@@ -85,30 +85,13 @@ class MiniCPMVImagePixelInputs(TensorSchema):
         - w: Width
     """
 
-    def _validate_nested_tensors(
-        self,
-        value: Union[list[torch.Tensor], tuple[torch.Tensor, ...]],
-        field_name: str,
-        expected_shape: tuple[Union[int, str], ...],
-        dynamic_dims: set[str],
-    ) -> tuple[int, ...]:
-        # value[0] is the scaled image,
-        # and value[1:] is a collection of image slices.
-        # It is ensured that all slices in the collection
-        # have the same shape.
-        if field_name == "pixel_values":
-            value = value[1:] if len(value) > 1 else value
-
-        return super()._validate_nested_tensors(value, field_name,
-                                                expected_shape, dynamic_dims)
-
     type: Literal["pixel_values"] = "pixel_values"
 
     # Note that the image size may vary, so we pass it as a list instead of a
     # batched tensor.
     pixel_values: Annotated[
         list[torch.Tensor],
-        TensorShape("bns", "c", "h", "w"),
+        TensorShape("bns", "c", "h", "w", dynamic_dims={"h", "w"}),
     ]
     tgt_sizes: Annotated[
         torch.Tensor,
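Note: the removed _validate_nested_tensors override existed only to skip shape checks on the scaled image in value[0], whose height and width differ from the slices in value[1:]. Declaring dynamic_dims={"h", "w"} on the TensorShape expresses the same constraint declaratively. A rough sketch of the idea follows; it is not the actual TensorSchema implementation, and the function name and error messages are hypothetical:

import torch

def check_nested_shapes(
    tensors: list[torch.Tensor],
    expected_shape: tuple[str, ...],
    dynamic_dims: set[str],
) -> None:
    # Dims that must agree across every tensor in the list; dims named in
    # dynamic_dims (here "h" and "w") may vary per tensor.
    fixed = [i for i, name in enumerate(expected_shape)
             if name not in dynamic_dims]
    reference = tensors[0].shape
    for t in tensors:
        if t.dim() != len(expected_shape):
            raise ValueError(
                f"expected {len(expected_shape)} dims, got {t.dim()}")
        for i in fixed:
            if t.shape[i] != reference[i]:
                raise ValueError(
                    f"dim '{expected_shape[i]}' mismatch: "
                    f"{t.shape[i]} != {reference[i]}")

# The scaled image and its slices may have different heights and widths,
# so with dynamic_dims={"h", "w"} only the channel dim must match:
images = [torch.zeros(3, 448, 448), torch.zeros(3, 224, 336)]
check_nested_shapes(images, ("c", "h", "w"), dynamic_dims={"h", "w"})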