Mirror of https://git.datalinker.icu/vllm-project/vllm.git
[Misc] Continue refactoring model tests (#17573)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
commit f89d0e11bf
parent b4003d11fc
@@ -47,8 +47,7 @@ def get_mixed_modalities_query() -> QueryResult:
             "image":
             ImageAsset("cherry_blossom").pil_image.convert("RGB"),
             "video":
-            VideoAsset(name="sample_demo_1.mp4",
-                       num_frames=16).np_ndarrays,
+            VideoAsset(name="sample_demo_1", num_frames=16).np_ndarrays,
         },
     },
     limit_mm_per_prompt={
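Note: the recurring change across these call sites is that VideoAsset now takes an extension-free name. A minimal usage sketch, assuming the vllm.assets.video module path (only the constructor and the .np_ndarrays property appear in this diff):

    from vllm.assets.video import VideoAsset  # module path assumed, not shown in this diff

    # New convention: pass the bare asset name; the ".mp4" suffix is appended
    # internally when the file is resolved (see the VideoAsset hunk below).
    frames = VideoAsset(name="sample_demo_1", num_frames=16).np_ndarrays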
@@ -66,7 +65,7 @@ def get_use_audio_in_video_query() -> QueryResult:
               "<|im_start|>user\n<|vision_bos|><|VIDEO|><|vision_eos|>"
               f"{question}<|im_end|>\n"
               f"<|im_start|>assistant\n")
-    asset = VideoAsset(name="sample_demo_1.mp4", num_frames=16)
+    asset = VideoAsset(name="sample_demo_1", num_frames=16)
     audio = asset.get_audio(sampling_rate=16000)
     assert not envs.VLLM_USE_V1, ("V1 does not support use_audio_in_video. "
                                   "Please launch this example with "
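Note: here the audio track is pulled straight from the same video asset. A short sketch of that path, with the constructor and get_audio call exactly as shown above (module path assumed):

    from vllm.assets.video import VideoAsset  # module path assumed

    asset = VideoAsset(name="sample_demo_1", num_frames=16)
    # Decodes the video's audio track via librosa at the requested rate
    # (see the get_audio hunk at the end of this diff).
    audio = asset.get_audio(sampling_rate=16000)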
@@ -1109,7 +1109,7 @@ def get_multi_modal_input(args):

     if args.modality == "video":
         # Input video and question
-        video = VideoAsset(name="sample_demo_1.mp4",
+        video = VideoAsset(name="sample_demo_1",
                            num_frames=args.num_frames).np_ndarrays
         vid_questions = ["Why is this video funny?"]

@@ -97,13 +97,18 @@ class _VideoAssets(_VideoAssetsBase):

     def __init__(self) -> None:
         super().__init__([
-            VideoAsset("sample_demo_1.mp4"),
+            VideoAsset("sample_demo_1"),
         ])

     def prompts(self, prompts: _VideoAssetPrompts) -> list[str]:
         return [prompts["sample_demo_1"]]


+class _AudioAssetPrompts(TypedDict):
+    mary_had_lamb: str
+    winning_call: str
+
+
 class _AudioAssetsBase(UserList[AudioAsset]):
     pass

@@ -116,6 +121,9 @@ class _AudioAssets(_AudioAssetsBase):
             AudioAsset("winning_call"),
         ])

+    def prompts(self, prompts: _AudioAssetPrompts) -> list[str]:
+        return [prompts["mary_had_lamb"], prompts["winning_call"]]
+

 IMAGE_ASSETS = _ImageAssets()
 """Singleton instance of :class:`_ImageAssets`."""
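Note: the _AudioAssetPrompts keys map one prompt per asset, and prompts() fixes the ordering so prompt lists stay aligned with the asset list. A self-contained sketch of the pattern (str stands in for AudioAsset so it runs without vllm; the prompt texts are hypothetical):

    from collections import UserList
    from typing import TypedDict


    class _AudioAssetPrompts(TypedDict):
        mary_had_lamb: str
        winning_call: str


    class _AudioAssets(UserList[str]):

        def __init__(self) -> None:
            super().__init__(["mary_had_lamb", "winning_call"])

        def prompts(self, prompts: _AudioAssetPrompts) -> list[str]:
            # Returning in a fixed order keeps prompts aligned with the assets.
            return [prompts["mary_had_lamb"], prompts["winning_call"]]


    print(_AudioAssets().prompts({
        "mary_had_lamb": "<prompt for mary_had_lamb>",  # hypothetical text
        "winning_call": "<prompt for winning_call>",    # hypothetical text
    }))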
@@ -29,7 +29,7 @@ def test_models(vllm_runner, model, dtype: str, max_tokens: int) -> None:
     image_cherry = ImageAsset("cherry_blossom").pil_image.convert("RGB")
     image_stop = ImageAsset("stop_sign").pil_image.convert("RGB")
     images = [image_cherry, image_stop]
-    video = VideoAsset(name="sample_demo_1.mp4", num_frames=16).np_ndarrays
+    video = VideoAsset(name="sample_demo_1", num_frames=16).np_ndarrays

     inputs = [
         (
@@ -1,13 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0

-from typing import Optional
-
 import pytest
 import torch
 import torch.nn as nn
 from huggingface_hub import snapshot_download
 from transformers import AutoConfig, AutoModel, CLIPImageProcessor

+from vllm.distributed import cleanup_dist_env_and_memory
+
 from ....conftest import _ImageAssets

 # we use snapshot_download to prevent conflicts between
@@ -20,7 +19,6 @@ def run_intern_vit_test(
     model_id: str,
     *,
     dtype: str,
-    distributed_executor_backend: Optional[str] = None,
 ):
     model = snapshot_download(model_id, allow_patterns=DOWNLOAD_PATTERN)

@@ -43,7 +41,6 @@ def run_intern_vit_test(
         for pixel_value in pixel_values
     ]

-    from vllm.distributed import cleanup_dist_env_and_memory
     from vllm.model_executor.models.intern_vit import InternVisionModel
     vllm_model = InternVisionModel(config)
     vllm_model.load_weights(hf_model.state_dict().items())
@@ -71,7 +68,7 @@ def run_intern_vit_test(
 ])
 @pytest.mark.parametrize("dtype", [torch.half])
 @torch.inference_mode()
-def test_models(dist_init, image_assets, model_id, dtype: str) -> None:
+def test_models(image_assets, model_id, dtype: str) -> None:
     run_intern_vit_test(
         image_assets,
         model_id,
@@ -78,18 +78,18 @@ def video_to_pil_images_list(path: str,

 @dataclass(frozen=True)
 class VideoAsset:
-    name: Literal["sample_demo_1.mp4"]
+    name: Literal["sample_demo_1"]
     num_frames: int = -1

     @property
     def pil_images(self) -> list[Image.Image]:
-        video_path = download_video_asset(self.name)
+        video_path = download_video_asset(self.name + ".mp4")
         ret = video_to_pil_images_list(video_path, self.num_frames)
         return ret

     @property
     def np_ndarrays(self) -> npt.NDArray:
-        video_path = download_video_asset(self.name)
+        video_path = download_video_asset(self.name + ".mp4")
         ret = video_to_ndarrays(video_path, self.num_frames)
         return ret

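Note: appending ".mp4" inside the property getters centralizes the extension, so the Literal type and every call site stay extension-free. A self-contained sketch of the resolution step (download_video_asset is stubbed here; the real helper is not shown in this diff):

    def download_video_asset(filename: str) -> str:
        # Stub standing in for the real asset-cache helper.
        return f"/cache/assets/{filename}"  # hypothetical cache location

    name = "sample_demo_1"  # extension-free, matching the Literal above
    video_path = download_video_asset(name + ".mp4")
    print(video_path)       # /cache/assets/sample_demo_1.mp4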
@@ -99,5 +99,5 @@ class VideoAsset:

         See also: examples/offline_inference/qwen2_5_omni/only_thinker.py
         """
-        video_path = download_video_asset(self.name)
+        video_path = download_video_asset(self.name + ".mp4")
         return librosa.load(video_path, sr=sampling_rate)[0]
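Note: librosa.load returns a (waveform, sample_rate) tuple, and indexing [0] keeps only the waveform, resampled to the requested rate. A sketch under assumptions (the local file path is hypothetical, and decoding audio from an .mp4 relies on librosa's audioread/ffmpeg backend):

    import librosa

    # Hypothetical local file; sr=16000 resamples the decoded track to 16 kHz.
    waveform = librosa.load("sample_demo_1.mp4", sr=16000)[0]
    print(waveform.shape)  # 1-D mono float array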