[Misc] Turn off encoder torch compile by default (#28634)
Signed-off-by: Roger Wang <hey@rogerw.io>
parent b230286fbc
commit d3387750f1
@@ -10,8 +10,8 @@ from vllm.platforms import current_platform
 def test_compile():
     vllm_config = VllmConfig()
-    # Default configuration compiles mm encoder
-    assert vllm_config.compilation_config.compile_mm_encoder
+    # Default configuration does not compile mm encoder
+    assert not vllm_config.compilation_config.compile_mm_encoder


 # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
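For readers following along, a minimal standalone check mirroring the updated test above (assuming `VllmConfig` is importable from `vllm.config`, as the test's usage suggests):

```python
# Minimal sketch mirroring the updated test: after this commit, a default
# VllmConfig no longer enables multimodal encoder compilation.
# Assumption: VllmConfig is importable from vllm.config.
from vllm.config import VllmConfig

vllm_config = VllmConfig()

# The default flipped from True to False in this commit.
assert not vllm_config.compilation_config.compile_mm_encoder
```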
@@ -39,7 +39,10 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
             "Qwen/Qwen2.5-VL-3B-Instruct",
             max_model_len=2048,
             gpu_memory_utilization=0.8,
-            compilation_config={"mode": CompilationMode.VLLM_COMPILE},
+            compilation_config={
+                "mode": CompilationMode.VLLM_COMPILE,
+                "compile_mm_encoder": True,
+            },
         ) as _,
     ):
         pass
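Since the default is now off, callers that still want the compiled encoder must opt in explicitly, as the test above now does. A hedged sketch of the equivalent opt-in through the `LLM` entrypoint (treat the exact keyword arguments as assumptions for your vLLM version):

```python
# Hypothetical opt-in sketch: re-enable encoder compilation for a
# Qwen2.5-VL model now that compile_mm_encoder defaults to False.
# Assumption: LLM accepts a compilation_config dict, mirroring the
# vllm_runner call in the test above.
from vllm import LLM

llm = LLM(
    model="Qwen/Qwen2.5-VL-3B-Instruct",
    max_model_len=2048,
    gpu_memory_utilization=0.8,
    compilation_config={"compile_mm_encoder": True},
)
```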
@@ -131,6 +131,7 @@ VLM_TEST_SETTINGS = {
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
         video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
+        enforce_eager=False,
         max_model_len=4096,
         max_num_seqs=2,
         auto_cls=AutoModelForImageTextToText,
@@ -160,6 +161,7 @@ VLM_TEST_SETTINGS = {
             VLMTestType.MULTI_IMAGE,
             VLMTestType.VIDEO,
         ),
+        enforce_eager=False,
         needs_video_metadata=True,
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
         img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
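The two `enforce_eager=False` additions above keep these VLM test entries on the compiled path even though encoder compilation is no longer on by default. For reference, a sketch of what the flag means at the user level (assuming the standard `LLM` constructor flag; the model name is illustrative):

```python
# Sketch: enforce_eager=False lets vLLM use torch.compile / CUDA graphs,
# while enforce_eager=True forces plain eager-mode PyTorch execution.
# Assumption: standard LLM constructor flag; model name is illustrative.
from vllm import LLM

llm = LLM(
    model="Qwen/Qwen2.5-VL-3B-Instruct",
    max_model_len=4096,
    max_num_seqs=2,
    enforce_eager=False,  # allow compilation and cudagraph capture
)
```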
@@ -320,9 +320,10 @@ class CompilationConfig:

     If None, defaults to attention ops for piecewise cudagraphs.
     If empty list [], no ops are excluded (suitable for full cudagraphs)."""
-    compile_mm_encoder: bool = True
+    compile_mm_encoder: bool = False
     """Whether or not to compile the multimodal encoder.
-    Currently, this only works for `Qwen2_5_vl`."""
+    Currently, this only works for `Qwen2_5_vl` on selected platforms.
+    Disabled by default until more models are supported/tested to work."""

     # Inductor capture
     use_inductor: bool | None = None
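Putting the config change in one place: a minimal sketch of the new default and the explicit opt-in (assuming `CompilationConfig` is importable from `vllm.config` and accepts keyword overrides):

```python
# Minimal sketch of the changed default (assumption: CompilationConfig
# is importable from vllm.config and accepts keyword overrides).
from vllm.config import CompilationConfig

# New default after this commit: encoder compilation is off.
assert CompilationConfig().compile_mm_encoder is False

# Explicit opt-in, currently only effective for Qwen2_5_vl on
# selected platforms (per the docstring above).
cfg = CompilationConfig(compile_mm_encoder=True)
assert cfg.compile_mm_encoder is True
```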