[multimodal][test] Reduce memory utilization for test_siglip to avoid OOM (#29504)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Zhengxu Chen 2025-12-01 07:41:48 -05:00 committed by GitHub
parent f37e8938d2
commit ad9d656bfa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -37,7 +37,12 @@ def _run_test(
dtype: str,
) -> None:
with vllm_runner(
model, runner="pooling", dtype=dtype, enforce_eager=True, max_model_len=64
model,
runner="pooling",
dtype=dtype,
enforce_eager=True,
max_model_len=64,
gpu_memory_utilization=0.7,
) as vllm_model:
vllm_outputs = vllm_model.embed(input_texts, images=input_images)
@@ -134,6 +139,7 @@ def test_models_text_image_no_crash(
dtype=dtype,
enforce_eager=True,
max_model_len=64,
gpu_memory_utilization=0.7,
) as vllm_model:
with pytest.raises(ValueError, match="not both"):
vllm_model.embed(texts, images=images)