[multimodal][test] Reduce memory utilization for test_siglip to avoid OOM (#29504)

Signed-off-by: zhxchen17 <zhxchen17@fb.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2026-03-16 16:27:15 +08:00 · 2025-12-01 07:41:48 -05:00 · 2025-12-01 07:41:48 -05:00 · ad9d656bfa
commit ad9d656bfa
parent f37e8938d2
1 changed file with 7 additions and 1 deletion
--- a/tests/models/multimodal/pooling/test_siglip.py
+++ b/tests/models/multimodal/pooling/test_siglip.py
@@ -37,7 +37,12 @@ def _run_test(
    dtype: str,
 ) -> None:
    with vllm_runner(
-        model, runner="pooling", dtype=dtype, enforce_eager=True, max_model_len=64
+        model,
+        runner="pooling",
+        dtype=dtype,
+        enforce_eager=True,
+        max_model_len=64,
+        gpu_memory_utilization=0.7,
    ) as vllm_model:
        vllm_outputs = vllm_model.embed(input_texts, images=input_images)

@@ -134,6 +139,7 @@ def test_models_text_image_no_crash(
        dtype=dtype,
        enforce_eager=True,
        max_model_len=64,
+        gpu_memory_utilization=0.7,
    ) as vllm_model:
        with pytest.raises(ValueError, match="not both"):
            vllm_model.embed(texts, images=images)