From ad9d656bfa19d937b3ecb4b33b2824a8440bd8f5 Mon Sep 17 00:00:00 2001
From: Zhengxu Chen
Date: Mon, 1 Dec 2025 07:41:48 -0500
Subject: [PATCH] [multimodal][test] Reduce memory utilization for test_siglip to avoid OOM (#29504)

Signed-off-by: zhxchen17
Co-authored-by: Cyrus Leung
---
 tests/models/multimodal/pooling/test_siglip.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/models/multimodal/pooling/test_siglip.py b/tests/models/multimodal/pooling/test_siglip.py
index c973676ba0272..92ae115a19831 100644
--- a/tests/models/multimodal/pooling/test_siglip.py
+++ b/tests/models/multimodal/pooling/test_siglip.py
@@ -37,7 +37,12 @@ def _run_test(
     dtype: str,
 ) -> None:
     with vllm_runner(
-        model, runner="pooling", dtype=dtype, enforce_eager=True, max_model_len=64
+        model,
+        runner="pooling",
+        dtype=dtype,
+        enforce_eager=True,
+        max_model_len=64,
+        gpu_memory_utilization=0.7,
     ) as vllm_model:
         vllm_outputs = vllm_model.embed(input_texts, images=input_images)
 
@@ -134,6 +139,7 @@ def test_models_text_image_no_crash(
         dtype=dtype,
         enforce_eager=True,
         max_model_len=64,
+        gpu_memory_utilization=0.7,
     ) as vllm_model:
         with pytest.raises(ValueError, match="not both"):
             vllm_model.embed(texts, images=images)