From 4ba88757497b250e78d67e318a661b69e7b46da3 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Wed, 8 Oct 2025 13:13:47 -0400
Subject: [PATCH] [Bug] Fix Test in Batch Invariant (#26128)

Signed-off-by: yewentao256
---
 tests/v1/generation/test_batch_invariance.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/v1/generation/test_batch_invariance.py b/tests/v1/generation/test_batch_invariance.py
index db1c757521f00..31f6f377da624 100644
--- a/tests/v1/generation/test_batch_invariance.py
+++ b/tests/v1/generation/test_batch_invariance.py
@@ -292,8 +292,11 @@ def LLM_with_max_seqs(
         # Allow some CPU offload if needed.
         swap_space=swap_space,
         # Keep things lean and CI-friendly.
-        dtype="float16",
+        dtype="auto",
         # Single-GPU by default; override externally if desired.
         tensor_parallel_size=int(os.getenv("VLLM_TP_SIZE", "1")),
         trust_remote_code=os.getenv("VLLM_TRUST_REMOTE_CODE", "0") == "1",
+        enable_prefix_caching=False,
+        # Enable for MOE models
+        # enable_expert_parallel=True,
     )