From 4ba88757497b250e78d67e318a661b69e7b46da3 Mon Sep 17 00:00:00 2001
From: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Date: Wed, 8 Oct 2025 13:13:47 -0400
Subject: [PATCH] [Bug] Fix Test in Batch Invariant (#26128)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
---
 tests/v1/generation/test_batch_invariance.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/v1/generation/test_batch_invariance.py b/tests/v1/generation/test_batch_invariance.py
index db1c757521f00..31f6f377da624 100644
--- a/tests/v1/generation/test_batch_invariance.py
+++ b/tests/v1/generation/test_batch_invariance.py
@@ -292,8 +292,11 @@ def LLM_with_max_seqs(
         # Allow some CPU offload if needed.
         swap_space=swap_space,
         # Keep things lean and CI-friendly.
-        dtype="float16",
+        dtype="auto",
         # Single-GPU by default; override externally if desired.
         tensor_parallel_size=int(os.getenv("VLLM_TP_SIZE", "1")),
         trust_remote_code=os.getenv("VLLM_TRUST_REMOTE_CODE", "0") == "1",
+        enable_prefix_caching=False,
+        # Uncomment to enable expert parallelism for MoE models.
+        # enable_expert_parallel=True,
     )