diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py
index ea40c48027205..5ae84c7ef4c66 100644
--- a/tests/samplers/test_logprobs.py
+++ b/tests/samplers/test_logprobs.py
@@ -25,7 +25,7 @@ def test_ranks(
     flat_logprobs,
     example_prompts,
 ):
-    with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS) as vllm_model:
+    with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=True) as vllm_model:
         tokenizer = vllm_model.llm.get_tokenizer()
         example_prompt_tokens = [tokenizer.encode(prompt) for prompt in example_prompts]
         sampling_params = SamplingParams(
diff --git a/tests/samplers/test_no_bad_words.py b/tests/samplers/test_no_bad_words.py
index 74047d2f03558..4171cb0d6cc48 100644
--- a/tests/samplers/test_no_bad_words.py
+++ b/tests/samplers/test_no_bad_words.py
@@ -94,7 +94,7 @@ class TestTwoTokenBadWord:
     )[0]

     def test_two_token_bad_word(self, vllm_runner):
-        with vllm_runner(self.MODEL, dtype="half") as llm:
+        with vllm_runner(self.MODEL, dtype="half", enforce_eager=True) as llm:
             output_token_ids = self._generate(llm)
             assert output_token_ids[:2] == [
                 self.target_token_id1,