diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py
index ea40c48027205..5ae84c7ef4c66 100644
--- a/tests/samplers/test_logprobs.py
+++ b/tests/samplers/test_logprobs.py
@@ -25,7 +25,7 @@ def test_ranks(
     flat_logprobs,
     example_prompts,
 ):
-    with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS) as vllm_model:
+    with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=True) as vllm_model:
         tokenizer = vllm_model.llm.get_tokenizer()
         example_prompt_tokens = [tokenizer.encode(prompt) for prompt in example_prompts]
         sampling_params = SamplingParams(
diff --git a/tests/samplers/test_no_bad_words.py b/tests/samplers/test_no_bad_words.py
index 74047d2f03558..4171cb0d6cc48 100644
--- a/tests/samplers/test_no_bad_words.py
+++ b/tests/samplers/test_no_bad_words.py
@@ -94,7 +94,7 @@ class TestTwoTokenBadWord:
     )[0]

     def test_two_token_bad_word(self, vllm_runner):
-        with vllm_runner(self.MODEL, dtype="half") as llm:
+        with vllm_runner(self.MODEL, dtype="half", enforce_eager=True) as llm:
             output_token_ids = self._generate(llm)
             assert output_token_ids[:2] == [
                 self.target_token_id1,