From eea8b6634cfb641baff99e079650f4a5ef819127 Mon Sep 17 00:00:00 2001 From: qli88 Date: Fri, 21 Nov 2025 07:22:43 +0000 Subject: [PATCH 1/4] [CI/Build] Enable eager mode for Samplers-Test group Signed-off-by: qli88 --- tests/samplers/test_logprobs.py | 2 +- tests/samplers/test_no_bad_words.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py index ea40c48027205..5ae84c7ef4c66 100644 --- a/tests/samplers/test_logprobs.py +++ b/tests/samplers/test_logprobs.py @@ -25,7 +25,7 @@ def test_ranks( flat_logprobs, example_prompts, ): - with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS) as vllm_model: + with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=True) as vllm_model: tokenizer = vllm_model.llm.get_tokenizer() example_prompt_tokens = [tokenizer.encode(prompt) for prompt in example_prompts] sampling_params = SamplingParams( diff --git a/tests/samplers/test_no_bad_words.py b/tests/samplers/test_no_bad_words.py index 74047d2f03558..4171cb0d6cc48 100644 --- a/tests/samplers/test_no_bad_words.py +++ b/tests/samplers/test_no_bad_words.py @@ -94,7 +94,7 @@ class TestTwoTokenBadWord: )[0] def test_two_token_bad_word(self, vllm_runner): - with vllm_runner(self.MODEL, dtype="half") as llm: + with vllm_runner(self.MODEL, dtype="half", enforce_eager=True) as llm: output_token_ids = self._generate(llm) assert output_token_ids[:2] == [ self.target_token_id1, From a1033a9542a8f42e4d69e5ed95598c814956eb13 Mon Sep 17 00:00:00 2001 From: qli88 Date: Fri, 21 Nov 2025 15:58:03 +0000 Subject: [PATCH 2/4] ruff change Signed-off-by: qli88 --- tests/samplers/test_logprobs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py index 5ae84c7ef4c66..8bc43b1f2e03c 100644 --- a/tests/samplers/test_logprobs.py +++ b/tests/samplers/test_logprobs.py @@ -25,7 +25,9 @@ def test_ranks( flat_logprobs, example_prompts, ): - with vllm_runner(model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=True) as vllm_model: + with vllm_runner( + model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=True + ) as vllm_model: tokenizer = vllm_model.llm.get_tokenizer() example_prompt_tokens = [tokenizer.encode(prompt) for prompt in example_prompts] sampling_params = SamplingParams( From 23b4fd1715d51d9626bd30b5524deb40dbb81bbc Mon Sep 17 00:00:00 2001 From: qli88 Date: Tue, 16 Dec 2025 16:40:19 +0000 Subject: [PATCH 3/4] Disable graph-mode only on ROCm platform Signed-off-by: qli88 --- tests/samplers/test_logprobs.py | 6 +++++- tests/samplers/test_no_bad_words.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py index 8bc43b1f2e03c..27188c230d810 100644 --- a/tests/samplers/test_logprobs.py +++ b/tests/samplers/test_logprobs.py @@ -5,6 +5,7 @@ import pytest from vllm import SamplingParams from vllm.logprobs import FlatLogprobs +from vllm.platforms import current_platform MODELS = ["distilbert/distilgpt2"] MAX_TOKENS = 5 @@ -25,8 +26,11 @@ def test_ranks( flat_logprobs, example_prompts, ): + # TODO: Remove once graph mode is fixed for distilbert/distilgpt2 on ROCm. + eager_mode = current_platform.is_rocm() + with vllm_runner( - model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=True + model, dtype=dtype, max_logprobs=MAX_LOGPROBS, enforce_eager=eager_mode ) as vllm_model: tokenizer = vllm_model.llm.get_tokenizer() example_prompt_tokens = [tokenizer.encode(prompt) for prompt in example_prompts] diff --git a/tests/samplers/test_no_bad_words.py b/tests/samplers/test_no_bad_words.py index 4171cb0d6cc48..9d58c8f1a613e 100644 --- a/tests/samplers/test_no_bad_words.py +++ b/tests/samplers/test_no_bad_words.py @@ -9,6 +9,7 @@ Run `pytest tests/samplers/test_no_bad_words.py`. from transformers import AutoTokenizer from vllm import LLM, SamplingParams +from vllm.platforms import current_platform def _generate( @@ -94,7 +95,10 @@ class TestTwoTokenBadWord: )[0] def test_two_token_bad_word(self, vllm_runner): - with vllm_runner(self.MODEL, dtype="half", enforce_eager=True) as llm: + # TODO: Remove once graph mode is fixed for distilbert/distilgpt2 on ROCm. + eager_mode = current_platform.is_rocm() + + with vllm_runner(self.MODEL, dtype="half", enforce_eager=eager_mode) as llm: output_token_ids = self._generate(llm) assert output_token_ids[:2] == [ self.target_token_id1, From c810903fc955b48de96af5c451f6a12d0c06eeeb Mon Sep 17 00:00:00 2001 From: qli88 Date: Tue, 16 Dec 2025 17:20:46 +0000 Subject: [PATCH 4/4] ruff Signed-off-by: qli88 --- tests/samplers/test_no_bad_words.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/samplers/test_no_bad_words.py b/tests/samplers/test_no_bad_words.py index 9d58c8f1a613e..5721efcdeaf7e 100644 --- a/tests/samplers/test_no_bad_words.py +++ b/tests/samplers/test_no_bad_words.py @@ -96,7 +96,8 @@ class TestTwoTokenBadWord: def test_two_token_bad_word(self, vllm_runner): # TODO: Remove once graph mode is fixed for distilbert/distilgpt2 on ROCm. - eager_mode = current_platform.is_rocm() + + eager_mode = current_platform.is_rocm() with vllm_runner(self.MODEL, dtype="half", enforce_eager=eager_mode) as llm: output_token_ids = self._generate(llm)