diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py index 062258930fe1..a39f24c80f1c 100644 --- a/tests/models/language/generation/test_common.py +++ b/tests/models/language/generation/test_common.py @@ -119,6 +119,12 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str, # in parts of the operators pytest.skip(f"Skipping '{model}' model test with AITER kernel.") + # Note: this skip can be removed once + # https://github.com/vllm-project/vllm/pull/24278 is finished + if current_platform.is_cpu() and use_prompt_embeds: + pytest.skip("Skipping use_prompt_embeds=True with " + "V1-only CPU backend.") + with hf_runner(model) as hf_model: hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs)