Disable enforce_eager for V1 TPU sampler and structured output tests (#17016)

Signed-off-by: mgoin <mgoin64@gmail.com>
commit 14288d1332
parent b411418ff0
Author: Michael Goin
Date: 2025-04-24 03:50:09 -06:00 (committed by GitHub)
3 changed files with 7 additions and 2 deletions
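
The change gates eager execution on the platform instead of hard-coding it. As a minimal sketch of the pattern in Python, assuming only the vLLM APIs that appear in the diff below (LLM and current_platform; the model name is a placeholder):

    from vllm.entrypoints.llm import LLM
    from vllm.platforms import current_platform

    # On TPU, keep compiled (non-eager) execution so that the CI setting
    # VLLM_XLA_CHECK_RECOMPILATION=1 can flag unexpected recompilation;
    # elsewhere, eager mode keeps the tests fast.
    enforce_eager = bool(not current_platform.is_tpu())

    llm = LLM(model="facebook/opt-125m",  # placeholder model for illustration
              enforce_eager=enforce_eager,
              max_model_len=1024)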


@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
   vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
   && python3 -m pip install pytest pytest-asyncio tpu-info \
   && python3 -m pip install lm_eval[api]==0.4.4 \
+  && export VLLM_XLA_CACHE_PATH= \
   && export VLLM_USE_V1=1 \
   && export VLLM_XLA_CHECK_RECOMPILATION=1 \
   && echo HARDWARE \
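
The added export clears VLLM_XLA_CACHE_PATH. The apparent intent, inferred from the variable names rather than stated in the commit, is to disable the persistent XLA compilation cache so that VLLM_XLA_CHECK_RECOMPILATION=1 observes real compilations instead of cache hits. A hedged sketch of setting the same environment from Python, e.g. at the top of a test:

    import os

    # Assumed semantics: an empty cache path disables the on-disk XLA cache,
    # and the recompilation check makes unexpected runtime recompiles fatal.
    os.environ["VLLM_XLA_CACHE_PATH"] = ""
    os.environ["VLLM_USE_V1"] = "1"
    os.environ["VLLM_XLA_CHECK_RECOMPILATION"] = "1"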


@@ -13,6 +13,7 @@ from pydantic import BaseModel
 from vllm.entrypoints.llm import LLM
 from vllm.outputs import RequestOutput
+from vllm.platforms import current_platform
 from vllm.sampling_params import GuidedDecodingParams, SamplingParams

 PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
@@ -63,10 +64,13 @@ def test_structured_output(
 ):
     monkeypatch.setenv("VLLM_USE_V1", "1")

+    # Don't use eager execution on TPUs because we want to test for no
+    # recompilation at runtime
+    enforce_eager = bool(not current_platform.is_tpu())
     # Use a single LLM instance for several scenarios to
     # speed up the test suite.
     llm = LLM(model=model_name,
-              enforce_eager=True,
+              enforce_eager=enforce_eager,
               max_model_len=1024,
               guided_decoding_backend=guided_decoding_backend,
               tokenizer_mode=tokenizer_mode)
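
For context, a hedged sketch of how the shared llm instance is typically driven in these structured-output scenarios, using the GuidedDecodingParams and SamplingParams imports from the diff; the schema and prompt are hypothetical, and the real test parametrizes models, backends, and tokenizer modes:

    from vllm.sampling_params import GuidedDecodingParams, SamplingParams

    # Hypothetical JSON schema, used only for illustration.
    schema = {"type": "object", "properties": {"name": {"type": "string"}}}
    guided = GuidedDecodingParams(json=schema)
    params = SamplingParams(temperature=1.0, max_tokens=64,
                            guided_decoding=guided)
    outputs = llm.generate("Give me a JSON object with a name field.", params)
    print(outputs[0].outputs[0].text)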


@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
     different results.
     """
     llm = LLM(model_name,
-              enforce_eager=True,
+              enforce_eager=False,
               max_num_seqs=1,
               max_model_len=512,
               max_num_batched_tokens=512)
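
Finally, a sketch of the kind of check test_sampler_different performs, going by its docstring (the exact prompts and assertions are not part of this diff, so the details below are assumptions):

    from vllm.sampling_params import SamplingParams

    # Two seeds at nonzero temperature should produce different samples.
    out_a = llm.generate("Write a short story.",
                         SamplingParams(temperature=0.9, seed=1, max_tokens=64))
    out_b = llm.generate("Write a short story.",
                         SamplingParams(temperature=0.9, seed=2, max_tokens=64))
    assert out_a[0].outputs[0].text != out_b[0].outputs[0].text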