mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-09 12:15:40 +08:00
Disable enforce_eager for V1 TPU sampler and structured output tests (#17016)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
parent
b411418ff0
commit
14288d1332
@@ -19,6 +19,7 @@ docker run --privileged --net host --shm-size=16G -it \
|
||||
vllm-tpu /bin/bash -c "python3 -m pip install git+https://github.com/thuml/depyf.git \
|
||||
&& python3 -m pip install pytest pytest-asyncio tpu-info \
|
||||
&& python3 -m pip install lm_eval[api]==0.4.4 \
|
||||
&& export VLLM_XLA_CACHE_PATH= \
|
||||
&& export VLLM_USE_V1=1 \
|
||||
&& export VLLM_XLA_CHECK_RECOMPILATION=1 \
|
||||
&& echo HARDWARE \
|
||||
|
||||
@@ -13,6 +13,7 @@ from pydantic import BaseModel
|
||||
|
||||
from vllm.entrypoints.llm import LLM
|
||||
from vllm.outputs import RequestOutput
|
||||
+ from vllm.platforms import current_platform
|
||||
from vllm.sampling_params import GuidedDecodingParams, SamplingParams
|
||||
|
||||
PARAMS_MODELS_BACKENDS_TOKENIZER_MODE = [
|
||||
@@ -63,10 +64,13 @@ def test_structured_output(
|
||||
):
|
||||
monkeypatch.setenv("VLLM_USE_V1", "1")
|
||||
|
||||
+ # Don't use eager execution on TPUs because we want to test for no
|
||||
+ # recompilation at runtime
|
||||
+ enforce_eager = bool(not current_platform.is_tpu())
|
||||
# Use a single LLM instance for several scenarios to
|
||||
# speed up the test suite.
|
||||
llm = LLM(model=model_name,
|
||||
- enforce_eager=True,
|
||||
+ enforce_eager=enforce_eager,
|
||||
max_model_len=1024,
|
||||
guided_decoding_backend=guided_decoding_backend,
|
||||
tokenizer_mode=tokenizer_mode)
|
||||
|
||||
@@ -23,7 +23,7 @@ def test_sampler_different(model_name: str):
|
||||
different results.
|
||||
"""
|
||||
llm = LLM(model_name,
|
||||
- enforce_eager=True,
|
||||
+ enforce_eager=False,
|
||||
max_num_seqs=1,
|
||||
max_model_len=512,
|
||||
max_num_batched_tokens=512)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user