mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 23:45:01 +08:00
parent
aa1e62d0db
commit
a64a84433d
@ -121,7 +121,7 @@ def test_cumem_with_cudagraph():
|
|||||||
"model, use_v1",
|
"model, use_v1",
|
||||||
[
|
[
|
||||||
# sleep mode with safetensors
|
# sleep mode with safetensors
|
||||||
(f"{MODEL_WEIGHTS_S3_BUCKET}/Llama-3.2-1B", True),
|
(f"{MODEL_WEIGHTS_S3_BUCKET}/meta-llama/Llama-3.2-1B", True),
|
||||||
# sleep mode with pytorch checkpoint
|
# sleep mode with pytorch checkpoint
|
||||||
("facebook/opt-125m", False),
|
("facebook/opt-125m", False),
|
||||||
])
|
])
|
||||||
|
|||||||
@ -746,8 +746,7 @@ class VllmRunner:
|
|||||||
**kwargs,
|
**kwargs,
|
||||||
) -> None:
|
) -> None:
|
||||||
if model_name in MODELS_ON_S3 and not load_format:
|
if model_name in MODELS_ON_S3 and not load_format:
|
||||||
model_name = (f"s3://vllm-ci-model-weights/"
|
model_name = (f"{MODEL_WEIGHTS_S3_BUCKET}/{model_name}")
|
||||||
f"{model_name.split('/')[-1]}")
|
|
||||||
load_format = LoadFormat.RUNAI_STREAMER
|
load_format = LoadFormat.RUNAI_STREAMER
|
||||||
if not load_format:
|
if not load_format:
|
||||||
load_format = LoadFormat.AUTO
|
load_format = LoadFormat.AUTO
|
||||||
|
|||||||
@ -10,7 +10,8 @@ from vllm.sampling_params import SamplingParams
|
|||||||
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
@pytest.mark.parametrize("block_size", [16])
|
@pytest.mark.parametrize("block_size", [16])
|
||||||
def test_computed_prefix_blocks(model: str, block_size: int):
|
def test_computed_prefix_blocks(model: str, block_size: int):
|
||||||
# This test checks if we are able to run the engine to completion
|
# This test checks if we are able to run the engine to completion
|
||||||
|
|||||||
@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
|
|||||||
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
def test_computed_prefix_blocks(model: str):
|
def test_computed_prefix_blocks(model: str):
|
||||||
# This test checks if the engine generates completions both with and
|
# This test checks if the engine generates completions both with and
|
||||||
# without optional detokenization, that detokenization includes text
|
# without optional detokenization, that detokenization includes text
|
||||||
|
|||||||
@ -38,7 +38,8 @@ class CustomUniExecutor(UniProcExecutor):
|
|||||||
CustomUniExecutorAsync = CustomUniExecutor
|
CustomUniExecutorAsync = CustomUniExecutor
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
def test_custom_executor_type_checking(model):
|
def test_custom_executor_type_checking(model):
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
engine_args = EngineArgs(model=model,
|
engine_args = EngineArgs(model=model,
|
||||||
@ -51,7 +52,8 @@ def test_custom_executor_type_checking(model):
|
|||||||
AsyncLLMEngine.from_engine_args(engine_args)
|
AsyncLLMEngine.from_engine_args(engine_args)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
def test_custom_executor(model, tmp_path):
|
def test_custom_executor(model, tmp_path):
|
||||||
cwd = os.path.abspath(".")
|
cwd = os.path.abspath(".")
|
||||||
os.chdir(tmp_path)
|
os.chdir(tmp_path)
|
||||||
@ -75,7 +77,8 @@ def test_custom_executor(model, tmp_path):
|
|||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
def test_custom_executor_async(model, tmp_path):
|
def test_custom_executor_async(model, tmp_path):
|
||||||
cwd = os.path.abspath(".")
|
cwd = os.path.abspath(".")
|
||||||
os.chdir(tmp_path)
|
os.chdir(tmp_path)
|
||||||
@ -103,7 +106,8 @@ def test_custom_executor_async(model, tmp_path):
|
|||||||
os.chdir(cwd)
|
os.chdir(cwd)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
def test_respect_ray(model):
|
def test_respect_ray(model):
|
||||||
# even for TP=1 and PP=1,
|
# even for TP=1 and PP=1,
|
||||||
# if users specify ray, we should use ray.
|
# if users specify ray, we should use ray.
|
||||||
|
|||||||
@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
|
|||||||
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
from ..conftest import MODEL_WEIGHTS_S3_BUCKET
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
|
@pytest.mark.parametrize("model",
|
||||||
|
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
|
||||||
def test_skip_tokenizer_initialization(model: str):
|
def test_skip_tokenizer_initialization(model: str):
|
||||||
# This test checks if the flag skip_tokenizer_init skips the initialization
|
# This test checks if the flag skip_tokenizer_init skips the initialization
|
||||||
# of tokenizer and detokenizer. The generated output is expected to contain
|
# of tokenizer and detokenizer. The generated output is expected to contain
|
||||||
|
|||||||
@ -14,13 +14,14 @@ from .conftest import MODEL_WEIGHTS_S3_BUCKET
|
|||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
("model_id", "expected_runner_type", "expected_task"),
|
("model_id", "expected_runner_type", "expected_task"),
|
||||||
[
|
[
|
||||||
(f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", "generate", "generate"),
|
(f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2", "generate",
|
||||||
(f"{MODEL_WEIGHTS_S3_BUCKET}/e5-mistral-7b-instruct", "pooling",
|
"generate"),
|
||||||
"embed"),
|
(f"{MODEL_WEIGHTS_S3_BUCKET}/intfloat/e5-mistral-7b-instruct",
|
||||||
(f"{MODEL_WEIGHTS_S3_BUCKET}/Qwen2.5-1.5B-apeach", "pooling",
|
"pooling", "embed"),
|
||||||
|
(f"{MODEL_WEIGHTS_S3_BUCKET}/jason9693/Qwen2.5-1.5B-apeach", "pooling",
|
||||||
"classify"),
|
"classify"),
|
||||||
(f"{MODEL_WEIGHTS_S3_BUCKET}/ms-marco-MiniLM-L-6-v2", "pooling",
|
(f"{MODEL_WEIGHTS_S3_BUCKET}/cross-encoder/ms-marco-MiniLM-L-6-v2",
|
||||||
"score"),
|
"pooling", "score"),
|
||||||
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
|
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
|
||||||
("openai/whisper-small", "transcription", "transcription"),
|
("openai/whisper-small", "transcription", "transcription"),
|
||||||
],
|
],
|
||||||
|
|||||||
@ -21,7 +21,7 @@ def test_duplicated_ignored_sequence_group():
|
|||||||
sampling_params = SamplingParams(temperature=0.01,
|
sampling_params = SamplingParams(temperature=0.01,
|
||||||
top_p=0.1,
|
top_p=0.1,
|
||||||
max_tokens=256)
|
max_tokens=256)
|
||||||
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2",
|
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
|
||||||
load_format=LoadFormat.RUNAI_STREAMER,
|
load_format=LoadFormat.RUNAI_STREAMER,
|
||||||
max_num_batched_tokens=4096,
|
max_num_batched_tokens=4096,
|
||||||
tensor_parallel_size=1)
|
tensor_parallel_size=1)
|
||||||
@ -35,7 +35,7 @@ def test_max_tokens_none():
|
|||||||
sampling_params = SamplingParams(temperature=0.01,
|
sampling_params = SamplingParams(temperature=0.01,
|
||||||
top_p=0.1,
|
top_p=0.1,
|
||||||
max_tokens=None)
|
max_tokens=None)
|
||||||
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2",
|
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
|
||||||
load_format=LoadFormat.RUNAI_STREAMER,
|
load_format=LoadFormat.RUNAI_STREAMER,
|
||||||
max_num_batched_tokens=4096,
|
max_num_batched_tokens=4096,
|
||||||
tensor_parallel_size=1)
|
tensor_parallel_size=1)
|
||||||
@ -46,7 +46,7 @@ def test_max_tokens_none():
|
|||||||
|
|
||||||
|
|
||||||
def test_gc():
|
def test_gc():
|
||||||
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2",
|
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
|
||||||
load_format=LoadFormat.RUNAI_STREAMER,
|
load_format=LoadFormat.RUNAI_STREAMER,
|
||||||
enforce_eager=True)
|
enforce_eager=True)
|
||||||
del llm
|
del llm
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user