[2/n][ci] S3: Use full model path (#13564)

Signed-off-by: <>
This commit is contained in:
Kevin H. Luu 2025-02-20 01:20:15 -08:00 committed by GitHub
parent aa1e62d0db
commit a64a84433d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 26 additions and 19 deletions

View File

@@ -121,7 +121,7 @@ def test_cumem_with_cudagraph():
"model, use_v1", "model, use_v1",
[ [
# sleep mode with safetensors # sleep mode with safetensors
(f"{MODEL_WEIGHTS_S3_BUCKET}/Llama-3.2-1B", True), (f"{MODEL_WEIGHTS_S3_BUCKET}/meta-llama/Llama-3.2-1B", True),
# sleep mode with pytorch checkpoint # sleep mode with pytorch checkpoint
("facebook/opt-125m", False), ("facebook/opt-125m", False),
]) ])

View File

@@ -746,8 +746,7 @@ class VllmRunner:
**kwargs, **kwargs,
) -> None: ) -> None:
if model_name in MODELS_ON_S3 and not load_format: if model_name in MODELS_ON_S3 and not load_format:
model_name = (f"s3://vllm-ci-model-weights/" model_name = (f"{MODEL_WEIGHTS_S3_BUCKET}/{model_name}")
f"{model_name.split('/')[-1]}")
load_format = LoadFormat.RUNAI_STREAMER load_format = LoadFormat.RUNAI_STREAMER
if not load_format: if not load_format:
load_format = LoadFormat.AUTO load_format = LoadFormat.AUTO

View File

@@ -10,7 +10,8 @@ from vllm.sampling_params import SamplingParams
from ..conftest import MODEL_WEIGHTS_S3_BUCKET from ..conftest import MODEL_WEIGHTS_S3_BUCKET
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
@pytest.mark.parametrize("block_size", [16]) @pytest.mark.parametrize("block_size", [16])
def test_computed_prefix_blocks(model: str, block_size: int): def test_computed_prefix_blocks(model: str, block_size: int):
# This test checks if we are able to run the engine to completion # This test checks if we are able to run the engine to completion

View File

@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
from ..conftest import MODEL_WEIGHTS_S3_BUCKET from ..conftest import MODEL_WEIGHTS_S3_BUCKET
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
def test_computed_prefix_blocks(model: str): def test_computed_prefix_blocks(model: str):
# This test checks if the engine generates completions both with and # This test checks if the engine generates completions both with and
# without optional detokenization, that detokenization includes text # without optional detokenization, that detokenization includes text

View File

@@ -38,7 +38,8 @@ class CustomUniExecutor(UniProcExecutor):
CustomUniExecutorAsync = CustomUniExecutor CustomUniExecutorAsync = CustomUniExecutor
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
def test_custom_executor_type_checking(model): def test_custom_executor_type_checking(model):
with pytest.raises(ValueError): with pytest.raises(ValueError):
engine_args = EngineArgs(model=model, engine_args = EngineArgs(model=model,
@@ -51,7 +52,8 @@ def test_custom_executor_type_checking(model):
AsyncLLMEngine.from_engine_args(engine_args) AsyncLLMEngine.from_engine_args(engine_args)
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
def test_custom_executor(model, tmp_path): def test_custom_executor(model, tmp_path):
cwd = os.path.abspath(".") cwd = os.path.abspath(".")
os.chdir(tmp_path) os.chdir(tmp_path)
@@ -75,7 +77,8 @@ def test_custom_executor(model, tmp_path):
os.chdir(cwd) os.chdir(cwd)
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
def test_custom_executor_async(model, tmp_path): def test_custom_executor_async(model, tmp_path):
cwd = os.path.abspath(".") cwd = os.path.abspath(".")
os.chdir(tmp_path) os.chdir(tmp_path)
@@ -103,7 +106,8 @@ def test_custom_executor_async(model, tmp_path):
os.chdir(cwd) os.chdir(cwd)
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
def test_respect_ray(model): def test_respect_ray(model):
# even for TP=1 and PP=1, # even for TP=1 and PP=1,
# if users specify ray, we should use ray. # if users specify ray, we should use ray.

View File

@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
from ..conftest import MODEL_WEIGHTS_S3_BUCKET from ..conftest import MODEL_WEIGHTS_S3_BUCKET
@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"]) @pytest.mark.parametrize("model",
[f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
def test_skip_tokenizer_initialization(model: str): def test_skip_tokenizer_initialization(model: str):
# This test checks if the flag skip_tokenizer_init skips the initialization # This test checks if the flag skip_tokenizer_init skips the initialization
# of tokenizer and detokenizer. The generated output is expected to contain # of tokenizer and detokenizer. The generated output is expected to contain

View File

@@ -14,13 +14,14 @@ from .conftest import MODEL_WEIGHTS_S3_BUCKET
@pytest.mark.parametrize( @pytest.mark.parametrize(
("model_id", "expected_runner_type", "expected_task"), ("model_id", "expected_runner_type", "expected_task"),
[ [
(f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", "generate", "generate"), (f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2", "generate",
(f"{MODEL_WEIGHTS_S3_BUCKET}/e5-mistral-7b-instruct", "pooling", "generate"),
"embed"), (f"{MODEL_WEIGHTS_S3_BUCKET}/intfloat/e5-mistral-7b-instruct",
(f"{MODEL_WEIGHTS_S3_BUCKET}/Qwen2.5-1.5B-apeach", "pooling", "pooling", "embed"),
(f"{MODEL_WEIGHTS_S3_BUCKET}/jason9693/Qwen2.5-1.5B-apeach", "pooling",
"classify"), "classify"),
(f"{MODEL_WEIGHTS_S3_BUCKET}/ms-marco-MiniLM-L-6-v2", "pooling", (f"{MODEL_WEIGHTS_S3_BUCKET}/cross-encoder/ms-marco-MiniLM-L-6-v2",
"score"), "pooling", "score"),
("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"), ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
("openai/whisper-small", "transcription", "transcription"), ("openai/whisper-small", "transcription", "transcription"),
], ],

View File

@@ -21,7 +21,7 @@ def test_duplicated_ignored_sequence_group():
sampling_params = SamplingParams(temperature=0.01, sampling_params = SamplingParams(temperature=0.01,
top_p=0.1, top_p=0.1,
max_tokens=256) max_tokens=256)
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
load_format=LoadFormat.RUNAI_STREAMER, load_format=LoadFormat.RUNAI_STREAMER,
max_num_batched_tokens=4096, max_num_batched_tokens=4096,
tensor_parallel_size=1) tensor_parallel_size=1)
@@ -35,7 +35,7 @@ def test_max_tokens_none():
sampling_params = SamplingParams(temperature=0.01, sampling_params = SamplingParams(temperature=0.01,
top_p=0.1, top_p=0.1,
max_tokens=None) max_tokens=None)
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
load_format=LoadFormat.RUNAI_STREAMER, load_format=LoadFormat.RUNAI_STREAMER,
max_num_batched_tokens=4096, max_num_batched_tokens=4096,
tensor_parallel_size=1) tensor_parallel_size=1)
@@ -46,7 +46,7 @@ def test_max_tokens_none():
def test_gc(): def test_gc():
llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
load_format=LoadFormat.RUNAI_STREAMER, load_format=LoadFormat.RUNAI_STREAMER,
enforce_eager=True) enforce_eager=True)
del llm del llm