From a64a84433d6d1162290bbcd48b161c8ddff2c801 Mon Sep 17 00:00:00 2001
From: "Kevin H. Luu"
Date: Thu, 20 Feb 2025 01:20:15 -0800
Subject: [PATCH] [2/n][ci] S3: Use full model path (#13564)

Signed-off-by: <>
---
 tests/basic_correctness/test_cumem.py       |  2 +-
 tests/conftest.py                           |  3 +--
 tests/engine/test_computed_prefix_blocks.py |  3 ++-
 tests/engine/test_detokenization.py         |  3 ++-
 tests/engine/test_executor.py               | 12 ++++++++----
 tests/engine/test_skip_tokenizer_init.py    |  3 ++-
 tests/test_config.py                        | 13 +++++++------
 tests/test_regression.py                    |  6 +++---
 8 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/tests/basic_correctness/test_cumem.py b/tests/basic_correctness/test_cumem.py
index 7ebccdb5caed..f1148fc8e3f4 100644
--- a/tests/basic_correctness/test_cumem.py
+++ b/tests/basic_correctness/test_cumem.py
@@ -121,7 +121,7 @@ def test_cumem_with_cudagraph():
     "model, use_v1",
     [
         # sleep mode with safetensors
-        (f"{MODEL_WEIGHTS_S3_BUCKET}/Llama-3.2-1B", True),
+        (f"{MODEL_WEIGHTS_S3_BUCKET}/meta-llama/Llama-3.2-1B", True),
         # sleep mode with pytorch checkpoint
         ("facebook/opt-125m", False),
     ])
diff --git a/tests/conftest.py b/tests/conftest.py
index ca268dd6657c..9304b8f17dca 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -746,8 +746,7 @@ class VllmRunner:
         **kwargs,
     ) -> None:
         if model_name in MODELS_ON_S3 and not load_format:
-            model_name = (f"s3://vllm-ci-model-weights/"
-                          f"{model_name.split('/')[-1]}")
+            model_name = (f"{MODEL_WEIGHTS_S3_BUCKET}/{model_name}")
             load_format = LoadFormat.RUNAI_STREAMER
         if not load_format:
             load_format = LoadFormat.AUTO
diff --git a/tests/engine/test_computed_prefix_blocks.py b/tests/engine/test_computed_prefix_blocks.py
index 93907ecae554..51e7c8e7739d 100644
--- a/tests/engine/test_computed_prefix_blocks.py
+++ b/tests/engine/test_computed_prefix_blocks.py
@@ -10,7 +10,8 @@ from vllm.sampling_params import SamplingParams
 from ..conftest import MODEL_WEIGHTS_S3_BUCKET
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 @pytest.mark.parametrize("block_size", [16])
 def test_computed_prefix_blocks(model: str, block_size: int):
     # This test checks if we are able to run the engine to completion
diff --git a/tests/engine/test_detokenization.py b/tests/engine/test_detokenization.py
index ab594aeee40d..6ae4be2e4786 100644
--- a/tests/engine/test_detokenization.py
+++ b/tests/engine/test_detokenization.py
@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
 from ..conftest import MODEL_WEIGHTS_S3_BUCKET
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 def test_computed_prefix_blocks(model: str):
     # This test checks if the engine generates completions both with and
     # without optional detokenization, that detokenization includes text
diff --git a/tests/engine/test_executor.py b/tests/engine/test_executor.py
index 31c07e709bd9..6a86401ce5db 100644
--- a/tests/engine/test_executor.py
+++ b/tests/engine/test_executor.py
@@ -38,7 +38,8 @@ class CustomUniExecutor(UniProcExecutor):
 CustomUniExecutorAsync = CustomUniExecutor
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 def test_custom_executor_type_checking(model):
     with pytest.raises(ValueError):
         engine_args = EngineArgs(model=model,
@@ -51,7 +52,8 @@ def test_custom_executor_type_checking(model):
         AsyncLLMEngine.from_engine_args(engine_args)
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 def test_custom_executor(model, tmp_path):
     cwd = os.path.abspath(".")
     os.chdir(tmp_path)
@@ -75,7 +77,8 @@ def test_custom_executor(model, tmp_path):
         os.chdir(cwd)
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 def test_custom_executor_async(model, tmp_path):
     cwd = os.path.abspath(".")
     os.chdir(tmp_path)
@@ -103,7 +106,8 @@ def test_custom_executor_async(model, tmp_path):
         os.chdir(cwd)
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 def test_respect_ray(model):
     # even for TP=1 and PP=1,
     # if users specify ray, we should use ray.
diff --git a/tests/engine/test_skip_tokenizer_init.py b/tests/engine/test_skip_tokenizer_init.py
index fee7fd3f6aad..b0930eaac17b 100644
--- a/tests/engine/test_skip_tokenizer_init.py
+++ b/tests/engine/test_skip_tokenizer_init.py
@@ -9,7 +9,8 @@ from vllm.sampling_params import SamplingParams
 from ..conftest import MODEL_WEIGHTS_S3_BUCKET
 
 
-@pytest.mark.parametrize("model", [f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2"])
+@pytest.mark.parametrize("model",
+                         [f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2"])
 def test_skip_tokenizer_initialization(model: str):
     # This test checks if the flag skip_tokenizer_init skips the initialization
     # of tokenizer and detokenizer. The generated output is expected to contain
diff --git a/tests/test_config.py b/tests/test_config.py
index 4a1718613302..bc87e6ccdfcc 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -14,13 +14,14 @@ from .conftest import MODEL_WEIGHTS_S3_BUCKET
 @pytest.mark.parametrize(
     ("model_id", "expected_runner_type", "expected_task"),
     [
-        (f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2", "generate", "generate"),
-        (f"{MODEL_WEIGHTS_S3_BUCKET}/e5-mistral-7b-instruct", "pooling",
-         "embed"),
-        (f"{MODEL_WEIGHTS_S3_BUCKET}/Qwen2.5-1.5B-apeach", "pooling",
+        (f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2", "generate",
+         "generate"),
+        (f"{MODEL_WEIGHTS_S3_BUCKET}/intfloat/e5-mistral-7b-instruct",
+         "pooling", "embed"),
+        (f"{MODEL_WEIGHTS_S3_BUCKET}/jason9693/Qwen2.5-1.5B-apeach", "pooling",
          "classify"),
-        (f"{MODEL_WEIGHTS_S3_BUCKET}/ms-marco-MiniLM-L-6-v2", "pooling",
-         "score"),
+        (f"{MODEL_WEIGHTS_S3_BUCKET}/cross-encoder/ms-marco-MiniLM-L-6-v2",
+         "pooling", "score"),
         ("Qwen/Qwen2.5-Math-RM-72B", "pooling", "reward"),
         ("openai/whisper-small", "transcription", "transcription"),
     ],
diff --git a/tests/test_regression.py b/tests/test_regression.py
index e9b21e1a7232..8cecc2892b6e 100644
--- a/tests/test_regression.py
+++ b/tests/test_regression.py
@@ -21,7 +21,7 @@ def test_duplicated_ignored_sequence_group():
     sampling_params = SamplingParams(temperature=0.01,
                                      top_p=0.1,
                                      max_tokens=256)
-    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2",
+    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
               load_format=LoadFormat.RUNAI_STREAMER,
               max_num_batched_tokens=4096,
               tensor_parallel_size=1)
@@ -35,7 +35,7 @@ def test_max_tokens_none():
     sampling_params = SamplingParams(temperature=0.01,
                                      top_p=0.1,
                                      max_tokens=None)
-    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2",
+    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
               load_format=LoadFormat.RUNAI_STREAMER,
               max_num_batched_tokens=4096,
               tensor_parallel_size=1)
@@ -46,7 +46,7 @@ def test_max_tokens_none():
 
 
 def test_gc():
-    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilgpt2",
+    llm = LLM(model=f"{MODEL_WEIGHTS_S3_BUCKET}/distilbert/distilgpt2",
               load_format=LoadFormat.RUNAI_STREAMER,
               enforce_eager=True)
     del llm
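
Note: the tests/conftest.py hunk is the behavioral core of this change. VllmRunner now keeps the full Hugging Face repo id ("org/name") in the S3 key instead of only the basename (the old code used model_name.split('/')[-1]), so two models that share a basename can no longer collide in the CI bucket. Below is a minimal, self-contained sketch of the new resolution logic; the LoadFormat stub, the bucket value (taken from the removed hard-coded string), and the allowlist entry are illustrative assumptions, not the test suite's actual definitions.

    from enum import Enum


    class LoadFormat(str, Enum):
        # Stub covering only the two formats exercised by the hunk above.
        AUTO = "auto"
        RUNAI_STREAMER = "runai_streamer"


    MODEL_WEIGHTS_S3_BUCKET = "s3://vllm-ci-model-weights"  # assumed value
    MODELS_ON_S3 = {"distilbert/distilgpt2"}  # assumed allowlist entry


    def resolve_model(model_name, load_format=None):
        # Mirrors VllmRunner.__init__ after this patch: the full repo id
        # is appended to the bucket, preserving the "org/" prefix.
        if model_name in MODELS_ON_S3 and not load_format:
            model_name = f"{MODEL_WEIGHTS_S3_BUCKET}/{model_name}"
            load_format = LoadFormat.RUNAI_STREAMER
        return model_name, load_format or LoadFormat.AUTO


    print(resolve_model("distilbert/distilgpt2"))
    # ('s3://vllm-ci-model-weights/distilbert/distilgpt2',
    #  <LoadFormat.RUNAI_STREAMER: 'runai_streamer'>)

Models not on the allowlist, or calls that pass an explicit load_format, fall through unchanged, which is why the non-S3 cases in the diff (e.g. "facebook/opt-125m", "Qwen/Qwen2.5-Math-RM-72B") keep their plain repo ids.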