mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-11 03:45:02 +08:00
Add runai model streamer e2e test for GCS (#28079)
Signed-off-by: Alexis MacAskill <amacaskill@google.com>
This commit is contained in:
parent
e70fbc599b
commit
a47d94f18c
@@ -546,8 +546,11 @@ steps:
 - label: Model Executor Test # 23min
   timeout_in_minutes: 35
+  torch_nightly: true
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
+    - vllm/engine/arg_utils.py
+    - vllm/config/model.py
     - vllm/model_executor
     - tests/model_executor
     - tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
|||||||
@@ -1,12 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import pytest
+
 from vllm import SamplingParams
 from vllm.config.load import LoadConfig
 from vllm.model_executor.model_loader import get_model_loader

 load_format = "runai_streamer"
 test_model = "openai-community/gpt2"
+# TODO(amacaskill): Replace with a GKE owned GCS bucket.
+test_gcs_model = "gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"

 prompts = [
     "Hello, my name is",
@@ -32,3 +36,16 @@ def test_runai_model_loader_download_files(vllm_runner):
     with vllm_runner(test_model, load_format=load_format) as llm:
         deserialized_outputs = llm.generate(prompts, sampling_params)
         assert deserialized_outputs
+
+
def test_runai_model_loader_download_files_gcs(
    vllm_runner, monkeypatch: pytest.MonkeyPatch
):
    """End-to-end load of a model from a public GCS bucket via runai_streamer.

    Sets the streamer's GCS environment knobs (fake project, anonymous
    credentials, and the real GCS endpoint as the "emulator" endpoint) so no
    GCP credentials are required, then checks that generation yields output.
    """
    # Environment required by the Run:ai streamer's GCS backend; values are
    # applied in the same order the original test set them.
    gcs_env = (
        ("GOOGLE_CLOUD_PROJECT", "fake-project"),
        ("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true"),
        ("CLOUD_STORAGE_EMULATOR_ENDPOINT", "https://storage.googleapis.com"),
    )
    for key, value in gcs_env:
        monkeypatch.setenv(key, value)

    with vllm_runner(test_gcs_model, load_format=load_format) as llm:
        outputs = llm.generate(prompts, sampling_params)
        # Non-empty output is the only assertion: this is a smoke test that
        # the weights streamed from GCS produce a working model.
        assert outputs
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user