Add runai model streamer e2e test for GCS (#28079)

Signed-off-by: Alexis MacAskill <amacaskill@google.com>
2025-12-10 23:15:00 +08:00 · 2025-11-06 19:07:54 -08:00 · 2025-11-06 19:07:54 -08:00 · a47d94f18c
commit a47d94f18c
parent e70fbc599b
2 changed files with 20 additions and 0 deletions
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@ -546,8 +546,11 @@ steps:

 - label: Model Executor Test # 23min
  timeout_in_minutes: 35
+  torch_nightly: true
  mirror_hardwares: [amdexperimental]
  source_file_dependencies:
+  - vllm/engine/arg_utils.py
+  - vllm/config/model.py
  - vllm/model_executor
  - tests/model_executor
  - tests/entrypoints/openai/test_tensorizer_entrypoint.py
--- a/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
+++ b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py
@ -1,12 +1,16 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+import pytest
+
 from vllm import SamplingParams
 from vllm.config.load import LoadConfig
 from vllm.model_executor.model_loader import get_model_loader

 load_format = "runai_streamer"
 test_model = "openai-community/gpt2"
+# TODO(amacaskill): Replace with a GKE owned GCS bucket.
+test_gcs_model = "gs://vertex-model-garden-public-us/codegemma/codegemma-2b/"

 prompts = [
    "Hello, my name is",
@ -32,3 +36,16 @@ def test_runai_model_loader_download_files(vllm_runner):
    with vllm_runner(test_model, load_format=load_format) as llm:
        deserialized_outputs = llm.generate(prompts, sampling_params)
        assert deserialized_outputs
+
+
+def test_runai_model_loader_download_files_gcs(
+    vllm_runner, monkeypatch: pytest.MonkeyPatch
+):
+    monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")
+    monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
+    monkeypatch.setenv(
+        "CLOUD_STORAGE_EMULATOR_ENDPOINT", "https://storage.googleapis.com"
+    )
+    with vllm_runner(test_gcs_model, load_format=load_format) as llm:
+        deserialized_outputs = llm.generate(prompts, sampling_params)
+        assert deserialized_outputs