mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 17:26:02 +08:00
Run:ai model streamer add GCS package support (#24909)
Signed-off-by: Peter Schuurman <psch@google.com>
This commit is contained in:
parent
169313b9f8
commit
be22bb6f3d
@ -24,6 +24,13 @@ vllm serve s3://core-llm/Llama-3-8b \
|
|||||||
--load-format runai_streamer
|
--load-format runai_streamer
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To run a model from Google Cloud Storage, run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
vllm serve gs://core-llm/Llama-3-8b \
|
||||||
|
--load-format runai_streamer
|
||||||
|
```
|
||||||
|
|
||||||
To run a model from an S3-compatible object store, run:
|
To run a model from an S3-compatible object store, run:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|||||||
@ -43,6 +43,6 @@ tritonclient==2.51.0
|
|||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
numba == 0.61.2; python_version > '3.9'
|
||||||
numpy
|
numpy
|
||||||
runai-model-streamer[s3]==0.14.0
|
runai-model-streamer[s3,gcs]==0.14.0
|
||||||
fastsafetensors>=0.1.10
|
fastsafetensors>=0.1.10
|
||||||
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
||||||
|
|||||||
@ -13,6 +13,6 @@ tensorizer==2.10.1
|
|||||||
packaging>=24.2
|
packaging>=24.2
|
||||||
setuptools>=77.0.3,<80.0.0
|
setuptools>=77.0.3,<80.0.0
|
||||||
setuptools-scm>=8
|
setuptools-scm>=8
|
||||||
runai-model-streamer[s3]==0.14.0
|
runai-model-streamer[s3,gcs]==0.14.0
|
||||||
conch-triton-kernels==1.2.1
|
conch-triton-kernels==1.2.1
|
||||||
timm>=1.0.17
|
timm>=1.0.17
|
||||||
@ -51,7 +51,7 @@ tritonclient==2.51.0
|
|||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
numba == 0.61.2; python_version > '3.9'
|
||||||
numpy
|
numpy
|
||||||
runai-model-streamer[s3]==0.14.0
|
runai-model-streamer[s3,gcs]==0.14.0
|
||||||
fastsafetensors>=0.1.10
|
fastsafetensors>=0.1.10
|
||||||
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
||||||
decord==0.6.0
|
decord==0.6.0
|
||||||
|
|||||||
@ -251,11 +251,27 @@ gitdb==4.0.12
|
|||||||
gitpython==3.1.44
|
gitpython==3.1.44
|
||||||
# via mlflow-skinny
|
# via mlflow-skinny
|
||||||
google-api-core==2.24.2
|
google-api-core==2.24.2
|
||||||
# via opencensus
|
# via
|
||||||
|
# google-cloud-core
|
||||||
|
# google-cloud-storage
|
||||||
|
# opencensus
|
||||||
google-auth==2.40.2
|
google-auth==2.40.2
|
||||||
# via
|
# via
|
||||||
# databricks-sdk
|
# databricks-sdk
|
||||||
# google-api-core
|
# google-api-core
|
||||||
|
# google-cloud-core
|
||||||
|
# google-cloud-storage
|
||||||
|
# runai-model-streamer-gcs
|
||||||
|
google-cloud-core==2.4.3
|
||||||
|
# via google-cloud-storage
|
||||||
|
google-cloud-storage==3.4.0
|
||||||
|
# via runai-model-streamer-gcs
|
||||||
|
google-crc32c==1.7.1
|
||||||
|
# via
|
||||||
|
# google-cloud-storage
|
||||||
|
# google-resumable-media
|
||||||
|
google-resumable-media==2.7.2
|
||||||
|
# via google-cloud-storage
|
||||||
googleapis-common-protos==1.70.0
|
googleapis-common-protos==1.70.0
|
||||||
# via google-api-core
|
# via google-api-core
|
||||||
graphene==3.4.3
|
graphene==3.4.3
|
||||||
@ -890,6 +906,7 @@ requests==2.32.3
|
|||||||
# docker
|
# docker
|
||||||
# evaluate
|
# evaluate
|
||||||
# google-api-core
|
# google-api-core
|
||||||
|
# google-cloud-storage
|
||||||
# huggingface-hub
|
# huggingface-hub
|
||||||
# lightly
|
# lightly
|
||||||
# lm-eval
|
# lm-eval
|
||||||
@ -929,6 +946,8 @@ rtree==1.4.0
|
|||||||
# via torchgeo
|
# via torchgeo
|
||||||
runai-model-streamer==0.14.0
|
runai-model-streamer==0.14.0
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
|
runai-model-streamer-gcs==0.14.0
|
||||||
|
# via runai-model-streamer
|
||||||
runai-model-streamer-s3==0.14.0
|
runai-model-streamer-s3==0.14.0
|
||||||
# via runai-model-streamer
|
# via runai-model-streamer
|
||||||
s3transfer==0.10.3
|
s3transfer==0.10.3
|
||||||
|
|||||||
@ -2,6 +2,7 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
|
||||||
import glob
|
import glob
|
||||||
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
@ -9,7 +10,8 @@ import huggingface_hub.constants
|
|||||||
|
|
||||||
from vllm.model_executor.model_loader.weight_utils import (
|
from vllm.model_executor.model_loader.weight_utils import (
|
||||||
download_weights_from_hf)
|
download_weights_from_hf)
|
||||||
from vllm.transformers_utils.runai_utils import (is_runai_obj_uri,
|
from vllm.transformers_utils.runai_utils import (ObjectStorageModel,
|
||||||
|
is_runai_obj_uri,
|
||||||
list_safetensors)
|
list_safetensors)
|
||||||
|
|
||||||
|
|
||||||
@ -34,6 +36,23 @@ def test_runai_list_safetensors_local():
|
|||||||
assert len(safetensors) == len(files)
|
assert len(safetensors) == len(files)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
def test_runai_pull_files_gcs(monkeypatch):
|
||||||
test_is_runai_obj_uri()
|
monkeypatch.setenv("RUNAI_STREAMER_GCS_USE_ANONYMOUS_CREDENTIALS", "true")
|
||||||
test_runai_list_safetensors_local()
|
# Bypass default project lookup by setting GOOGLE_CLOUD_PROJECT
|
||||||
|
monkeypatch.setenv("GOOGLE_CLOUD_PROJECT", "fake-project")
|
||||||
|
filename = "LT08_L1GT_074061_20130309_20170505_01_T2_MTL.txt"
|
||||||
|
gcs_bucket = "gs://gcp-public-data-landsat/LT08/01/074/061/LT08_L1GT_074061_20130309_20170505_01_T2/"
|
||||||
|
gcs_url = f"{gcs_bucket}/{filename}"
|
||||||
|
model = ObjectStorageModel(gcs_url)
|
||||||
|
model.pull_files(gcs_bucket, allow_pattern=[f"*{filename}"])
|
||||||
|
# To re-generate / change URLs:
|
||||||
|
# gsutil ls -L gs://<gcs-url> | grep "Hash (md5)" | tr -d ' ' \
|
||||||
|
# | cut -d":" -f2 | base64 -d | xxd -p
|
||||||
|
expected_checksum = "f60dea775da1392434275b311b31a431"
|
||||||
|
hasher = hashlib.new("md5")
|
||||||
|
with open(os.path.join(model.dir, filename), 'rb') as f:
|
||||||
|
# Read the file in chunks to handle large files efficiently
|
||||||
|
for chunk in iter(lambda: f.read(4096), b''):
|
||||||
|
hasher.update(chunk)
|
||||||
|
actual_checksum = hasher.hexdigest()
|
||||||
|
assert actual_checksum == expected_checksum
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user