mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-10 23:02:22 +08:00
[CI/Build] Consolidate model loader tests and requirements (#25765)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
c242c98031
commit
d346ec695e
@ -465,29 +465,18 @@ steps:
|
|||||||
commands:
|
commands:
|
||||||
- pytest -v -s kernels/mamba
|
- pytest -v -s kernels/mamba
|
||||||
|
|
||||||
- label: Tensorizer Test # 14min
|
- label: Model Executor Test # ???
|
||||||
timeout_in_minutes: 25
|
timeout_in_minutes: 60
|
||||||
mirror_hardwares: [amdexperimental]
|
|
||||||
source_file_dependencies:
|
|
||||||
- vllm/model_executor/model_loader
|
|
||||||
- tests/tensorizer_loader
|
|
||||||
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
|
||||||
commands:
|
|
||||||
- apt-get update && apt-get install -y curl libsodium23
|
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
|
||||||
- pytest -v -s tensorizer_loader
|
|
||||||
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
|
||||||
|
|
||||||
- label: Model Executor Test # 7min
|
|
||||||
timeout_in_minutes: 20
|
|
||||||
mirror_hardwares: [amdexperimental]
|
mirror_hardwares: [amdexperimental]
|
||||||
source_file_dependencies:
|
source_file_dependencies:
|
||||||
- vllm/model_executor
|
- vllm/model_executor
|
||||||
- tests/model_executor
|
- tests/model_executor
|
||||||
|
- tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
||||||
commands:
|
commands:
|
||||||
- apt-get update && apt-get install -y curl libsodium23
|
- apt-get update && apt-get install -y curl libsodium23
|
||||||
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
|
||||||
- pytest -v -s model_executor
|
- pytest -v -s model_executor
|
||||||
|
- pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py
|
||||||
|
|
||||||
- label: Benchmarks # 11min
|
- label: Benchmarks # 11min
|
||||||
timeout_in_minutes: 20
|
timeout_in_minutes: 20
|
||||||
|
|||||||
2
.github/mergify.yml
vendored
2
.github/mergify.yml
vendored
@ -274,7 +274,7 @@ pull_request_rules:
|
|||||||
- files~=^vllm/model_executor/model_loader/tensorizer.py
|
- files~=^vllm/model_executor/model_loader/tensorizer.py
|
||||||
- files~=^vllm/model_executor/model_loader/tensorizer_loader.py
|
- files~=^vllm/model_executor/model_loader/tensorizer_loader.py
|
||||||
- files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
- files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py
|
||||||
- files~=^tests/tensorizer_loader/
|
- files~=^tests/model_executor/model_loader/tensorizer_loader/
|
||||||
actions:
|
actions:
|
||||||
assign:
|
assign:
|
||||||
users:
|
users:
|
||||||
|
|||||||
@ -546,7 +546,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
|
|||||||
else \
|
else \
|
||||||
BITSANDBYTES_VERSION="0.46.1"; \
|
BITSANDBYTES_VERSION="0.46.1"; \
|
||||||
fi; \
|
fi; \
|
||||||
uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' boto3 runai-model-streamer runai-model-streamer[s3]
|
uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3]>=0.14.0'
|
||||||
|
|
||||||
ENV VLLM_USAGE_SOURCE production-docker-image
|
ENV VLLM_USAGE_SOURCE production-docker-image
|
||||||
|
|
||||||
|
|||||||
@ -43,7 +43,6 @@ tritonclient==2.51.0
|
|||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
numba == 0.61.2; python_version > '3.9'
|
||||||
numpy
|
numpy
|
||||||
runai-model-streamer==0.11.0
|
runai-model-streamer[s3]==0.14.0
|
||||||
runai-model-streamer-s3==0.11.0
|
|
||||||
fastsafetensors>=0.1.10
|
fastsafetensors>=0.1.10
|
||||||
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
||||||
|
|||||||
@ -5,8 +5,6 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req
|
|||||||
numba == 0.61.2; python_version > '3.9'
|
numba == 0.61.2; python_version > '3.9'
|
||||||
|
|
||||||
# Dependencies for AMD GPUs
|
# Dependencies for AMD GPUs
|
||||||
boto3
|
|
||||||
botocore
|
|
||||||
datasets
|
datasets
|
||||||
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1.
|
||||||
peft
|
peft
|
||||||
@ -15,7 +13,6 @@ tensorizer==2.10.1
|
|||||||
packaging>=24.2
|
packaging>=24.2
|
||||||
setuptools>=77.0.3,<80.0.0
|
setuptools>=77.0.3,<80.0.0
|
||||||
setuptools-scm>=8
|
setuptools-scm>=8
|
||||||
runai-model-streamer==0.11.0
|
runai-model-streamer[s3]==0.14.0
|
||||||
runai-model-streamer-s3==0.11.0
|
|
||||||
conch-triton-kernels==1.2.1
|
conch-triton-kernels==1.2.1
|
||||||
timm>=1.0.17
|
timm>=1.0.17
|
||||||
@ -51,8 +51,7 @@ tritonclient==2.51.0
|
|||||||
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding
|
||||||
numba == 0.61.2; python_version > '3.9'
|
numba == 0.61.2; python_version > '3.9'
|
||||||
numpy
|
numpy
|
||||||
runai-model-streamer==0.11.0
|
runai-model-streamer[s3]==0.14.0
|
||||||
runai-model-streamer-s3==0.11.0
|
|
||||||
fastsafetensors>=0.1.10
|
fastsafetensors>=0.1.10
|
||||||
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
pydantic>=2.10 # 2.9 leads to error on python 3.10
|
||||||
decord==0.6.0
|
decord==0.6.0
|
||||||
|
|||||||
@ -72,7 +72,9 @@ blobfile==3.0.0
|
|||||||
bm25s==0.2.13
|
bm25s==0.2.13
|
||||||
# via mteb
|
# via mteb
|
||||||
boto3==1.35.57
|
boto3==1.35.57
|
||||||
# via tensorizer
|
# via
|
||||||
|
# runai-model-streamer-s3
|
||||||
|
# tensorizer
|
||||||
botocore==1.35.57
|
botocore==1.35.57
|
||||||
# via
|
# via
|
||||||
# boto3
|
# boto3
|
||||||
@ -925,10 +927,10 @@ rsa==4.9.1
|
|||||||
# via google-auth
|
# via google-auth
|
||||||
rtree==1.4.0
|
rtree==1.4.0
|
||||||
# via torchgeo
|
# via torchgeo
|
||||||
runai-model-streamer==0.11.0
|
runai-model-streamer==0.14.0
|
||||||
# via -r requirements/test.in
|
|
||||||
runai-model-streamer-s3==0.11.0
|
|
||||||
# via -r requirements/test.in
|
# via -r requirements/test.in
|
||||||
|
runai-model-streamer-s3==0.14.0
|
||||||
|
# via runai-model-streamer
|
||||||
s3transfer==0.10.3
|
s3transfer==0.10.3
|
||||||
# via boto3
|
# via boto3
|
||||||
sacrebleu==2.4.3
|
sacrebleu==2.4.3
|
||||||
|
|||||||
5
setup.py
5
setup.py
@ -654,10 +654,7 @@ setup(
|
|||||||
"bench": ["pandas", "datasets"],
|
"bench": ["pandas", "datasets"],
|
||||||
"tensorizer": ["tensorizer==2.10.1"],
|
"tensorizer": ["tensorizer==2.10.1"],
|
||||||
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
|
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
|
||||||
"runai": [
|
"runai": ["runai-model-streamer[s3,gcs] >= 0.14.0"],
|
||||||
"runai-model-streamer >= 0.14.0", "runai-model-streamer-gcs",
|
|
||||||
"google-cloud-storage", "runai-model-streamer-s3", "boto3"
|
|
||||||
],
|
|
||||||
"audio": ["librosa", "soundfile",
|
"audio": ["librosa", "soundfile",
|
||||||
"mistral_common[audio]"], # Required for audio processing
|
"mistral_common[audio]"], # Required for audio processing
|
||||||
"video": [], # Kept for backwards compatibility
|
"video": [], # Kept for backwards compatibility
|
||||||
|
|||||||
@ -1,52 +0,0 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def sample_regex():
|
|
||||||
return (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
|
|
||||||
r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)")
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def sample_json_schema():
|
|
||||||
return {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"name": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"age": {
|
|
||||||
"type": "integer"
|
|
||||||
},
|
|
||||||
"skills": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string",
|
|
||||||
"maxLength": 10
|
|
||||||
},
|
|
||||||
"minItems": 3
|
|
||||||
},
|
|
||||||
"work_history": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"company": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"duration": {
|
|
||||||
"type": "number"
|
|
||||||
},
|
|
||||||
"position": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["company", "position"]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"required": ["name", "age", "skills", "work_history"]
|
|
||||||
}
|
|
||||||
@ -14,6 +14,7 @@ import pytest
|
|||||||
import torch
|
import torch
|
||||||
|
|
||||||
import vllm.model_executor.model_loader.tensorizer
|
import vllm.model_executor.model_loader.tensorizer
|
||||||
|
from tests.utils import VLLM_PATH, RemoteOpenAIServer
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.engine.arg_utils import EngineArgs
|
from vllm.engine.arg_utils import EngineArgs
|
||||||
# yapf: disable
|
# yapf: disable
|
||||||
@ -27,7 +28,6 @@ from vllm.model_executor.model_loader.tensorizer_loader import (
|
|||||||
# yapf: enable
|
# yapf: enable
|
||||||
from vllm.utils import PlaceholderModule
|
from vllm.utils import PlaceholderModule
|
||||||
|
|
||||||
from ..utils import VLLM_PATH, RemoteOpenAIServer
|
|
||||||
from .conftest import DummyExecutor, assert_from_collective_rpc
|
from .conftest import DummyExecutor, assert_from_collective_rpc
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -639,6 +639,19 @@ def runai_safetensors_weights_iterator(
|
|||||||
yield from tensor_iter
|
yield from tensor_iter
|
||||||
|
|
||||||
|
|
||||||
|
def _init_loader(
|
||||||
|
pg: torch.distributed.ProcessGroup,
|
||||||
|
device: torch.device,
|
||||||
|
f_list: list[str],
|
||||||
|
*,
|
||||||
|
nogds: bool = False,
|
||||||
|
):
|
||||||
|
loader = SafeTensorsFileLoader(pg, device, nogds=nogds)
|
||||||
|
rank_file_map = {i: [f] for i, f in enumerate(f_list)}
|
||||||
|
loader.add_filenames(rank_file_map)
|
||||||
|
return loader
|
||||||
|
|
||||||
|
|
||||||
def fastsafetensors_weights_iterator(
|
def fastsafetensors_weights_iterator(
|
||||||
hf_weights_files: list[str],
|
hf_weights_files: list[str],
|
||||||
use_tqdm_on_load: bool,
|
use_tqdm_on_load: bool,
|
||||||
@ -656,17 +669,31 @@ def fastsafetensors_weights_iterator(
|
|||||||
for i in range(0, len(hf_weights_files), pg.size())
|
for i in range(0, len(hf_weights_files), pg.size())
|
||||||
]
|
]
|
||||||
|
|
||||||
|
nogds = False
|
||||||
|
|
||||||
for f_list in tqdm(
|
for f_list in tqdm(
|
||||||
weight_files_sub_lists,
|
weight_files_sub_lists,
|
||||||
desc="Loading safetensors using Fastsafetensor loader",
|
desc="Loading safetensors using Fastsafetensor loader",
|
||||||
disable=not enable_tqdm(use_tqdm_on_load),
|
disable=not enable_tqdm(use_tqdm_on_load),
|
||||||
bar_format=_BAR_FORMAT,
|
bar_format=_BAR_FORMAT,
|
||||||
):
|
):
|
||||||
loader = SafeTensorsFileLoader(pg, device)
|
loader = _init_loader(pg, device, f_list, nogds=nogds)
|
||||||
rank_file_map = {i: [f] for i, f in enumerate(f_list)}
|
|
||||||
loader.add_filenames(rank_file_map)
|
|
||||||
try:
|
try:
|
||||||
fb = loader.copy_files_to_device()
|
try:
|
||||||
|
fb = loader.copy_files_to_device()
|
||||||
|
except RuntimeError as e:
|
||||||
|
if "gds" not in str(e):
|
||||||
|
raise
|
||||||
|
|
||||||
|
loader.close()
|
||||||
|
nogds = True
|
||||||
|
logger.warning_once(
|
||||||
|
"GDS not enabled, setting `nogds=True`.\n"
|
||||||
|
"For more information, see: https://github.com/foundation-model-stack/fastsafetensors?tab=readme-ov-file#basic-api-usages"
|
||||||
|
)
|
||||||
|
loader = _init_loader(pg, device, f_list, nogds=nogds)
|
||||||
|
fb = loader.copy_files_to_device()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
keys = list(fb.key_to_rank_lidx.keys())
|
keys = list(fb.key_to_rank_lidx.keys())
|
||||||
for k in keys:
|
for k in keys:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user