[CI/Build] Fix amd model executor test (#27612)
Signed-off-by: zhewenli <zhewenli@meta.com>
This commit is contained in:
  parent b46e4a06f1
  commit 0291fbf65c
@@ -561,7 +561,7 @@ steps:
 - label: Model Executor Test # 23min
   timeout_in_minutes: 35
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
   agent_pool: mi325_1
   # grade: Blocking
   source_file_dependencies:
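The functional pipeline change is one line: adding amdproduction to mirror_hardwares, so the Model Executor Test step is mirrored onto AMD production runners rather than only the experimental pool. Reassembled from the context lines above (indentation is an assumption, since the scraped diff drops it), the step now reads:

- label: Model Executor Test # 23min
  timeout_in_minutes: 35
  mirror_hardwares: [amdexperimental, amdproduction]
  agent_pool: mi325_1
  # grade: Blocking
  source_file_dependencies:
  # (dependency entries continue beyond this hunk)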
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
+
 from vllm import SamplingParams
+from vllm.platforms import current_platform
 
 test_model = "openai-community/gpt2"
 
@@ -15,6 +18,9 @@ prompts = [
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95, seed=0)
 
 
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
+)
 def test_model_loader_download_files(vllm_runner):
     with vllm_runner(test_model, load_format="fastsafetensors") as llm:
         deserialized_outputs = llm.generate(prompts, sampling_params)
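Both test files receive the same guard. current_platform is vLLM's runtime platform probe, and current_platform.is_cuda() is true only on NVIDIA CUDA builds, so the decorator makes pytest skip the fastsafetensors-based tests, with the stated reason, on the ROCm runners that the pipeline change above brings in. The pattern in isolation (the test name here is hypothetical, for illustration only):

import pytest

from vllm.platforms import current_platform


# Skip cleanly instead of failing on non-CUDA platforms such as ROCm:
# fastsafetensors only supports NVIDIA GPUs.
@pytest.mark.skipif(
    not current_platform.is_cuda(),
    reason="fastsafetensors requires CUDA/NVIDIA GPUs",
)
def test_cuda_only_example():
    assert current_platform.is_cuda()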
@@ -5,6 +5,7 @@ import glob
 import tempfile
 
 import huggingface_hub.constants
+import pytest
 import torch
 
 from vllm.model_executor.model_loader.weight_utils import (
@@ -12,8 +13,12 @@ from vllm.model_executor.model_loader.weight_utils import (
     fastsafetensors_weights_iterator,
     safetensors_weights_iterator,
 )
+from vllm.platforms import current_platform
 
 
+@pytest.mark.skipif(
+    not current_platform.is_cuda(), reason="fastsafetensors requires CUDA/NVIDIA GPUs"
+)
 def test_fastsafetensors_model_loader():
     with tempfile.TemporaryDirectory() as tmpdir:
         huggingface_hub.constants.HF_HUB_OFFLINE = False
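For context, safetensors_weights_iterator and fastsafetensors_weights_iterator both yield (name, tensor) pairs from .safetensors checkpoint files; the fastsafetensors variant relies on NVIDIA GPU-direct loading, which is why these tests must be CUDA-gated. A minimal sketch of a parity check between the two follows; the helper itself, the positional use_tqdm flag, and the exact iterator signatures are assumptions that may differ across vLLM versions:

import glob
import tempfile

import torch
from huggingface_hub import snapshot_download

from vllm.model_executor.model_loader.weight_utils import (
    fastsafetensors_weights_iterator,
    safetensors_weights_iterator,
)


def check_iterator_parity(model_id: str = "openai-community/gpt2") -> None:
    # Hypothetical helper, not part of this commit: download a small model
    # and assert both iterators produce identical tensors.
    with tempfile.TemporaryDirectory() as tmpdir:
        snapshot_download(model_id, local_dir=tmpdir,
                          allow_patterns=["*.safetensors"])
        files = sorted(glob.glob(f"{tmpdir}/*.safetensors"))
        baseline = dict(safetensors_weights_iterator(files, True))
        fast = dict(fastsafetensors_weights_iterator(files, True))
        assert baseline.keys() == fast.keys()
        for name, ref in baseline.items():
            assert torch.equal(ref.cpu(), fast[name].cpu())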