mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-03-16 15:57:15 +08:00
[CI/Build][Bugfix]Fix Quantized Models Test on AMD (#27712)
Signed-off-by: zhewenli <zhewenli@meta.com>
This commit is contained in:
parent
a4a4f0f617
commit
83fd49b1fc
@ -908,7 +908,7 @@ steps:
|
||||
|
||||
- label: Quantized Models Test # 45 min
|
||||
timeout_in_minutes: 60
|
||||
mirror_hardwares: [amdexperimental]
|
||||
mirror_hardwares: [amdexperimental, amdproduction]
|
||||
agent_pool: mi325_1
|
||||
# grade: Blocking
|
||||
source_file_dependencies:
|
||||
|
||||
@ -9,10 +9,16 @@ import pytest
|
||||
from transformers import BitsAndBytesConfig
|
||||
|
||||
from tests.quantization.utils import is_quant_method_supported
|
||||
from vllm.platforms import current_platform
|
||||
|
||||
from ...utils import compare_two_settings, multi_gpu_test
|
||||
from ..utils import check_embeddings_close, check_logprobs_close
|
||||
|
||||
pytestmark = pytest.mark.skipif(
|
||||
current_platform.is_rocm(),
|
||||
reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
|
||||
)
|
||||
|
||||
models_4bit_to_test = [
|
||||
("facebook/opt-125m", "quantize opt model inflight"),
|
||||
(
|
||||
|
||||
@ -413,7 +413,7 @@ class RocmPlatform(Platform):
|
||||
"Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ"
|
||||
" is not set, enabling VLLM_USE_TRITON_AWQ."
|
||||
)
|
||||
envs.VLLM_USE_TRITON_AWQ = True
|
||||
os.environ["VLLM_USE_TRITON_AWQ"] = "1"
|
||||
|
||||
@classmethod
|
||||
def get_punica_wrapper(cls) -> str:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user