From 83fd49b1fc6d3af6a4c77ef8bfa6451c39402239 Mon Sep 17 00:00:00 2001 From: Zhewen Li Date: Tue, 28 Oct 2025 23:27:30 -0700 Subject: [PATCH] [CI/Build][Bugfix]Fix Quantized Models Test on AMD (#27712) Signed-off-by: zhewenli --- .buildkite/test-amd.yaml | 2 +- tests/models/quantization/test_bitsandbytes.py | 6 ++++++ vllm/platforms/rocm.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 524d2e121a10f..dceec159a9daf 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -908,7 +908,7 @@ steps: - label: Quantized Models Test # 45 min timeout_in_minutes: 60 - mirror_hardwares: [amdexperimental] + mirror_hardwares: [amdexperimental, amdproduction] agent_pool: mi325_1 # grade: Blocking source_file_dependencies: diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py index 5e0421af1c17b..24220978534ca 100644 --- a/tests/models/quantization/test_bitsandbytes.py +++ b/tests/models/quantization/test_bitsandbytes.py @@ -9,10 +9,16 @@ import pytest from transformers import BitsAndBytesConfig from tests.quantization.utils import is_quant_method_supported +from vllm.platforms import current_platform from ...utils import compare_two_settings, multi_gpu_test from ..utils import check_embeddings_close, check_logprobs_close +pytestmark = pytest.mark.skipif( + current_platform.is_rocm(), + reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)", +) + models_4bit_to_test = [ ("facebook/opt-125m", "quantize opt model inflight"), ( diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 059ed4430e367..e67a7a7e70f7d 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -413,7 +413,7 @@ class RocmPlatform(Platform): "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ" " is not set, enabling VLLM_USE_TRITON_AWQ." ) - envs.VLLM_USE_TRITON_AWQ = True + os.environ["VLLM_USE_TRITON_AWQ"] = "1" @classmethod def get_punica_wrapper(cls) -> str: