From 83fd49b1fc6d3af6a4c77ef8bfa6451c39402239 Mon Sep 17 00:00:00 2001
From: Zhewen Li <zhewenli@meta.com>
Date: Tue, 28 Oct 2025 23:27:30 -0700
Subject: [PATCH] [CI/Build][Bugfix] Fix Quantized Models Test on AMD (#27712)

Signed-off-by: zhewenli <zhewenli@meta.com>
---
 .buildkite/test-amd.yaml                       | 2 +-
 tests/models/quantization/test_bitsandbytes.py | 6 ++++++
 vllm/platforms/rocm.py                         | 2 +-
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 524d2e121a10f..dceec159a9daf 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -908,7 +908,7 @@ steps:
 
 - label: Quantized Models Test # 45 min
   timeout_in_minutes: 60
-  mirror_hardwares: [amdexperimental]
+  mirror_hardwares: [amdexperimental, amdproduction]
   agent_pool: mi325_1
   # grade: Blocking
   source_file_dependencies:
diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 5e0421af1c17b..24220978534ca 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -9,10 +9,16 @@ import pytest
 from transformers import BitsAndBytesConfig
 
 from tests.quantization.utils import is_quant_method_supported
+from vllm.platforms import current_platform
 
 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close
 
+pytestmark = pytest.mark.skipif(
+    current_platform.is_rocm(),
+    reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
+)
+
 models_4bit_to_test = [
     ("facebook/opt-125m", "quantize opt model inflight"),
     (
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 059ed4430e367..e67a7a7e70f7d 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -413,7 +413,7 @@ class RocmPlatform(Platform):
                 "Using AWQ quantization with ROCm, but VLLM_USE_TRITON_AWQ"
                 " is not set, enabling VLLM_USE_TRITON_AWQ."
             )
-        envs.VLLM_USE_TRITON_AWQ = True
+        os.environ["VLLM_USE_TRITON_AWQ"] = "1"
 
     @classmethod
     def get_punica_wrapper(cls) -> str: