[Bugfix] Temporarily disable gptq_bitblas on ROCm (#17411)

Signed-off-by: Yan Cangang <nalanzeyu@gmail.com>
This commit is contained in:
NaLan ZeYu 2025-05-01 10:51:45 +08:00 committed by GitHub
parent 08fb5587b4
commit 1144a8efe7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 1 deletion

View File

@@ -80,7 +80,7 @@ The table below shows the compatibility of various quantization implementations
* ✅︎
* ✅︎
* ✅︎
* ✅︎
*
* ❌
* ❌
* ❌

View File

@@ -25,6 +25,7 @@ from vllm.model_executor.parameter import (ChannelQuantScaleParameter,
PackedColumnParameter,
PackedvLLMParameter,
RowvLLMParameter)
from vllm.platforms import current_platform
from vllm.scalar_type import scalar_types
logger = init_logger(__name__)
@@ -191,6 +192,10 @@ class GPTQBitBLASConfig(QuantizationConfig):
sym = quant_config.get("sym")
desc_act = quant_config.get("desc_act")
# temporarily disable on ROCm platform
if not current_platform.is_cuda():
return False
# If we cannot find the info needed in the config, cannot convert.
if (num_bits is None or group_size is None or sym is None
or desc_act is None):