[Bugfix] Temporarily disable gptq_bitblas on ROCm (#17411)

Signed-off-by: Yan Cangang <nalanzeyu@gmail.com>
This commit is contained in:
NaLan ZeYu 2025-05-01 10:51:45 +08:00 committed by GitHub
parent 08fb5587b4
commit 1144a8efe7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 6 additions and 1 deletions

View File

@@ -80,7 +80,7 @@ The table below shows the compatibility of various quantization implementations
* ✅︎ * ✅︎
* ✅︎ * ✅︎
* ✅︎ * ✅︎
* ✅︎ *
* ❌ * ❌
* ❌ * ❌
* ❌ * ❌

View File

@@ -25,6 +25,7 @@ from vllm.model_executor.parameter import (ChannelQuantScaleParameter,
PackedColumnParameter, PackedColumnParameter,
PackedvLLMParameter, PackedvLLMParameter,
RowvLLMParameter) RowvLLMParameter)
from vllm.platforms import current_platform
from vllm.scalar_type import scalar_types from vllm.scalar_type import scalar_types
logger = init_logger(__name__) logger = init_logger(__name__)
@@ -191,6 +192,10 @@ class GPTQBitBLASConfig(QuantizationConfig):
sym = quant_config.get("sym") sym = quant_config.get("sym")
desc_act = quant_config.get("desc_act") desc_act = quant_config.get("desc_act")
# temporarily disable on ROCm platform (note: the check below returns False on every non-CUDA platform, not only ROCm)
if not current_platform.is_cuda():
return False
# If we cannot find the info needed in the config, cannot convert. # If we cannot find the info needed in the config, cannot convert.
if (num_bits is None or group_size is None or sym is None if (num_bits is None or group_size is None or sym is None
or desc_act is None): or desc_act is None):