[Bugfix] Temporarily disable gptq_bitblas on ROCm (#17411)
Signed-off-by: Yan Cangang <nalanzeyu@gmail.com>
This commit is contained in: parent 08fb5587b4, commit 1144a8efe7
@@ -80,7 +80,7 @@ The table below shows the compatibility of various quantization implementations
[compatibility table: the gptq_bitblas row's ROCm (AMD GPU) entry changes from ✅︎ to ❌; the other ✅︎/❌ columns are unchanged]
@@ -25,6 +25,7 @@ from vllm.model_executor.parameter import (ChannelQuantScaleParameter,
                                            PackedColumnParameter,
                                            PackedvLLMParameter,
                                            RowvLLMParameter)
+from vllm.platforms import current_platform
 from vllm.scalar_type import scalar_types
 
 logger = init_logger(__name__)
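
For context, vllm.platforms.current_platform is vLLM's runtime platform object, and its is_cuda() check is what the new guard relies on. Below is a minimal sketch of this style of platform gating, assuming vLLM is installed; the helper name bitblas_allowed is illustrative and not part of the patch.

# Sketch only: platform gating via vllm.platforms, mirroring the check added below.
from vllm.platforms import current_platform

def bitblas_allowed() -> bool:
    # The patch only permits the BitBLAS path on CUDA builds; ROCm and other
    # platforms are rejected. bitblas_allowed is a hypothetical helper name.
    return current_platform.is_cuda()

if __name__ == "__main__":
    print("gptq_bitblas allowed on this platform:", bitblas_allowed())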
@@ -191,6 +192,10 @@ class GPTQBitBLASConfig(QuantizationConfig):
         sym = quant_config.get("sym")
         desc_act = quant_config.get("desc_act")
 
+        # temporarily disable on ROCm platform
+        if not current_platform.is_cuda():
+            return False
+
         # If we cannot find the info needed in the config, cannot convert.
         if (num_bits is None or group_size is None or sym is None
                 or desc_act is None):
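
With this guard in place, the compatibility check returns False on any non-CUDA build (ROCm included), so the BitBLAS path is skipped there; per the commit title, this is a temporary measure. Below is a simplified, standalone restatement of the guarded check, assuming the surrounding method reads the usual GPTQ config keys ("bits", "group_size", "sym", "desc_act"); the function name and the trailing return stand in for the rest of the real method, which is not shown in this diff.

# Simplified sketch of the guarded compatibility check (not the full vLLM method).
from vllm.platforms import current_platform

def gptq_bitblas_compatible(quant_config: dict) -> bool:
    num_bits = quant_config.get("bits")
    group_size = quant_config.get("group_size")
    sym = quant_config.get("sym")
    desc_act = quant_config.get("desc_act")

    # New guard from this commit: never claim BitBLAS compatibility off CUDA.
    if not current_platform.is_cuda():
        return False

    # If we cannot find the info needed in the config, we cannot convert.
    if num_bits is None or group_size is None or sym is None or desc_act is None:
        return False

    # The real method continues with further checks (bit width, group size, ...).
    return True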