[ROCm] Temporarily remove GPTQ ROCm support (#2138)

commit 2acd76f346 (parent b81a6a6bb3)
Author: Woosuk Kwon (committed via GitHub)
Date:   2023-12-15 17:13:58 -08:00
2 changed files with 2 additions and 2 deletions

diff --git a/setup.py b/setup.py

@@ -219,13 +219,13 @@ vllm_extension_sources = [
     "csrc/activation_kernels.cu",
     "csrc/layernorm_kernels.cu",
     "csrc/quantization/squeezellm/quant_cuda_kernel.cu",
-    "csrc/quantization/gptq/q_gemm.cu",
     "csrc/cuda_utils_kernels.cu",
     "csrc/pybind.cpp",
 ]
 
 if _is_cuda():
     vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
+    vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")
 
 vllm_extension = CUDAExtension(
     name="vllm._C",

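The gating relies on the _is_cuda() helper defined earlier in setup.py, outside this hunk. A minimal sketch of such a check, assuming it keys off the PyTorch build (the actual implementation is not shown in this diff):

import torch

def _is_cuda() -> bool:
    # CUDA builds of PyTorch report a version string here; ROCm builds
    # leave it as None and set torch.version.hip instead.
    return torch.version.cuda is not None

With the GPTQ source moved inside this branch, a ROCm build skips the append and csrc/quantization/gptq/q_gemm.cu is no longer compiled at all.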
diff --git a/vllm/config.py b/vllm/config.py

@@ -143,7 +143,7 @@ class ModelConfig:
     def _verify_quantization(self) -> None:
         supported_quantization = ["awq", "gptq", "squeezellm"]
-        rocm_not_supported_quantization = ["awq"]
+        rocm_not_supported_quantization = ["awq", "gptq"]
         if self.quantization is not None:
             self.quantization = self.quantization.lower()
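This hunk only shows the list being extended; the check that consumes it lives further down in _verify_quantization and is not part of the diff. A standalone sketch of that rejection logic, where the function name and the is_hip flag are assumptions:

from typing import Optional

ROCM_NOT_SUPPORTED_QUANTIZATION = ["awq", "gptq"]

def check_rocm_quantization(quantization: Optional[str], is_hip: bool) -> None:
    # Hypothetical standalone version of the guard: on a ROCm (HIP) build,
    # quantization methods without ROCm kernels are rejected up front.
    if quantization is None:
        return
    if is_hip and quantization.lower() in ROCM_NOT_SUPPORTED_QUANTIZATION:
        raise ValueError(
            f"{quantization} quantization is currently not supported in ROCm.")

Assuming a guard of this shape, loading a GPTQ-quantized model on ROCm now fails fast during config validation rather than later in execution.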