From 2acd76f346efcdff4f6ca1d92fe1575c448e4b70 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Fri, 15 Dec 2023 17:13:58 -0800
Subject: [PATCH] [ROCm] Temporarily remove GPTQ ROCm support (#2138)

---
 setup.py       | 2 +-
 vllm/config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 811d494e7a01..45a18776798f 100644
--- a/setup.py
+++ b/setup.py
@@ -219,13 +219,13 @@ vllm_extension_sources = [
     "csrc/activation_kernels.cu",
     "csrc/layernorm_kernels.cu",
     "csrc/quantization/squeezellm/quant_cuda_kernel.cu",
-    "csrc/quantization/gptq/q_gemm.cu",
     "csrc/cuda_utils_kernels.cu",
     "csrc/pybind.cpp",
 ]
 
 if _is_cuda():
     vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
+    vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")
 
 vllm_extension = CUDAExtension(
     name="vllm._C",
diff --git a/vllm/config.py b/vllm/config.py
index 15020e793fa1..79c9609ebea2 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -143,7 +143,7 @@ class ModelConfig:
 
     def _verify_quantization(self) -> None:
         supported_quantization = ["awq", "gptq", "squeezellm"]
-        rocm_not_supported_quantization = ["awq"]
+        rocm_not_supported_quantization = ["awq", "gptq"]
         if self.quantization is not None:
             self.quantization = self.quantization.lower()
 