From 2acd76f346efcdff4f6ca1d92fe1575c448e4b70 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Date: Fri, 15 Dec 2023 17:13:58 -0800
Subject: [PATCH] [ROCm] Temporarily remove GPTQ ROCm support (#2138)

---
 setup.py       | 2 +-
 vllm/config.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 811d494e7a01..45a18776798f 100644
--- a/setup.py
+++ b/setup.py
@@ -219,13 +219,13 @@ vllm_extension_sources = [
     "csrc/activation_kernels.cu",
     "csrc/layernorm_kernels.cu",
     "csrc/quantization/squeezellm/quant_cuda_kernel.cu",
-    "csrc/quantization/gptq/q_gemm.cu",
     "csrc/cuda_utils_kernels.cu",
     "csrc/pybind.cpp",
 ]
 
 if _is_cuda():
     vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
+    vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")
 
 vllm_extension = CUDAExtension(
     name="vllm._C",
diff --git a/vllm/config.py b/vllm/config.py
index 15020e793fa1..79c9609ebea2 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -143,7 +143,7 @@ class ModelConfig:
 
     def _verify_quantization(self) -> None:
         supported_quantization = ["awq", "gptq", "squeezellm"]
-        rocm_not_supported_quantization = ["awq"]
+        rocm_not_supported_quantization = ["awq", "gptq"]
         if self.quantization is not None:
             self.quantization = self.quantization.lower()