mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 05:15:42 +08:00
[ROCm] Temporarily remove GPTQ ROCm support (#2138)
This commit is contained in:
parent
b81a6a6bb3
commit
2acd76f346
2
setup.py
2
setup.py
@@ -219,13 +219,13 @@ vllm_extension_sources = [
|
||||
"csrc/activation_kernels.cu",
|
||||
"csrc/layernorm_kernels.cu",
|
||||
"csrc/quantization/squeezellm/quant_cuda_kernel.cu",
|
||||
-"csrc/quantization/gptq/q_gemm.cu",
|
||||
"csrc/cuda_utils_kernels.cu",
|
||||
"csrc/pybind.cpp",
|
||||
]
|
||||
|
||||
if _is_cuda():
|
||||
vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
|
||||
+vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")
|
||||
|
||||
vllm_extension = CUDAExtension(
|
||||
name="vllm._C",
|
||||
|
||||
@@ -143,7 +143,7 @@ class ModelConfig:
|
||||
|
||||
def _verify_quantization(self) -> None:
|
||||
supported_quantization = ["awq", "gptq", "squeezellm"]
|
||||
-rocm_not_supported_quantization = ["awq"]
|
||||
+rocm_not_supported_quantization = ["awq", "gptq"]
|
||||
if self.quantization is not None:
|
||||
self.quantization = self.quantization.lower()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user