mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 10:46:08 +08:00
[ROCm] Temporarily remove GPTQ ROCm support (#2138)
This commit is contained in:
parent
b81a6a6bb3
commit
2acd76f346
2
setup.py
2
setup.py
@@ -219,13 +219,13 @@ vllm_extension_sources = [
|
|||||||
"csrc/activation_kernels.cu",
|
"csrc/activation_kernels.cu",
|
||||||
"csrc/layernorm_kernels.cu",
|
"csrc/layernorm_kernels.cu",
|
||||||
"csrc/quantization/squeezellm/quant_cuda_kernel.cu",
|
"csrc/quantization/squeezellm/quant_cuda_kernel.cu",
|
||||||
"csrc/quantization/gptq/q_gemm.cu",
|
|
||||||
"csrc/cuda_utils_kernels.cu",
|
"csrc/cuda_utils_kernels.cu",
|
||||||
"csrc/pybind.cpp",
|
"csrc/pybind.cpp",
|
||||||
]
|
]
|
||||||
|
|
||||||
if _is_cuda():
|
if _is_cuda():
|
||||||
vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
|
vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")
|
||||||
|
vllm_extension_sources.append("csrc/quantization/gptq/q_gemm.cu")
|
||||||
|
|
||||||
vllm_extension = CUDAExtension(
|
vllm_extension = CUDAExtension(
|
||||||
name="vllm._C",
|
name="vllm._C",
|
||||||
|
|||||||
@@ -143,7 +143,7 @@ class ModelConfig:
|
|||||||
|
|
||||||
def _verify_quantization(self) -> None:
|
def _verify_quantization(self) -> None:
|
||||||
supported_quantization = ["awq", "gptq", "squeezellm"]
|
supported_quantization = ["awq", "gptq", "squeezellm"]
|
||||||
rocm_not_supported_quantization = ["awq"]
|
rocm_not_supported_quantization = ["awq", "gptq"]
|
||||||
if self.quantization is not None:
|
if self.quantization is not None:
|
||||||
self.quantization = self.quantization.lower()
|
self.quantization = self.quantization.lower()
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user