mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-13 17:25:38 +08:00
Reorder kv dtype check to avoid nvcc not found error on AMD platform (#3104)
This commit is contained in:
parent
29e70e3e88
commit
baee28c46c
@@ -330,15 +330,14 @@ class CacheConfig:
|
|||||||
if self.cache_dtype == "auto":
|
if self.cache_dtype == "auto":
|
||||||
pass
|
pass
|
||||||
elif self.cache_dtype == "fp8_e5m2":
|
elif self.cache_dtype == "fp8_e5m2":
|
||||||
|
if is_hip():
|
||||||
|
raise NotImplementedError(
|
||||||
|
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
|
||||||
nvcc_cuda_version = get_nvcc_cuda_version()
|
nvcc_cuda_version = get_nvcc_cuda_version()
|
||||||
if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
|
if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"FP8 is not supported when cuda version is lower than 11.8."
|
"FP8 is not supported when cuda version is lower than 11.8."
|
||||||
)
|
)
|
||||||
device_name = torch.cuda.get_device_name()
|
|
||||||
if "AMD" in device_name:
|
|
||||||
raise NotImplementedError(
|
|
||||||
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
|
|
||||||
logger.info(
|
logger.info(
|
||||||
"Using fp8_e5m2 data type to store kv cache. It reduces "
|
"Using fp8_e5m2 data type to store kv cache. It reduces "
|
||||||
"the GPU memory footprint and boosts the performance. "
|
"the GPU memory footprint and boosts the performance. "
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user