diff --git a/vllm/config.py b/vllm/config.py
index e260e6a0cb1d..ff8536c1aca5 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -330,15 +330,14 @@ class CacheConfig:
         if self.cache_dtype == "auto":
             pass
         elif self.cache_dtype == "fp8_e5m2":
+            if is_hip():
+                raise NotImplementedError(
+                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             nvcc_cuda_version = get_nvcc_cuda_version()
             if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
                 raise ValueError(
                     "FP8 is not supported when cuda version is lower than 11.8."
                 )
-            device_name = torch.cuda.get_device_name()
-            if "AMD" in device_name:
-                raise NotImplementedError(
-                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             logger.info(
                 "Using fp8_e5m2 data type to store kv cache. It reduces "
                 "the GPU memory footprint and boosts the performance. "
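
For context, the diff swaps a runtime device-name string match for the `is_hip()` helper and moves the AMD check ahead of the NVCC version check, so ROCm builds bail out before any CUDA-specific validation runs. Below is a minimal sketch of how such a helper is commonly implemented via PyTorch's build metadata; the exact body of `is_hip()` in `vllm/utils.py` may differ, so treat this as an illustration rather than the project's definitive implementation:

```python
# Sketch of a ROCm/HIP detection helper (assumed shape, not necessarily
# identical to vLLM's is_hip() in vllm/utils.py).
import torch


def is_hip() -> bool:
    # torch.version.hip is a version string on ROCm builds and None on
    # CUDA/CPU builds, so this detects AMD GPUs from build metadata
    # without needing an initialized device or a device-name lookup.
    return torch.version.hip is not None
```

Relying on build metadata instead of `torch.cuda.get_device_name()` avoids touching the device at config-validation time and does not depend on the marketing name containing the substring "AMD".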