mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-18 04:35:03 +08:00
[V1] Enable V1 for compute capability < 8.0 + FP32 (#23614)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
b5d34af328
commit
50fede6634
@@ -1433,14 +1433,14 @@ class EngineArgs:
|
|||||||
recommend_to_remove=True)
|
recommend_to_remove=True)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Need at least Ampere for now (FA support required).
|
# Triton v3.3 has f16 conversion regression issue on Turing and Volta,
|
||||||
# Skip this check if we are running on a non-GPU platform,
|
# which broke fp16 inference
|
||||||
# or if the device capability is not available
|
# see: https://github.com/triton-lang/triton/issues/6698
|
||||||
# (e.g. in a Ray actor without GPUs).
|
|
||||||
if (current_platform.is_cuda()
|
if (current_platform.is_cuda()
|
||||||
and current_platform.get_device_capability()
|
and not current_platform.has_device_capability(80)
|
||||||
and current_platform.get_device_capability().major < 8):
|
and model_config.dtype == torch.float16):
|
||||||
_raise_or_fallback(feature_name="Compute Capability < 8.0",
|
_raise_or_fallback(
|
||||||
|
feature_name="Compute Capability < 8.0 with FP16",
|
||||||
recommend_to_remove=False)
|
recommend_to_remove=False)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user