mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 07:15:01 +08:00
[Kernel] Enable FP8 Cutlass for Ada Lovelace (#6950)
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
parent
460c1884e3
commit
93548eb37e
@@ -38,13 +38,7 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
|
||||
if (cuda_device_capability >= 90) {
|
||||
return CUDA_VERSION >= 12000;
|
||||
} else if (cuda_device_capability >= 89) {
|
||||
// CUTLASS Kernels have not been tuned for Ada Lovelace systems
|
||||
// and are slower than torch.mm. Return false unconditionally in this case.
|
||||
return false;
|
||||
|
||||
// Once the CUTLASS kernels have been optimized for Lovelace systems,
|
||||
// use the following check:
|
||||
// return CUDA_VERSION >= 12040;
|
||||
return CUDA_VERSION >= 12040;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user