[Kernel] Enable FP8 Cutlass for Ada Lovelace (#6950)

Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
Varun Sundar Rabindranath 2024-07-31 17:40:22 -04:00 committed by GitHub
parent 460c1884e3
commit 93548eb37e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -38,13 +38,7 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
if (cuda_device_capability >= 90) {
return CUDA_VERSION >= 12000;
} else if (cuda_device_capability >= 89) {
// CUTLASS Kernels have not been tuned for Ada Lovelace systems
// and are slower than torch.mm. Return false unconditionally in this case.
return false;
// Once the CUTLASS kernels have been optimized for Lovelace systems,
// use the following check:
// return CUDA_VERSION >= 12040;
return CUDA_VERSION >= 12040;
}
#endif