[Kernel] Enable FP8 Cutlass for Ada Lovelace (#6950)
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
parent
460c1884e3
commit
93548eb37e
@ -38,13 +38,7 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
|
|||||||
if (cuda_device_capability >= 90) {
|
if (cuda_device_capability >= 90) {
|
||||||
return CUDA_VERSION >= 12000;
|
return CUDA_VERSION >= 12000;
|
||||||
} else if (cuda_device_capability >= 89) {
|
} else if (cuda_device_capability >= 89) {
|
||||||
// CUTLASS Kernels have not been tuned for Ada Lovelace systems
|
return CUDA_VERSION >= 12040;
|
||||||
// and are slower than torch.mm. Return false unconditionally in this case.
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Once the CUTLASS kernels have been optimized for Lovelace systems,
|
|
||||||
// use the following check:
|
|
||||||
// return CUDA_VERSION >= 12040;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user