[Kernel] Enable FP8 Cutlass for Ada Lovelace (#6950)

Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
2024-07-31 17:40:22 -04:00 · 2024-07-31 17:40:22 -04:00 · 93548eb37e
commit 93548eb37e
parent 460c1884e3
1 changed file with 1 addition and 7 deletions
--- a/csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
+++ b/csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
@@ -38,13 +38,7 @@ bool cutlass_scaled_mm_supports_fp8(int64_t cuda_device_capability) {
  if (cuda_device_capability >= 90) {
    return CUDA_VERSION >= 12000;
  } else if (cuda_device_capability >= 89) {
-    // CUTLASS Kernels have not been tuned for Ada Lovelace systems
+    return CUDA_VERSION >= 12040;
-    // and are slower than torch.mm. Return false unconditionally in this case.
-    return false;
-    // Once the CUTLASS kernels have been optimized for Lovelace systems,
-    // use the following check:
-    // return CUDA_VERSION >= 12040;
  }
 #endif