From cfe4b933efede02bbd1d7a13bc7bb3d4389b9c66 Mon Sep 17 00:00:00 2001
From: akerr <akerr@nvidia.com>
Date: Sat, 29 Sep 2018 15:04:20 -0700
Subject: [PATCH] CUDA 9 lacks host-side conversions from float=>half. Instead,
 we must reinterpret_cast<> from cutlass::half_t => half.

---
 .../02_cutlass_utilities/cutlass_utilities.cu  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/examples/02_cutlass_utilities/cutlass_utilities.cu b/examples/02_cutlass_utilities/cutlass_utilities.cu
index 29669932..6b3d6454 100644
--- a/examples/02_cutlass_utilities/cutlass_utilities.cu
+++ b/examples/02_cutlass_utilities/cutlass_utilities.cu
@@ -144,18 +144,18 @@ cudaError_t Cutlass_FP16_SgemmNN(
   typename Gemm::Params params;
 
   int result = params.initialize(
-    M,                  // GEMM M dimension
-    N,                  // GEMM N dimension
-    K,                  // GEMM K dimension
-    half(float(alpha)), // scalar alpha - This is a legal conversion from cutlass::half_t to CUDA's half.
-    A,                  // matrix A operand
+    M,                                     // GEMM M dimension
+    N,                                     // GEMM N dimension
+    K,                                     // GEMM K dimension
+    reinterpret_cast<half const &>(alpha), // scalar alpha - cutlass::half_t object reinterpreted as CUDA's half (no value conversion); CUDA 9 lacks a host-side float=>half conversion.
+    A,                                     // matrix A operand
     lda,
-    B,                  // matrix B operand
+    B,                                     // matrix B operand
     ldb,
-    half(float(beta)),  // scalar beta - This is a legal conversion from cutlass::half_t to CUDA's half.
-    C,                  // source matrix C
+    reinterpret_cast<half const &>(beta),  // scalar beta - cutlass::half_t object reinterpreted as CUDA's half (no value conversion); CUDA 9 lacks a host-side float=>half conversion.
+    C,                                     // source matrix C
     ldc,
-    C,                  // destination matrix C (may be different memory than source C matrix)
+    C,                                     // destination matrix C (may be different memory than source C matrix)
     ldc
   );