| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  | /***************************************************************************************************
 | 
					
						
							|  |  |  |  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * Redistribution and use in source and binary forms, with or without modification, are permitted | 
					
						
							|  |  |  |  * provided that the following conditions are met: | 
					
						
							|  |  |  |  *     * Redistributions of source code must retain the above copyright notice, this list of | 
					
						
							|  |  |  |  *       conditions and the following disclaimer. | 
					
						
							|  |  |  |  *     * Redistributions in binary form must reproduce the above copyright notice, this list of | 
					
						
							|  |  |  |  *       conditions and the following disclaimer in the documentation and/or other materials | 
					
						
							|  |  |  |  *       provided with the distribution. | 
					
						
							|  |  |  |  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used | 
					
						
							|  |  |  |  *       to endorse or promote products derived from this software without specific prior written | 
					
						
							|  |  |  |  *       permission. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR | 
					
						
							|  |  |  |  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | 
					
						
							|  |  |  |  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE | 
					
						
							|  |  |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, | 
					
						
							|  |  |  |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; | 
					
						
							|  |  |  |  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | 
					
						
							|  |  |  |  * STRICT LIABILITY, OR TOR (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
					
						
							|  |  |  |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
					
						
							|  |  |  |  * | 
					
						
							|  |  |  |  **************************************************************************************************/ | 
					
						
							|  |  |  | /*! \file
 | 
					
						
							|  |  |  |     \brief Test environment for GEMM | 
					
						
							|  |  |  | */ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #pragma once
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <fstream>
 | 
					
						
							|  |  |  | #include <iomanip>
 | 
					
						
							|  |  |  | #include <sstream>
 | 
					
						
							|  |  |  | #include <string>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cublas_v2.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <cutlass/matrix_traits.h>
 | 
					
						
							|  |  |  | #include <cutlass/util/platform.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | #include <tools/util/host_tensor.h>
 | 
					
						
							|  |  |  | #include <tools/util/tensor_view_io.h>
 | 
					
						
							|  |  |  | #include <tools/util/type_traits.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace cutlass { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  | template <cutlass::GemmOperand::Kind kOperand_, | 
					
						
							|  |  |  |           cutlass::MatrixLayout::Kind kLayout_, | 
					
						
							|  |  |  |           typename Scalar_, | 
					
						
							|  |  |  |           typename WmmaShape_> | 
					
						
							|  |  |  | struct WmmaMatrix; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | namespace test { | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  | template <typename T> | 
					
						
							|  |  |  | struct GemmTestbedTraits : public cutlass::TypeTraits<T> {}; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | template <cutlass::GemmOperand::Kind kOperand_, | 
					
						
							|  |  |  |           cutlass::MatrixLayout::Kind kLayout_, | 
					
						
							|  |  |  |           typename Scalar_, | 
					
						
							|  |  |  |           typename WmmaShape_> | 
					
						
							|  |  |  | struct GemmTestbedTraits<cutlass::WmmaMatrix<kOperand_, kLayout_, Scalar_, WmmaShape_> > { | 
					
						
							|  |  |  |   static cudaDataType_t const cublas_type = cutlass::TypeTraits<Scalar_>::cublas_type; | 
					
						
							|  |  |  |   typedef Scalar_ host_type; | 
					
						
							|  |  |  |   typedef Scalar_ device_type; | 
					
						
							|  |  |  |   static inline double remove_negative_zero(double x) { return x == -0.0 ? 0.0 : x; } | 
					
						
							|  |  |  |   static inline double to_print(double x) { return x; } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  | template <typename AType, typename BType, typename CType, typename Accumulator, typename Scalar> | 
					
						
							|  |  |  | struct GemmTestbed { | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  |   // Type definitions
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Host tensor for operand A
 | 
					
						
							|  |  |  |   typedef cutlass::HostTensor<AType> HostTensorA; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Host tensor for operand B
 | 
					
						
							|  |  |  |   typedef cutlass::HostTensor<BType> HostTensorB; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Host tensor for operand C
 | 
					
						
							|  |  |  |   typedef cutlass::HostTensor<CType> HostTensorC; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Functor to print errors
 | 
					
						
							|  |  |  |   struct PrintErrors { | 
					
						
							|  |  |  |     /// Equivalently sized integer type
 | 
					
						
							|  |  |  |     typedef typename GemmTestbedTraits<CType>::integer_type integer_t; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Output stream to write to
 | 
					
						
							|  |  |  |     std::ostream& out; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Reference tensor view
 | 
					
						
							|  |  |  |     cutlass::HostTensorView<CType> const& reference; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Computed tensor view
 | 
					
						
							|  |  |  |     cutlass::HostTensorView<CType> const& experimental; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Errors greater than or this amount result in printing
 | 
					
						
							|  |  |  |     integer_t ulps_threshold; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ///
 | 
					
						
							|  |  |  |     PrintErrors(std::ostream& _out, | 
					
						
							|  |  |  |                 cutlass::HostTensorView<CType> const& _reference, | 
					
						
							|  |  |  |                 cutlass::HostTensorView<CType> const& _experimental, | 
					
						
							|  |  |  |                 integer_t _ulps_threshold = 1) | 
					
						
							|  |  |  |         : out(_out), | 
					
						
							|  |  |  |           reference(_reference), | 
					
						
							|  |  |  |           experimental(_experimental), | 
					
						
							|  |  |  |           ulps_threshold(_ulps_threshold) {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     /// Compares one element
 | 
					
						
							|  |  |  |     void operator()(CType const& element, typename HostTensorC::Coord_t coord) { | 
					
						
							|  |  |  |       CType exp = experimental.at(coord); | 
					
						
							|  |  |  |       CType ref = reference.at(coord); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       int64_t int_exp = 0; | 
					
						
							|  |  |  |       int64_t int_ref = 0; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       *reinterpret_cast<CType*>(&int_exp) = exp; | 
					
						
							|  |  |  |       *reinterpret_cast<CType*>(&int_ref) = ref; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       integer_t ulps = integer_t(int_exp - int_ref); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |       if (std::abs(ulps) >= ulps_threshold) { | 
					
						
							|  |  |  |         // width in hexadecimal digits of value
 | 
					
						
							|  |  |  |         int const width = sizeof(integer_t) * 2; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         double relative = double(exp) - double(ref); | 
					
						
							|  |  |  |         if (ref != CType(0)) { | 
					
						
							|  |  |  |           relative /= double(ref); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out << "[" << coord << "] expected: " << GemmTestbedTraits<CType>::to_print(ref) << " (0x" | 
					
						
							|  |  |  |             << std::hex << std::setw(width) << std::setfill('0') << integer_t(int_ref) << std::dec | 
					
						
							|  |  |  |             << ")" | 
					
						
							|  |  |  |             << ",  got: " << GemmTestbedTraits<CType>::to_print(exp) << " (0x" << std::hex | 
					
						
							|  |  |  |             << std::setw(width) << std::setfill('0') << integer_t(int_exp) << std::dec << ")" | 
					
						
							|  |  |  |             << "  relative error: " << relative << ", ulps: " << ulps << "\n"; | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Generates random elements
 | 
					
						
							|  |  |  |   template <typename T> | 
					
						
							|  |  |  |   struct RandomGenerator { | 
					
						
							|  |  |  |     RandomGenerator(int seed = -1, bool only_ones_ = false) : only_ones(only_ones_) { srand(seed); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     T operator()() { | 
					
						
							|  |  |  |       if (only_ones) { | 
					
						
							|  |  |  |         return T(1); | 
					
						
							|  |  |  |       } else { | 
					
						
							|  |  |  |         int val = (rand() % 16) - 8; | 
					
						
							|  |  |  |         return T(val); | 
					
						
							|  |  |  |       } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     bool only_ones; | 
					
						
							|  |  |  |   }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  |   // Data members
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Status
 | 
					
						
							|  |  |  |   cublasStatus_t status; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// cuBLAS handle
 | 
					
						
							|  |  |  |   cublasHandle_t handle; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// cuBLAS GEMM algorithm selector
 | 
					
						
							|  |  |  |   cublasGemmAlgo_t algorithm; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// A matrix operand
 | 
					
						
							|  |  |  |   HostTensorA A; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Layout of A matrix
 | 
					
						
							|  |  |  |   cublasOperation_t layout_A; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// B matrix operand
 | 
					
						
							|  |  |  |   HostTensorB B; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Layout of B matrix
 | 
					
						
							|  |  |  |   cublasOperation_t layout_B; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// C matrix operand
 | 
					
						
							|  |  |  |   HostTensorC C_initial; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Reference result computed on the host
 | 
					
						
							|  |  |  |   cutlass::HostTensor<CType, false> ref_host; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Reference result computed with cublas
 | 
					
						
							|  |  |  |   HostTensorC ref_cublas; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Computed result
 | 
					
						
							|  |  |  |   HostTensorC computed; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Linear scalaring factor
 | 
					
						
							|  |  |  |   Scalar alpha; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Linear scaling factor
 | 
					
						
							|  |  |  |   Scalar beta; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  |   // Static helpers
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Helper to resize a matrix with a given size and layout
 | 
					
						
							|  |  |  |   template <typename T, bool DeviceBacked> | 
					
						
							|  |  |  |   static void resize(cutlass::HostTensor<T, DeviceBacked>& tensor, | 
					
						
							|  |  |  |                      int rows, | 
					
						
							|  |  |  |                      int columns, | 
					
						
							|  |  |  |                      cublasOperation_t layout, | 
					
						
							|  |  |  |                      int ldm = 0) { | 
					
						
							|  |  |  |     if (!ldm) { | 
					
						
							|  |  |  |       ldm = (layout == CUBLAS_OP_N ? rows : columns); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     typedef cutlass::Coord<cutlass::HostTensor<T>::Rank> Coord_t; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  |     size_t matrix_stride = layout == CUBLAS_OP_N ? columns * ldm : rows * ldm; | 
					
						
							|  |  |  |     // TODO: Remove that (int) cast.
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  |     Coord_t stride = cutlass::make_Coord( | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  |         (int)matrix_stride, layout == CUBLAS_OP_N ? 1 : ldm, layout == CUBLAS_OP_N ? ldm : 1, 1); | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  |     Coord_t size = cutlass::make_Coord(1, rows, columns, 1); | 
					
						
							|  |  |  |     tensor.reset(stride, size); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  |   // Methods
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  |   /// Constructs a workspace for verifying GEMM.
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  |   GemmTestbed(int M_, | 
					
						
							|  |  |  |               int N_, | 
					
						
							|  |  |  |               int K_, | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  |               int lda, | 
					
						
							|  |  |  |               int ldb, | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  |               int ldc, | 
					
						
							|  |  |  |               cublasOperation_t layout_a, | 
					
						
							|  |  |  |               cublasOperation_t layout_b, | 
					
						
							|  |  |  |               Scalar alpha_ = Scalar(1), | 
					
						
							|  |  |  |               Scalar beta_ = Scalar(0), | 
					
						
							|  |  |  |               cublasGemmAlgo_t algorithm_ = CUBLAS_GEMM_DEFAULT, | 
					
						
							|  |  |  |               cublasOperation_t layout_c = CUBLAS_OP_N) | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  |       : layout_A(layout_a), layout_B(layout_b), alpha(alpha_), beta(beta_), algorithm(algorithm_) { | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  |     status = cublasCreate(&handle); | 
					
						
							|  |  |  |     if (status != CUBLAS_STATUS_SUCCESS) { | 
					
						
							|  |  |  |       throw cutlass::cuda_exception("Failed to create CUBLAS handle"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     resize(A, M_, K_, layout_a, lda); | 
					
						
							|  |  |  |     resize(B, K_, N_, layout_b, ldb); | 
					
						
							|  |  |  |     resize(C_initial, M_, N_, layout_c, ldc); | 
					
						
							|  |  |  |     resize(ref_host, M_, N_, layout_c, ldc); | 
					
						
							|  |  |  |     resize(ref_cublas, M_, N_, layout_c, ldc); | 
					
						
							|  |  |  |     resize(computed, M_, N_, layout_c, ldc); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   ~GemmTestbed() { status = cublasDestroy(handle); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns true if the last CUBLAS call returned successfully
 | 
					
						
							|  |  |  |   bool good() const { return status == CUBLAS_STATUS_SUCCESS; } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns a pointer to the A operand
 | 
					
						
							|  |  |  |   typename HostTensorA::DeviceType* ptr_A() const { return A.device_data(); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Stride of A matrix
 | 
					
						
							|  |  |  |   int lda() const { return std::max(A.stride(HostTensorA::Dim_H), A.stride(HostTensorA::Dim_W)); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns a pointer to the B operand
 | 
					
						
							|  |  |  |   typename HostTensorB::DeviceType* ptr_B() const { return B.device_data(); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Stride of B matrix
 | 
					
						
							|  |  |  |   int ldb() const { return std::max(B.stride(HostTensorB::Dim_H), B.stride(HostTensorB::Dim_W)); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns a pointer to the initial state of the result tensor in device memory
 | 
					
						
							|  |  |  |   typename HostTensorC::DeviceType* ptr_C_initial() const { return C_initial.device_data(); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns a pointer to the result tensor in device memory
 | 
					
						
							|  |  |  |   typename HostTensorC::DeviceType* ptr_computed() const { return computed.device_data(); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns a pointer to the result tensor in device memory
 | 
					
						
							|  |  |  |   typename HostTensorC::DeviceType* ptr_cublas() const { return ref_cublas.device_data(); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Stride of C matrix
 | 
					
						
							|  |  |  |   int ldc() const { | 
					
						
							|  |  |  |     return std::max(C_initial.stride(HostTensorC::Dim_H), C_initial.stride(HostTensorC::Dim_W)); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Returns the number of flops implied by the computation (1 multiply-accumulate = 2 flops)
 | 
					
						
							|  |  |  |   uint64_t flops() const { return uint64_t(M()) * uint64_t(N()) * uint64_t(K()) * 2ULL; } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Computes the speed of the computation in GFLOPs/s
 | 
					
						
							|  |  |  |   double GFLOPs_per_sec(double runtime_ms) const { return double(flops()) / runtime_ms / 1.0e6; } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Matrix layout of A
 | 
					
						
							|  |  |  |   cublasOperation_t layout_a() const { return layout_A; } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Matrix layout of B
 | 
					
						
							|  |  |  |   cublasOperation_t layout_b() const { return layout_B; } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Number of rows of problem
 | 
					
						
							|  |  |  |   int M() const { return C_initial.size(HostTensorC::Dim_H); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Number of columns of problem
 | 
					
						
							|  |  |  |   int N() const { return C_initial.size(HostTensorC::Dim_W); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Number of columns of problem
 | 
					
						
							|  |  |  |   int K() const { return A.size(HostTensorA::Dim_W); } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Initializes data, randomly
 | 
					
						
							|  |  |  |   void initialize(int seed = -1) { | 
					
						
							|  |  |  |     A.fill_random(RandomGenerator<AType>(seed)); | 
					
						
							|  |  |  |     B.fill_random(RandomGenerator<BType>(seed + 11)); | 
					
						
							|  |  |  |     C_initial.fill_random(RandomGenerator<CType>(seed + 13)); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Computes the matrix product on the host
 | 
					
						
							|  |  |  |   void compute_host() { | 
					
						
							|  |  |  |     ref_host.fill(C_initial); | 
					
						
							|  |  |  |     ref_host.template gemm<AType, BType, Accumulator, Scalar>(A, B, alpha, beta); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Excutes an equivalent GEMM using cuBLAS
 | 
					
						
							|  |  |  |   bool execute_cublas() { | 
					
						
							|  |  |  |     status = cublasGemmEx(handle, | 
					
						
							|  |  |  |                           layout_a(), | 
					
						
							|  |  |  |                           layout_b(), | 
					
						
							|  |  |  |                           M(), | 
					
						
							|  |  |  |                           N(), | 
					
						
							|  |  |  |                           K(), | 
					
						
							|  |  |  |                           &alpha, | 
					
						
							|  |  |  |                           ptr_A(), | 
					
						
							|  |  |  |                           cutlass::TypeTraits<AType>::cublas_type, | 
					
						
							|  |  |  |                           lda(), | 
					
						
							|  |  |  |                           ptr_B(), | 
					
						
							|  |  |  |                           cutlass::TypeTraits<BType>::cublas_type, | 
					
						
							|  |  |  |                           ldb(), | 
					
						
							|  |  |  |                           &beta, | 
					
						
							|  |  |  |                           ref_cublas.device_data(), | 
					
						
							|  |  |  |                           cutlass::TypeTraits<CType>::cublas_type, | 
					
						
							|  |  |  |                           ldc(), | 
					
						
							|  |  |  |                           cutlass::TypeTraits<Accumulator>::cublas_type, | 
					
						
							|  |  |  |                           algorithm); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return status == CUBLAS_STATUS_SUCCESS; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Computes the matrix product using cuBLAS
 | 
					
						
							|  |  |  |   void compute_cublas() { | 
					
						
							|  |  |  |     ref_cublas.fill(C_initial); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!execute_cublas()) { | 
					
						
							|  |  |  |       throw std::runtime_error("compute_cublas() failed"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  |   // Compute the GEMM yourself
 | 
					
						
							|  |  |  |   //
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Names a probelm based on data type and problem size
 | 
					
						
							|  |  |  |   std::string workspace_name() const { | 
					
						
							|  |  |  |     std::stringstream ss; | 
					
						
							|  |  |  |     ss << "gemm_" << (layout_a() == CUBLAS_OP_N ? "n" : "t") | 
					
						
							|  |  |  |        << (layout_b() == CUBLAS_OP_N ? "n" : "t") << "_" << typeid(AType).name() << "_" | 
					
						
							|  |  |  |        << typeid(BType).name() << "_" << typeid(CType).name() << "_" << typeid(Accumulator).name() | 
					
						
							|  |  |  |        << "_" << typeid(Scalar).name() << "_" << M() << "x" << N() << "x" << K(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return ss.str(); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Writes the workspace to an ostream
 | 
					
						
							|  |  |  |   std::ostream& write(std::ostream& out) const { | 
					
						
							|  |  |  |     out << "A = " << A << "\nB = " << B << "\nC_initial = " << C_initial | 
					
						
							|  |  |  |         << "\nref_host = " << ref_host << "\nref_cublas = " << ref_cublas | 
					
						
							|  |  |  |         << "\ncomputed = " << computed << std::endl; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return out; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Outputs each mismatching element
 | 
					
						
							|  |  |  |   std::ostream& write_errors(std::ostream& out, | 
					
						
							|  |  |  |                              cutlass::HostTensorView<CType> const& experimental, | 
					
						
							|  |  |  |                              cutlass::HostTensorView<CType> const& ref) const { | 
					
						
							|  |  |  |     PrintErrors printer(out, ref, experimental); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     computed.visit(printer); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return out; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Sync's all input tensors to device
 | 
					
						
							|  |  |  |   void sync_device() { | 
					
						
							|  |  |  |     A.sync_device(); | 
					
						
							|  |  |  |     B.sync_device(); | 
					
						
							|  |  |  |     C_initial.sync_device(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ref_host.fill(C_initial); | 
					
						
							|  |  |  |     ref_cublas.fill(C_initial); | 
					
						
							|  |  |  |     computed.fill(C_initial); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ref_cublas.sync_device(); | 
					
						
							|  |  |  |     computed.sync_device(); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Sync's all output tensors to host
 | 
					
						
							|  |  |  |   void sync_host() { | 
					
						
							|  |  |  |     computed.sync_host(); | 
					
						
							|  |  |  |     ref_cublas.sync_host(); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Saves the workspace to files
 | 
					
						
							|  |  |  |   void save_workspace(cutlass::HostTensorView<CType> const& experimental, | 
					
						
							|  |  |  |                       cutlass::HostTensorView<CType> const& ref) { | 
					
						
							|  |  |  |     std::string name = workspace_name(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     std::string results_name = name + "_results.txt"; | 
					
						
							|  |  |  |     std::string errors_name = name + "_errors.txt"; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     std::ofstream results(results_name.c_str()); | 
					
						
							|  |  |  |     std::ofstream errors(errors_name.c_str()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     write(results); | 
					
						
							|  |  |  |     write_errors(errors, experimental, ref); | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Verifies the contents of C equal the host-side reference
 | 
					
						
							|  |  |  |   bool verify_with_host(bool save_on_error = true, bool always_print = false) { | 
					
						
							|  |  |  |     compute_host(); | 
					
						
							|  |  |  |     computed.sync_host(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     bool passed = computed.bit_equals(ref_host); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ((!passed && save_on_error) || always_print) { | 
					
						
							|  |  |  |       save_workspace(computed, ref_host); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return passed; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Verifies the contents of computed equal cuBLAS
 | 
					
						
							|  |  |  |   bool verify_with_cublas(bool save_on_error = true, bool always_print = false) { | 
					
						
							|  |  |  |     compute_cublas(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     ref_cublas.sync_host(); | 
					
						
							|  |  |  |     computed.sync_host(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     bool passed = computed.bit_equals(ref_cublas); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ((!passed && save_on_error) || always_print) { | 
					
						
							|  |  |  |       save_workspace(computed, ref_cublas); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return passed; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Verifies the host computation with cuBLAS
 | 
					
						
							|  |  |  |   bool verify_host_with_cublas(bool save_on_error = true, bool always_print = false) { | 
					
						
							|  |  |  |     compute_host(); | 
					
						
							|  |  |  |     compute_cublas(); | 
					
						
							|  |  |  |     ref_cublas.sync_host(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     bool passed = ref_host.bit_equals(ref_cublas); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if ((!passed && save_on_error) || always_print) { | 
					
						
							|  |  |  |       save_workspace(ref_host, ref_cublas); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return passed; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   /// Verifies with host-side and device-side computations
 | 
					
						
							|  |  |  |   bool verify_with_all() { | 
					
						
							|  |  |  |     bool passed = true; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     computed.sync_host(); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // verify on host
 | 
					
						
							|  |  |  |     passed = (passed && verify_with_host()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // verify with cublas
 | 
					
						
							|  |  |  |     passed = (passed && verify_with_cublas()); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return passed; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   bool has_cublas_support() const { return cutlass::platform::is_same<Accumulator, Scalar>::value; } | 
					
						
							|  |  |  | }; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | }  // namespace test
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  | namespace cutlass { | 
					
						
							|  |  |  | inline cublasOperation_t convert(cutlass::MatrixLayout::Kind layout) { | 
					
						
							|  |  |  |   switch (layout) { | 
					
						
							|  |  |  |     case cutlass::MatrixLayout::kRowMajor: | 
					
						
							|  |  |  |       return CUBLAS_OP_T; | 
					
						
							|  |  |  |     case cutlass::MatrixLayout::kColumnMajor: | 
					
						
							|  |  |  |       return CUBLAS_OP_N; | 
					
						
							|  |  |  |     default: | 
					
						
							|  |  |  |       break; | 
					
						
							|  |  |  |   } | 
					
						
							|  |  |  |   return CUBLAS_OP_N; | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2018-06-12 02:47:15 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | ////////////////////////////////////////////////////////////////////////////////////////////////////
 | 
					
						
							| 
									
										
										
										
											2018-05-17 02:44:56 +08:00
										 |  |  | } |