25ff2824031b98cbbde7f1455c1f04cb305b6fd2/docs/wmma__gemm__multiply__add_8h_source.html

 /***************************************************************************************************
  * Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without modification, are permitted
  * provided that the following conditions are met:
  *     * Redistributions of source code must retain the above copyright notice, this list of
  *       conditions and the following disclaimer.
  *     * Redistributions in binary form must reproduce the above copyright notice, this list of
  *       conditions and the following disclaimer in the documentation and/or other materials
  *       provided with the distribution.
  *     * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
  *       to endorse or promote products derived from this software without specific prior written
  *       permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  **************************************************************************************************/
 #pragma once

 #include <cutlass/wmma_matrix.h>
 #ifdef CUTLASS_USE_WMMA_API
 #include <cutlass/fragment.h>

 namespace cutlass {
 namespace gemm {


 /// Multiply-add functor that applies nvcuda::wmma::mma_sync to every WMMA matrix of a tile of
 /// accumulators.
 ///
 /// Template parameters:
 ///   kLayoutA_ / kLayoutB_ / kLayoutC_  - matrix layouts forwarded to the A, B and C WmmaMatrix types.
 ///   ScalarA_ / ScalarB_ / ScalarC_     - scalar types forwarded to the A, B and C WmmaMatrix types.
 ///   AccumulatorsPerWarp_               - shape that is divided by InstructionShape_ to get Iterations.
 ///   InstructionShape_                  - shape handed to each WmmaMatrix.
 template <MatrixLayout::Kind kLayoutA_,
           typename ScalarA_,
           MatrixLayout::Kind kLayoutB_,
           typename ScalarB_,
           MatrixLayout::Kind kLayoutC_,
           typename ScalarC_,
           typename AccumulatorsPerWarp_,
           typename InstructionShape_>
 struct WmmaGemmMultiplyAdd {
   /// Shape of a single WMMA instruction.
   typedef InstructionShape_ InstructionShape;
   /// Shape built from the H and W extents of the instruction shape.
   /// NOTE(review): presumably a placeholder required by the multiply-add interface - confirm.
   typedef Shape<1, InstructionShape_::kH, InstructionShape_::kW> ThreadsPerWarp;
   /// Shape of the accumulators, as supplied by the template argument.
   typedef AccumulatorsPerWarp_ AccumulatorsPerWarp;
   /// Scalar type of the A operand.
   typedef ScalarA_ ScalarA;
   /// Scalar type of the B operand.
   typedef ScalarB_ ScalarB;
   /// Scalar type of the C operand and of the accumulators.
   typedef ScalarC_ ScalarC;
   /// Component-wise quotient AccumulatorsPerWarp / InstructionShape; kH and kW bound the loops
   /// in multiply_add.
   typedef typename ShapeDiv<AccumulatorsPerWarp, InstructionShape>::Shape Iterations;

   /// A single WMMA matrix for the A operand.
   typedef WmmaMatrix<GemmOperand::kA, kLayoutA_, ScalarA, InstructionShape> ElementA;
   /// Fragment holding Iterations::kW matrices of A.
   typedef Fragment<ElementA, Iterations::kW> FragmentA;

   /// A single WMMA matrix for the B operand.
   typedef WmmaMatrix<GemmOperand::kB, kLayoutB_, ScalarB, InstructionShape> ElementB;
   /// Fragment holding Iterations::kH matrices of B.
   typedef Fragment<ElementB, Iterations::kH> FragmentB;

   /// A single WMMA matrix for the C operand / accumulator.
   typedef WmmaMatrix<GemmOperand::kC, kLayoutC_, ScalarC, InstructionShape> ElementC;
   /// Fragment holding Iterations::kH * Iterations::kW accumulator matrices.
   typedef Fragment<ElementC, Iterations::kH * Iterations::kW> Accumulators;

   /// Default constructor; the functor carries no state.
   CUTLASS_DEVICE WmmaGemmMultiplyAdd() {}

   /// Issues one mma_sync per accumulator matrix.
   ///
   /// For every pair (row, col) with row in [0, Iterations::kH) and col in [0, Iterations::kW),
   /// the accumulator at index row * Iterations::kW + col is computed from a[col], b[row] and the
   /// matching entry of c, and is written to the matching entry of d.
   ///
   /// \param a  fragment of A matrices, indexed by col
   /// \param b  fragment of B matrices, indexed by row
   /// \param c  input accumulators
   /// \param d  output accumulators
   CUTLASS_DEVICE void multiply_add(FragmentA const& a,
                                    FragmentB const& b,
                                    Accumulators const& c,
                                    Accumulators& d) {
     for (int row = 0; row < Iterations::kH; ++row) {
       // Index of the first accumulator that belongs to this row.
       int const row_offset = row * Iterations::kW;

       for (int col = 0; col < Iterations::kW; ++col) {
         // Position of the accumulator shared by the source (c) and destination (d) fragments.
         int const acc_idx = row_offset + col;

         // d[acc_idx] receives the result of the WMMA multiply-accumulate on a[col], b[row]
         // and c[acc_idx].
         nvcuda::wmma::mma_sync(d[acc_idx], a[col], b[row], c[acc_idx]);
       }
     }
   }
 };


 }  // namespace gemm
 }  // namespace cutlass

 #endif  // defined CUTLASS_USE_WMMA_API
wmma_matrix.h
Abstractions for loading and storing matrices using the CUDA WMMA API.

cutlass
Definition: convert.h:33

cutlass::ShapeDiv::Shape
Shape< A_::kD/B_::kD, A_::kH/B_::kH, A_::kW/B_::kW, A_::kC/B_::kC > Shape
Definition: shape.h:126

cutlass::MatrixLayout::Kind
Kind
Definition: matrix_traits.h:36

fragment.h
Defines Fragment, a statically-sized array for storing parts of matrices within a thread's registers...